summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kbuild1
-rw-r--r--arch/x86/Kconfig92
-rw-r--r--arch/x86/Kconfig.cpu13
-rw-r--r--arch/x86/Kconfig.debug46
-rw-r--r--arch/x86/boot/a20.c4
-rw-r--r--arch/x86/boot/apm.c4
-rw-r--r--arch/x86/boot/bioscall.S5
-rw-r--r--arch/x86/boot/bitops.h4
-rw-r--r--arch/x86/boot/boot.h4
-rw-r--r--arch/x86/boot/cmdline.c4
-rw-r--r--arch/x86/boot/compressed/acpi.c143
-rw-r--r--arch/x86/boot/compressed/eboot.c14
-rw-r--r--arch/x86/boot/compressed/head_64.S1
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S5
-rw-r--r--arch/x86/boot/compressed/misc.c12
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c16
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c1
-rw-r--r--arch/x86/boot/compressed/string.c14
-rw-r--r--arch/x86/boot/copy.S4
-rw-r--r--arch/x86/boot/cpu.c4
-rw-r--r--arch/x86/boot/cpucheck.c4
-rw-r--r--arch/x86/boot/edd.c4
-rw-r--r--arch/x86/boot/header.S14
-rw-r--r--arch/x86/boot/main.c4
-rw-r--r--arch/x86/boot/memory.c4
-rw-r--r--arch/x86/boot/mkcpustr.c5
-rw-r--r--arch/x86/boot/pm.c4
-rw-r--r--arch/x86/boot/pmjump.S4
-rw-r--r--arch/x86/boot/printf.c4
-rw-r--r--arch/x86/boot/regs.c5
-rw-r--r--arch/x86/boot/string.c4
-rw-r--r--arch/x86/boot/tty.c4
-rw-r--r--arch/x86/boot/version.c4
-rw-r--r--arch/x86/boot/vesa.h7
-rw-r--r--arch/x86/boot/video-bios.c4
-rw-r--r--arch/x86/boot/video-mode.c4
-rw-r--r--arch/x86/boot/video-vesa.c4
-rw-r--r--arch/x86/boot/video-vga.c4
-rw-r--r--arch/x86/boot/video.c4
-rw-r--r--arch/x86/boot/video.h4
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S5
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c6
-rw-r--r--arch/x86/crypto/aegis128l-aesni-asm.S5
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c6
-rw-r--r--arch/x86/crypto/aegis256-aesni-asm.S5
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c6
-rw-r--r--arch/x86/crypto/aes_glue.c1
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S6
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c51
-rw-r--r--arch/x86/crypto/blowfish-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/blowfish_glue.c17
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S7
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c7
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c7
-rw-r--r--arch/x86/crypto/camellia_glue.c17
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c17
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c17
-rw-r--r--arch/x86/crypto/chacha-avx2-x86_64.S6
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S6
-rw-r--r--arch/x86/crypto/chacha_glue.c8
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c15
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S11
-rw-r--r--arch/x86/crypto/des3_ede_glue.c12
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S5
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c5
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S12
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx2.S7
-rw-r--r--arch/x86/crypto/glue_helper.c17
-rw-r--r--arch/x86/crypto/morus1280-avx2-asm.S5
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c6
-rw-r--r--arch/x86/crypto/morus1280-sse2-asm.S5
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c6
-rw-r--r--arch/x86/crypto/morus1280_glue.c6
-rw-r--r--arch/x86/crypto/morus640-sse2-asm.S5
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c6
-rw-r--r--arch/x86/crypto/morus640_glue.c6
-rw-r--r--arch/x86/crypto/poly1305-avx2-x86_64.S6
-rw-r--r--arch/x86/crypto/poly1305-sse2-x86_64.S6
-rw-r--r--arch/x86/crypto/poly1305_glue.c6
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S7
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S17
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c7
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c17
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c17
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S6
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c7
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S17
-rw-r--r--arch/x86/crypto/twofish-i586-asm_32.S15
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S17
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S15
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c17
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c17
-rw-r--r--arch/x86/entry/calling.h21
-rw-r--r--arch/x86/entry/common.c19
-rw-r--r--arch/x86/entry/entry_32.S228
-rw-r--r--arch/x86/entry/entry_64.S199
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/entry/thunk_32.S2
-rw-r--r--arch/x86/entry/thunk_64.S7
-rw-r--r--arch/x86/entry/vdso/Makefile20
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c247
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdso32-setup.c7
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S1
-rw-r--r--arch/x86/entry/vdso/vgetcpu.c2
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/entry/vsyscall/Makefile3
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c41
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_emu_64.S3
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c83
-rw-r--r--arch/x86/events/Makefile3
-rw-r--r--arch/x86/events/amd/core.c1
-rw-r--r--arch/x86/events/amd/iommu.c5
-rw-r--r--arch/x86/events/amd/iommu.h5
-rw-r--r--arch/x86/events/amd/power.c5
-rw-r--r--arch/x86/events/amd/uncore.c20
-rw-r--r--arch/x86/events/core.c124
-rw-r--r--arch/x86/events/intel/bts.c10
-rw-r--r--arch/x86/events/intel/core.c194
-rw-r--r--arch/x86/events/intel/cstate.c167
-rw-r--r--arch/x86/events/intel/ds.c45
-rw-r--r--arch/x86/events/intel/pt.c10
-rw-r--r--arch/x86/events/intel/pt.h10
-rw-r--r--arch/x86/events/intel/rapl.c400
-rw-r--r--arch/x86/events/intel/uncore.c192
-rw-r--r--arch/x86/events/intel/uncore.h45
-rw-r--r--arch/x86/events/intel/uncore_snb.c101
-rw-r--r--arch/x86/events/intel/uncore_snbep.c605
-rw-r--r--arch/x86/events/msr.c110
-rw-r--r--arch/x86/events/perf_event.h28
-rw-r--r--arch/x86/events/probe.c45
-rw-r--r--arch/x86/events/probe.h29
-rw-r--r--arch/x86/hyperv/Makefile1
-rw-r--r--arch/x86/hyperv/hv_init.c116
-rw-r--r--arch/x86/ia32/Makefile1
-rw-r--r--arch/x86/ia32/ia32_aout.c1
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/ia32/sys_ia32.c16
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/acenv.h5
-rw-r--r--arch/x86/include/asm/acpi.h19
-rw-r--r--arch/x86/include/asm/acrn.h11
-rw-r--r--arch/x86/include/asm/apb_timer.h6
-rw-r--r--arch/x86/include/asm/apic.h9
-rw-r--r--arch/x86/include/asm/archrandom.h15
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_32.h66
-rw-r--r--arch/x86/include/asm/atomic64_64.h46
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/bitops.h189
-rw-r--r--arch/x86/include/asm/bootparam_utils.h2
-rw-r--r--arch/x86/include/asm/calgary.h15
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h21
-rw-r--r--arch/x86/include/asm/fpu/internal.h6
-rw-r--r--arch/x86/include/asm/fpu/xstate.h1
-rw-r--r--arch/x86/include/asm/frame.h49
-rw-r--r--arch/x86/include/asm/geode.h5
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hpet.h7
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h6
-rw-r--r--arch/x86/include/asm/hypervisor.h13
-rw-r--r--arch/x86/include/asm/imr.h6
-rw-r--r--arch/x86/include/asm/inat.h16
-rw-r--r--arch/x86/include/asm/inat_types.h16
-rw-r--r--arch/x86/include/asm/insn.h15
-rw-r--r--arch/x86/include/asm/intel-family.h5
-rw-r--r--arch/x86/include/asm/intel-mid.h6
-rw-r--r--arch/x86/include/asm/intel_telemetry.h11
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/iomap.h15
-rw-r--r--arch/x86/include/asm/ipi.h3
-rw-r--r--arch/x86/include/asm/irq_regs.h4
-rw-r--r--arch/x86/include/asm/irq_remapping.h14
-rw-r--r--arch/x86/include/asm/ist.h11
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/kexec.h17
-rw-r--r--arch/x86/include/asm/kprobes.h15
-rw-r--r--arch/x86/include/asm/kvm_host.h50
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/livepatch.h14
-rw-r--r--arch/x86/include/asm/mem_encrypt.h5
-rw-r--r--arch/x86/include/asm/mmu.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h225
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/mwait.h4
-rw-r--r--arch/x86/include/asm/olpc.h31
-rw-r--r--arch/x86/include/asm/orc_lookup.h14
-rw-r--r--arch/x86/include/asm/orc_types.h14
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/paravirt.h23
-rw-r--r--arch/x86/include/asm/paravirt_types.h23
-rw-r--r--arch/x86/include/asm/percpu.h236
-rw-r--r--arch/x86/include/asm/pgalloc.h19
-rw-r--r--arch/x86/include/asm/pgtable-3level.h47
-rw-r--r--arch/x86/include/asm/pgtable.h4
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h8
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/platform_sst_audio.h6
-rw-r--r--arch/x86/include/asm/processor.h8
-rw-r--r--arch/x86/include/asm/prom.h6
-rw-r--r--arch/x86/include/asm/ptrace.h50
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/sections.h2
-rw-r--r--arch/x86/include/asm/smap.h6
-rw-r--r--arch/x86/include/asm/smp.h4
-rw-r--r--arch/x86/include/asm/special_insns.h14
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/syscall.h5
-rw-r--r--arch/x86/include/asm/syscalls.h4
-rw-r--r--arch/x86/include/asm/sysfb.h6
-rw-r--r--arch/x86/include/asm/tce.h15
-rw-r--r--arch/x86/include/asm/text-patching.h17
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/topology.h17
-rw-r--r--arch/x86/include/asm/traps.h6
-rw-r--r--arch/x86/include/asm/uaccess.h4
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/uprobes.h15
-rw-r--r--arch/x86/include/asm/uv/bios.h15
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h261
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h44
-rw-r--r--arch/x86/include/asm/vgtod.h75
-rw-r--r--arch/x86/include/asm/virtext.h4
-rw-r--r--arch/x86/include/asm/vmx.h15
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/vvar.h9
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h6
-rw-r--r--arch/x86/include/asm/xor.h10
-rw-r--r--arch/x86/include/asm/xor_32.h10
-rw-r--r--arch/x86/include/asm/xor_avx.h6
-rw-r--r--arch/x86/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h55
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h3
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h3
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/apei.c11
-rw-r--r--arch/x86/kernel/acpi/boot.c19
-rw-r--r--arch/x86/kernel/acpi/cppc_msr.c11
-rw-r--r--arch/x86/kernel/acpi/cstate.c16
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S3
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S3
-rw-r--r--arch/x86/kernel/alternative.c304
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/amd_nb.c5
-rw-r--r--arch/x86/kernel/apb_timer.c6
-rw-r--r--arch/x86/kernel/apic/apic.c93
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c6
-rw-r--r--arch/x86/kernel/apic/io_apic.c50
-rw-r--r--arch/x86/kernel/apic/msi.c9
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/probe_64.c2
-rw-r--r--arch/x86/kernel/apic/vector.c9
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apm_32.c11
-rw-r--r--arch/x86/kernel/asm-offsets.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile11
-rw-r--r--arch/x86/kernel/cpu/acrn.c69
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c15
-rw-r--r--arch/x86/kernel/cpu/bugs.c11
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/common.c144
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c9
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c21
-rw-r--r--arch/x86/kernel/cpu/intel.c27
-rw-r--r--arch/x86/kernel/cpu/intel_epb.c22
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c96
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c14
-rw-r--r--arch/x86/kernel/cpu/mce/core.c180
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c1
-rw-r--r--arch/x86/kernel/cpu/mce/genpool.c3
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c46
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h12
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c20
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/microcode/Makefile1
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c6
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c19
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c6
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c14
-rw-r--r--arch/x86/kernel/cpu/mtrr/Makefile1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c16
-rw-r--r--arch/x86/kernel/cpu/rdrand.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c13
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c61
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/cpu/topology.c88
-rw-r--r--arch/x86/kernel/cpu/umwait.c200
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c167
-rw-r--r--arch/x86/kernel/cpuid.c7
-rw-r--r--arch/x86/kernel/crash.c19
-rw-r--r--arch/x86/kernel/e820.c7
-rw-r--r--arch/x86/kernel/eisa.c3
-rw-r--r--arch/x86/kernel/espfix_64.c10
-rw-r--r--arch/x86/kernel/fpu/Makefile1
-rw-r--r--arch/x86/kernel/fpu/core.c53
-rw-r--r--arch/x86/kernel/fpu/init.c20
-rw-r--r--arch/x86/kernel/fpu/signal.c16
-rw-r--r--arch/x86/kernel/fpu/xstate.c59
-rw-r--r--arch/x86/kernel/ftrace.c23
-rw-r--r--arch/x86/kernel/ftrace_32.S78
-rw-r--r--arch/x86/kernel/ftrace_64.S3
-rw-r--r--arch/x86/kernel/head64.c20
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/hpet.c936
-rw-r--r--arch/x86/kernel/hw_breakpoint.c14
-rw-r--r--arch/x86/kernel/i8237.c6
-rw-r--r--arch/x86/kernel/i8253.c25
-rw-r--r--arch/x86/kernel/idt.c6
-rw-r--r--arch/x86/kernel/ima_arch.c17
-rw-r--r--arch/x86/kernel/io_delay.c38
-rw-r--r--arch/x86/kernel/irq.c5
-rw-r--r--arch/x86/kernel/itmt.c12
-rw-r--r--arch/x86/kernel/jailhouse.c5
-rw-r--r--arch/x86/kernel/jump_label.c121
-rw-r--r--arch/x86/kernel/kdebugfs.c63
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c11
-rw-r--r--arch/x86/kernel/kgdb.c21
-rw-r--r--arch/x86/kernel/kprobes/Makefile1
-rw-r--r--arch/x86/kernel/kprobes/common.h28
-rw-r--r--arch/x86/kernel/kprobes/core.c46
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c15
-rw-r--r--arch/x86/kernel/kprobes/opt.c51
-rw-r--r--arch/x86/kernel/ksysfs.c3
-rw-r--r--arch/x86/kernel/kvm.c45
-rw-r--r--arch/x86/kernel/livepatch.c14
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c122
-rw-r--r--arch/x86/kernel/module.c14
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/msr.c7
-rw-r--r--arch/x86/kernel/nmi.c1
-rw-r--r--arch/x86/kernel/paravirt.c62
-rw-r--r--arch/x86/kernel/paravirt_patch.c126
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c67
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c75
-rw-r--r--arch/x86/kernel/pci-calgary_64.c14
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/perf_regs.c7
-rw-r--r--arch/x86/kernel/process_32.c16
-rw-r--r--arch/x86/kernel/process_64.c13
-rw-r--r--arch/x86/kernel/ptrace.c46
-rw-r--r--arch/x86/kernel/pvclock.c15
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S4
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S4
-rw-r--r--arch/x86/kernel/setup.c24
-rw-r--r--arch/x86/kernel/signal.c4
-rw-r--r--arch/x86/kernel/smp.c6
-rw-r--r--arch/x86/kernel/smpboot.c84
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/sysfb.c6
-rw-r--r--arch/x86/kernel/sysfb_efi.c6
-rw-r--r--arch/x86/kernel/sysfb_simplefb.c6
-rw-r--r--arch/x86/kernel/tboot.c15
-rw-r--r--arch/x86/kernel/tce_64.c15
-rw-r--r--arch/x86/kernel/time.c10
-rw-r--r--arch/x86/kernel/tls.c9
-rw-r--r--arch/x86/kernel/tls.h5
-rw-r--r--arch/x86/kernel/traps.c16
-rw-r--r--arch/x86/kernel/tsc.c62
-rw-r--r--arch/x86/kernel/tsc_msr.c4
-rw-r--r--arch/x86/kernel/umip.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c33
-rw-r--r--arch/x86/kernel/unwind_guess.c1
-rw-r--r--arch/x86/kernel/unwind_orc.c29
-rw-r--r--arch/x86/kernel/uprobes.c17
-rw-r--r--arch/x86/kernel/verify_cpu.S4
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S40
-rw-r--r--arch/x86/kernel/vsmp_64.c5
-rw-r--r--arch/x86/kernel/x86_init.c4
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c270
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/debugfs.c23
-rw-r--r--arch/x86/kvm/emulate.c50
-rw-r--r--arch/x86/kvm/hyperv.c25
-rw-r--r--arch/x86/kvm/hyperv.h5
-rw-r--r--arch/x86/kvm/ioapic.c15
-rw-r--r--arch/x86/kvm/irq.c21
-rw-r--r--arch/x86/kvm/irq.h14
-rw-r--r--arch/x86/kvm/irq_comm.c15
-rw-r--r--arch/x86/kvm/lapic.c321
-rw-r--r--arch/x86/kvm/lapic.h9
-rw-r--r--arch/x86/kvm/mmu.c211
-rw-r--r--arch/x86/kvm/mmu_audit.c5
-rw-r--r--arch/x86/kvm/mmutrace.h59
-rw-r--r--arch/x86/kvm/mtrr.c4
-rw-r--r--arch/x86/kvm/page_track.c4
-rw-r--r--arch/x86/kvm/paging_tmpl.h47
-rw-r--r--arch/x86/kvm/pmu.c101
-rw-r--r--arch/x86/kvm/pmu.h4
-rw-r--r--arch/x86/kvm/pmu_amd.c8
-rw-r--r--arch/x86/kvm/svm.c113
-rw-r--r--arch/x86/kvm/trace.h2
-rw-r--r--arch/x86/kvm/vmx/evmcs.c18
-rw-r--r--arch/x86/kvm/vmx/evmcs.h1
-rw-r--r--arch/x86/kvm/vmx/nested.c924
-rw-r--r--arch/x86/kvm/vmx/nested.h4
-rw-r--r--arch/x86/kvm/vmx/ops.h1
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c42
-rw-r--r--arch/x86/kvm/vmx/vmcs.h17
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h62
-rw-r--r--arch/x86/kvm/vmx/vmcs_shadow_fields.h79
-rw-r--r--arch/x86/kvm/vmx/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/vmx.c486
-rw-r--r--arch/x86/kvm/vmx/vmx.h125
-rw-r--r--arch/x86/kvm/x86.c280
-rw-r--r--arch/x86/kvm/x86.h12
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/cache-smp.c3
-rw-r--r--arch/x86/lib/checksum_32.S6
-rw-r--r--arch/x86/lib/clear_page_64.S1
-rw-r--r--arch/x86/lib/cmdline.c3
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S8
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S8
-rw-r--r--arch/x86/lib/copy_user_64.S4
-rw-r--r--arch/x86/lib/cpu.c1
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--arch/x86/lib/getuser.S20
-rw-r--r--arch/x86/lib/inat.c16
-rw-r--r--arch/x86/lib/insn-eval.c47
-rw-r--r--arch/x86/lib/insn.c15
-rw-r--r--arch/x86/lib/iomap_copy_64.S14
-rw-r--r--arch/x86/lib/memcpy_64.S1
-rw-r--r--arch/x86/lib/putuser.S29
-rw-r--r--arch/x86/lib/usercopy_64.c3
-rw-r--r--arch/x86/math-emu/fpu_emu.h2
-rw-r--r--arch/x86/math-emu/reg_constant.c2
-rw-r--r--arch/x86/mm/debug_pagetables.c36
-rw-r--r--arch/x86/mm/dump_pagetables.c6
-rw-r--r--arch/x86/mm/extable.c1
-rw-r--r--arch/x86/mm/fault.c81
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/mm/init_64.c39
-rw-r--r--arch/x86/mm/iomap_32.c15
-rw-r--r--arch/x86/mm/ioremap.c77
-rw-r--r--arch/x86/mm/kasan_init_64.c2
-rw-r--r--arch/x86/mm/kaslr.c11
-rw-r--r--arch/x86/mm/mem_encrypt.c37
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S5
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c27
-rw-r--r--arch/x86/mm/mmap.c15
-rw-r--r--arch/x86/mm/mmio-mod.c14
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/mm/numa.c1
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/pat.c1
-rw-r--r--arch/x86/mm/pf_in.c17
-rw-r--r--arch/x86/mm/pf_in.h17
-rw-r--r--arch/x86/mm/pgtable.c33
-rw-r--r--arch/x86/mm/pkeys.c10
-rw-r--r--arch/x86/mm/pti.c10
-rw-r--r--arch/x86/mm/testmmiotrace.c1
-rw-r--r--arch/x86/mm/tlb.c3
-rw-r--r--arch/x86/net/Makefile1
-rw-r--r--arch/x86/net/bpf_jit_comp.c80
-rw-r--r--arch/x86/net/bpf_jit_comp32.c367
-rw-r--r--arch/x86/pci/broadcom_bus.c6
-rw-r--r--arch/x86/pci/ce4100.c18
-rw-r--r--arch/x86/pci/common.c1
-rw-r--r--arch/x86/pci/legacy.c1
-rw-r--r--arch/x86/pci/mmconfig_32.c3
-rw-r--r--arch/x86/pci/olpc.c6
-rw-r--r--arch/x86/pci/sta2x11-fixup.c15
-rw-r--r--arch/x86/pci/xen.c1
-rw-r--r--arch/x86/platform/atom/Makefile1
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c34
-rw-r--r--arch/x86/platform/ce4100/Makefile1
-rw-r--r--arch/x86/platform/ce4100/ce4100.c6
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts5
-rw-r--r--arch/x86/platform/efi/efi.c2
-rw-r--r--arch/x86/platform/efi/efi_64.c9
-rw-r--r--arch/x86/platform/efi/quirks.c6
-rw-r--r--arch/x86/platform/geode/Makefile1
-rw-r--r--arch/x86/platform/geode/alix.c7
-rw-r--r--arch/x86/platform/geode/geos.c6
-rw-r--r--arch/x86/platform/geode/net5501.c7
-rw-r--r--arch/x86/platform/goldfish/Makefile1
-rw-r--r--arch/x86/platform/goldfish/goldfish.c11
-rw-r--r--arch/x86/platform/intel-mid/Makefile1
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bma023.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_emc1403.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_lis331.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_max7315.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic.h6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_tca6416.c6
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c6
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_vrtc.c6
-rw-r--r--arch/x86/platform/intel-mid/pwr.c5
-rw-r--r--arch/x86/platform/intel-mid/sfi.c6
-rw-r--r--arch/x86/platform/intel-quark/Makefile1
-rw-r--r--arch/x86/platform/intel-quark/imr.c15
-rw-r--r--arch/x86/platform/intel/Makefile1
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c32
-rw-r--r--arch/x86/platform/iris/Makefile1
-rw-r--r--arch/x86/platform/iris/iris.c15
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c6
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-rtc.c6
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c6
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c6
-rw-r--r--arch/x86/platform/olpc/olpc.c125
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c8
-rw-r--r--arch/x86/platform/olpc/olpc_ofw.c1
-rw-r--r--arch/x86/platform/pvh/enlighten.c2
-rw-r--r--arch/x86/platform/scx200/Makefile1
-rw-r--r--arch/x86/platform/scx200/scx200_32.c1
-rw-r--r--arch/x86/platform/sfi/Makefile1
-rw-r--r--arch/x86/platform/sfi/sfi.c15
-rw-r--r--arch/x86/platform/ts5500/Makefile1
-rw-r--r--arch/x86/platform/ts5500/ts5500.c7
-rw-r--r--arch/x86/platform/uv/Makefile1
-rw-r--r--arch/x86/platform/uv/bios_uv.c15
-rw-r--r--arch/x86/platform/uv/tlb_uv.c19
-rw-r--r--arch/x86/platform/uv/uv_nmi.c15
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c15
-rw-r--r--arch/x86/platform/uv/uv_time.c15
-rw-r--r--arch/x86/power/cpu.c13
-rw-r--r--arch/x86/power/hibernate.c33
-rw-r--r--arch/x86/power/hibernate_32.c3
-rw-r--r--arch/x86/power/hibernate_64.c3
-rw-r--r--arch/x86/power/hibernate_asm_64.S3
-rw-r--r--arch/x86/purgatory/entry64.S4
-rw-r--r--arch/x86/purgatory/purgatory.c4
-rw-r--r--arch/x86/purgatory/setup-x86_64.S4
-rw-r--r--arch/x86/purgatory/stack.S4
-rw-r--r--arch/x86/purgatory/string.c4
-rw-r--r--arch/x86/ras/Kconfig10
-rw-r--r--arch/x86/tools/insn_decoder_test.c18
-rw-r--r--arch/x86/tools/insn_sanity.c43
-rw-r--r--arch/x86/um/checksum_32.S6
-rw-r--r--arch/x86/um/delay.c5
-rw-r--r--arch/x86/um/mem_32.c5
-rw-r--r--arch/x86/um/signal.c4
-rw-r--r--arch/x86/um/vdso/um_vdso.c5
-rw-r--r--arch/x86/um/vdso/vma.c5
-rw-r--r--arch/x86/video/Makefile1
-rw-r--r--arch/x86/xen/Kconfig1
-rw-r--r--arch/x86/xen/debugfs.c7
-rw-r--r--arch/x86/xen/enlighten_hvm.c58
-rw-r--r--arch/x86/xen/enlighten_pv.c6
-rw-r--r--arch/x86/xen/mmu_pv.c15
-rw-r--r--arch/x86/xen/p2m.c3
-rw-r--r--arch/x86/xen/smp_pv.c2
-rw-r--r--arch/x86/xen/spinlock.c6
-rw-r--r--arch/x86/xen/xen-asm.S16
-rw-r--r--arch/x86/xen/xen-asm_64.S1
-rw-r--r--arch/x86/xen/xen-ops.h3
591 files changed, 8577 insertions, 8057 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index c625f57472f7..30dec019756b 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y += entry/
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..222855cc0158 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,6 +17,7 @@ config X86_32
select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+ select GENERIC_VDSO_32
config X86_64
def_bool y
@@ -69,6 +70,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_API if X86_64
+ select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
@@ -79,7 +81,6 @@ config X86
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_UBSAN_SANITIZE_ALL
- select ARCH_HAS_ZONE_DEVICE if X86_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
@@ -93,6 +94,7 @@ config X86
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANTS_THP_SWAP if X86_64
select BUILDTIME_EXTABLE_SORT
select CLKEVT_I8253
@@ -121,6 +123,8 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
+ select GUP_GET_PTE_LOW_HIGH if X86_PAE
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
@@ -156,6 +160,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
select HAVE_EXIT_THREAD
+ select HAVE_FAST_GUP
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
@@ -202,6 +207,7 @@ config X86
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
+ select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
@@ -217,6 +223,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
+ select PROC_PID_ARCH_STATUS if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -301,9 +308,6 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
def_bool y
-config ARCH_WANT_HUGE_PMD_SHARE
- def_bool y
-
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
@@ -395,8 +399,8 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also <file:Documentation/x86/i386/IO-APIC.txt>,
- <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
+ See also <file:Documentation/x86/i386/IO-APIC.rst>,
+ <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
@@ -781,6 +785,9 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
+config X86_HV_CALLBACK_VECTOR
+ def_bool n
+
source "arch/x86/xen/Kconfig"
config KVM_GUEST
@@ -832,6 +839,17 @@ config JAILHOUSE_GUEST
cell. You can leave this option disabled if you only want to start
Jailhouse and run Linux afterwards in the root cell.
+config ACRN_GUEST
+ bool "ACRN Guest support"
+ depends on X86_64
+ select X86_HV_CALLBACK_VECTOR
+ help
+ This option allows running Linux as a guest in the ACRN hypervisor. ACRN is
+ a flexible, lightweight reference open-source hypervisor, built with
+ real-time and safety-criticality in mind. It is built for embedded
+ IoT with a small footprint and real-time features. More details can be
+ found at https://projectacrn.org/.
+
endif #HYPERVISOR_GUEST
source "arch/x86/Kconfig.cpu"
@@ -1290,7 +1308,7 @@ config MICROCODE
the Linux kernel.
The preferred method to load microcode from a detached initrd is described
- in Documentation/x86/microcode.txt. For that you need to enable
+ in Documentation/x86/microcode.rst. For that you need to enable
CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
initrd for microcode blobs.
@@ -1329,7 +1347,7 @@ config MICROCODE_OLD_INTERFACE
It is inadequate because it runs too late to be able to properly
load microcode on a machine and it needs special tools. Instead, you
should've switched to the early loading method with the initrd or
- builtin microcode by now: Documentation/x86/microcode.txt
+ builtin microcode by now: Documentation/x86/microcode.rst
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -1478,7 +1496,7 @@ config X86_5LEVEL
A kernel with the option enabled can be booted on machines that
support 4- or 5-level paging.
- See Documentation/x86/x86_64/5level-paging.txt for more
+ See Documentation/x86/x86_64/5level-paging.rst for more
information.
Say N if unsure.
@@ -1508,6 +1526,7 @@ config AMD_MEM_ENCRYPT
depends on X86_64 && CPU_SUP_AMD
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
---help---
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -1626,7 +1645,7 @@ config ARCH_MEMORY_PROBE
depends on X86_64 && MEMORY_HOTPLUG
help
This option enables a sysfs memory/probe interface for testing.
- See Documentation/memory-hotplug.txt for more information.
+ See Documentation/admin-guide/mm/memory-hotplug.rst for more information.
If you are unsure how to answer this question, answer N.
config ARCH_PROC_KCORE_TEXT
@@ -1783,7 +1802,7 @@ config MTRR
You can safely say Y even if your machine doesn't have MTRRs, you'll
just add about 9 KB to your kernel.
- See <file:Documentation/x86/mtrr.txt> for more information.
+ See <file:Documentation/x86/mtrr.rst> for more information.
config MTRR_SANITIZER
def_bool y
@@ -1895,7 +1914,7 @@ config X86_INTEL_MPX
process and adds some branches to paths used during
exec() and munmap().
- For details, see Documentation/x86/intel_mpx.txt
+ For details, see Documentation/x86/intel_mpx.rst
If unsure, say N.
@@ -1911,7 +1930,7 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
- For details, see Documentation/x86/protection-keys.txt
+ For details, see Documentation/core-api/protection-keys.rst
If unsure, say y.
@@ -1939,7 +1958,7 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
- See Documentation/efi-stub.txt for more information.
+ See Documentation/admin-guide/efi-stub.rst for more information.
config EFI_MIXED
bool "EFI mixed-mode support"
@@ -2037,7 +2056,7 @@ config CRASH_DUMP
to a memory address not used by the main kernel or BIOS using
PHYSICAL_START, or it must be built as a relocatable image
(CONFIG_RELOCATABLE=y).
- For more details see Documentation/kdump/kdump.txt
+ For more details see Documentation/admin-guide/kdump/kdump.rst
config KEXEC_JUMP
bool "kexec jump"
@@ -2074,7 +2093,7 @@ config PHYSICAL_START
the reserved region. In other words, it can be set based on
the "X" value as specified in the "crashkernel=YM@XM"
command line boot parameter passed to the panic-ed
- kernel. Please take a look at Documentation/kdump/kdump.txt
+ kernel. Please take a look at Documentation/admin-guide/kdump/kdump.rst
for more details about crash dumps.
Usage of bzImage for capturing the crash dump is recommended as
@@ -2285,7 +2304,7 @@ config COMPAT_VDSO
choice
prompt "vsyscall table for legacy applications"
depends on X86_64
- default LEGACY_VSYSCALL_EMULATE
+ default LEGACY_VSYSCALL_XONLY
help
Legacy user code that does not know how to find the vDSO expects
to be able to issue three syscalls by calling fixed addresses in
@@ -2293,23 +2312,38 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[emulate|none].
+ line parameter vsyscall=[emulate|xonly|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
to improve security.
- If unsure, select "Emulate".
+ If unsure, select "Emulate execution only".
config LEGACY_VSYSCALL_EMULATE
- bool "Emulate"
+ bool "Full emulation"
+ help
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping. This makes the mapping non-executable, but
+ it still contains readable known contents, which could be
+ used in certain rare security vulnerability exploits. This
+ configuration is recommended when using legacy userspace
+ that still uses vsyscalls along with legacy binary
+ instrumentation tools that require code to be readable.
+
+ An example of this type of legacy userspace is running
+ Pin on an old binary that still uses vsyscalls.
+
+ config LEGACY_VSYSCALL_XONLY
+ bool "Emulate execution only"
help
- The kernel traps and emulates calls into the fixed
- vsyscall address mapping. This makes the mapping
- non-executable, but it still contains known contents,
- which could be used in certain rare security vulnerability
- exploits. This configuration is recommended when userspace
- still uses the vsyscall area.
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping and does not allow reads. This
+ configuration is recommended when userspace might use the
+ legacy vsyscall area but support for legacy binary
+ instrumentation of legacy code is not needed. It mitigates
+ certain uses of the vsyscall area as an ASLR-bypassing
+ buffer.
config LEGACY_VSYSCALL_NONE
bool "None"
@@ -2447,7 +2481,7 @@ menuconfig APM
machines with more than one CPU.
In order to use APM, you will need supporting software. For location
- and more information, read <file:Documentation/power/apm-acpi.txt>
+ and more information, read <file:Documentation/power/apm-acpi.rst>
and the Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
@@ -2698,6 +2732,7 @@ config OLPC
select OF
select OF_PROMTREE
select IRQ_DOMAIN
+ select OLPC_EC
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2873,9 +2908,6 @@ config HAVE_ATOMIC_IOMAP
config X86_DEV_DMA_OPS
bool
-config HAVE_GENERIC_GUP
- def_bool y
-
source "drivers/firmware/Kconfig"
source "arch/x86/kvm/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6adce15268bd..8e29c991ba3e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -480,3 +480,16 @@ config CPU_SUP_UMC_32
CPU might render the kernel unbootable.
If unsure, say N.
+
+config CPU_SUP_ZHAOXIN
+ default y
+ bool "Support Zhaoxin processors" if PROCESSOR_SELECT
+ help
+ This enables detection, tunings and quirks for Zhaoxin processors.
+
+ You need this enabled if you want your kernel to run on a
+ Zhaoxin CPU. Disabling this option on other types of CPUs
+ makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin
+ CPU might render the kernel unbootable.
+
+ If unsure, say N.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f730680dc818..71c92db47c41 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -156,7 +156,7 @@ config IOMMU_DEBUG
code. When you use it make sure you have a big enough
IOMMU/AGP aperture. Most of the options enabled by this can
be set more finegrained using the iommu= command line
- options. See Documentation/x86/x86_64/boot-options.txt for more
+ options. See Documentation/x86/x86_64/boot-options.rst for more
details.
config IOMMU_LEAK
@@ -179,26 +179,6 @@ config X86_DECODER_SELFTEST
decoder code.
If unsure, say "N".
-#
-# IO delay types:
-#
-
-config IO_DELAY_TYPE_0X80
- int
- default "0"
-
-config IO_DELAY_TYPE_0XED
- int
- default "1"
-
-config IO_DELAY_TYPE_UDELAY
- int
- default "2"
-
-config IO_DELAY_TYPE_NONE
- int
- default "3"
-
choice
prompt "IO delay type"
default IO_DELAY_0X80
@@ -229,30 +209,6 @@ config IO_DELAY_NONE
endchoice
-if IO_DELAY_0X80
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0X80
-endif
-
-if IO_DELAY_0XED
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0XED
-endif
-
-if IO_DELAY_UDELAY
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_UDELAY
-endif
-
-if IO_DELAY_NONE
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_NONE
-endif
-
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 64a31a6d751a..a2b6b428922a 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index ee274834ea8b..b72fc10fc1be 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -7,9 +8,6 @@
* Original APM BIOS checking by Stephen Rothwell, May 1994
* (sfr@canb.auug.org.au)
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index d401b4a262b0..5521ea12f44e 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* -----------------------------------------------------------------------
*
* Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2 or (at your
- * option) any later version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 2e1382486e91..02e1dea11d94 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -1,11 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 32a09eb5c101..19eca14b49a0 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -1,12 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 625d21b0cd3f..4ff01176c1cc 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ad84239e595e..15255f388a85 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
return addr;
}
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+ bool efi_64)
{
acpi_physical_address rsdp_addr = 0;
#ifdef CONFIG_EFI
- unsigned long systab, systab_tables, config_tables;
+ int i;
+
+ /* Get EFI tables from systab. */
+ for (i = 0; i < nr_tables; i++) {
+ acpi_physical_address table;
+ efi_guid_t guid;
+
+ if (efi_64) {
+ efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+ debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+ return 0;
+ }
+ } else {
+ efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
+
+ guid = tbl->guid;
+ table = tbl->table;
+ }
+
+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+ rsdp_addr = table;
+ else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+ return table;
+ }
+#endif
+ return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+ struct setup_data *data;
+ u64 pa_data;
+
+ pa_data = boot_params->hdr.setup_data;
+ while (pa_data) {
+ data = (struct setup_data *)pa_data;
+ if (data->type == SETUP_EFI)
+ return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+ pa_data = data->next;
+ }
+ return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+ efi_system_table_64_t *systab;
+ struct efi_setup_data *esd;
+ struct efi_info *ei;
+ char *sig;
+
+ esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+ if (!esd)
+ return 0;
+
+ if (!esd->tables) {
+ debug_putstr("Wrong kexec SETUP_EFI data.\n");
+ return 0;
+ }
+
+ ei = &boot_params->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+ if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ debug_putstr("Wrong kexec EFI loader signature.\n");
+ return 0;
+ }
+
+ /* Get systab from boot params. */
+ systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+ if (!systab)
+ error("EFI system table not found in kexec boot_params.");
+
+ return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+ unsigned long systab, config_tables;
unsigned int nr_tables;
struct efi_info *ei;
bool efi_64;
- int size, i;
char *sig;
ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_64_t);
} else {
efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
config_tables = stbl->tables;
nr_tables = stbl->nr_tables;
- size = sizeof(efi_config_table_32_t);
}
if (!config_tables)
error("EFI config tables not found.");
- /* Get EFI tables from systab. */
- for (i = 0; i < nr_tables; i++) {
- acpi_physical_address table;
- efi_guid_t guid;
-
- config_tables += size;
-
- if (efi_64) {
- efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
-
- if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
- debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
- return 0;
- }
- } else {
- efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
- guid = tbl->guid;
- table = tbl->table;
- }
-
- if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
- rsdp_addr = table;
- else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
- return table;
- }
+ return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+ return 0;
#endif
- return rsdp_addr;
}
static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
if (!pa)
pa = boot_params->acpi_rsdp_addr;
+ /*
+ * Try to get EFI data from setup_data. This can happen when we're a
+ * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+ * us.
+ */
+ if (!pa)
+ pa = kexec_get_rsdp_addr();
+
if (!pa)
pa = efi_get_rsdp_addr();
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 544ac4fafd11..d6662fdef300 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -----------------------------------------------------------------------
*
* Copyright 2011 Intel Corporation; author Matt Fleming
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
#include <linux/efi.h>
@@ -386,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c)
struct apm_bios_info *bi;
struct setup_header *hdr;
efi_loaded_image_t *image;
- void *options, *handle;
+ void *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
char *cmdline_ptr;
- u16 *s2;
- u8 *s1;
- int i;
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
@@ -496,8 +491,6 @@ static void add_e820ext(struct boot_params *params,
struct setup_data *e820ext, u32 nr_entries)
{
struct setup_data *data;
- efi_status_t status;
- unsigned long size;
e820ext->type = SETUP_E820_EXT;
e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
@@ -679,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
void *priv)
{
const char *signature;
- __u32 nr_desc;
- efi_status_t status;
struct exit_boot_struct *p = priv;
signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
@@ -749,7 +740,6 @@ struct boot_params *
efi_main(struct efi_config *c, struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
- efi_loaded_image_t *image;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
struct desc_struct *desc;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fafb75c6c592..6233ae35d0d9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -659,6 +659,7 @@ no_longmode:
gdt64:
.word gdt_end - gdt
.quad 0
+ .balign 8
gdt:
.word gdt_end - gdt
.long gdt
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index a480356e0ed8..6afb7130a387 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2017 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5a237e8dbf8d..53ac0cb2396d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -17,6 +17,7 @@
#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
+#include <asm/bootparam_utils.h>
/*
* WARNING!!
@@ -351,9 +352,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;
- /* Save RSDP address for later use. */
- /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
-
sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) {
@@ -368,6 +366,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
cols = boot_params->screen_info.orig_video_cols;
console_init();
+
+ /*
+ * Save RSDP address for later use. Have this after console_init()
+ * so that early debugging output from the RSDP parsing code can be
+ * collected.
+ */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
debug_putstr("early console in extract_kernel\n");
free_mem_ptr = heap; /* Heap */
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index d2f184165934..c8181392f70d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,6 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
#define BOOT_CTYPE_H
#include <linux/acpi.h>
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 72bad2c8debe..7e01248765b2 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -1,27 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 2009 Intel Corporation. All rights reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
* H. Peter Anvin <hpa@linux.intel.com>
*
* -----------------------------------------------------------------------
*
* Outputs a small assembly wrapper with the appropriate symbols defined.
- *
*/
#include <stdlib.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..5f2d03067ae5 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
{
unsigned long bios_start = 0, ebda_start = 0;
- unsigned long trampoline_start;
struct boot_e820_entry *entry;
char *signature;
int i;
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19dbbcdd1a53..81fc1eaa3229 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -11,7 +11,7 @@
#include "../string.c"
#ifdef CONFIG_X86_32
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
@@ -25,7 +25,7 @@ static void *__memcpy(void *dest, const void *src, size_t n)
return dest;
}
#else
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
@@ -56,7 +56,7 @@ void *memmove(void *dest, const void *src, size_t n)
const unsigned char *s = src;
if (d <= s || d - s >= n)
- return __memcpy(dest, src, n);
+ return ____memcpy(dest, src, n);
while (n-- > 0)
d[n] = s[n];
@@ -71,5 +71,11 @@ void *memcpy(void *dest, const void *src, size_t n)
warn("Avoiding potentially unsafe overlapping memcpy()!");
return memmove(dest, src, n);
}
- return __memcpy(dest, src, n);
+ return ____memcpy(dest, src, n);
}
+
+#ifdef CONFIG_KASAN
+extern void *__memset(void *s, int c, size_t n) __alias(memset);
+extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove);
+extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy);
+#endif
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 15d9f74b0008..4c5f4f4ad035 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -1,11 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
#include <linux/linkage.h>
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 26240dde081e..0bbf4f3707d2 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 51079fc9298f..e1478d32de1a 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 6c176b6a42ad..1fb4bc70cee9 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 850b8762e889..2c11c0f45d49 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -313,7 +313,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
- # See Documentation/x86/boot.txt for
+ # See Documentation/x86/boot.rst for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
@@ -419,7 +419,17 @@ xloadflags:
# define XLF4 0
#endif
- .word XLF0 | XLF1 | XLF23 | XLF4
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_5LEVEL
+#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED)
+#else
+#define XLF56 XLF_5LEVEL
+#endif
+#else
+#define XLF56 0
+#endif
+
+ .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 73532543d689..996df3d586f0 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index f06c147b5140..b0422b79debc 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index f72498dc90d2..9caa10e82217 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* ----------------------------------------------------------------------- *
*
* Copyright 2008 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2 or (at your
- * option) any later version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 8062f8915250..40031a614712 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 3e0edc6d2a20..c22f9a7d1aeb 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -1,11 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 565083c16e5c..1237beeb9540 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
index 2fe3616ba161..55de6b3092b8 100644
--- a/arch/x86/boot/regs.c
+++ b/arch/x86/boot/regs.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* -----------------------------------------------------------------------
*
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2 or (at your
- * option) any later version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 90154df8f125..401e30ca0a75 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index def2451f46ae..1fedabdb95ad 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index 2b15aa488ffb..a1aaaf6c06a6 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
index 468e444622c5..9e23fdffbb88 100644
--- a/arch/x86/boot/vesa.h
+++ b/arch/x86/boot/vesa.h
@@ -1,13 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* ----------------------------------------------------------------------- *
*
* Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Boston MA 02111-1307, USA; either version 2 of the License, or
- * (at your option) any later version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
#ifndef BOOT_VESA_H
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 49e0c18833e0..6eb8c06bc287 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index 95c7a818c0ed..9ada55dc1ab7 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 3ecc11a9c440..7e185977a984 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index a14c5178d4ba..4816cb9cf996 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index ac89b6624a40..f2e96905b3fe 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index b54e0328c449..cbf7fed22441 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -1,11 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2b2481acc661..59ce9ed58430 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -130,7 +130,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index e8829abf063a..d0a5ffeae8df 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -129,7 +129,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 5f7e43d4f64a..4434607e366d 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES-NI + SSE2 implementation of AEGIS-128
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index bdeee1b830be..46d227122643 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The AEGIS-128 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 491dd61c845c..1461ef00c0e8 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES-NI + SSE2 implementation of AEGIS-128L
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 80d917f7e467..19eb28b316f0 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The AEGIS-128L Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 8870c7c5d9a4..37d9b13dfd85 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AES-NI + SSE2 implementation of AEGIS-256
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 716eecb66bd5..f84da27171d3 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The AEGIS-256 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index e26984f7ab8d..9e9d819e8bc3 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Glue Code for the asm optimized version of the AES Cipher Algorithm
*
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cb2deb61c5d9..e40bdf024ba7 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Implement AES algorithm in Intel AES-NI instructions.
*
@@ -22,11 +23,6 @@
*
* Ported x86_64 version to x86:
* Author: Mathias Krause <minipli@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 21c246799aa5..73c0ccb009a0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for Intel AES-NI instructions. This file contains glue
* code, the real AES implementation is in aesni-intel_asm.S.
@@ -12,11 +13,6 @@
* Tadeusz Struk (tadeusz.struk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Copyright (c) 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/hardirq.h>
@@ -375,20 +371,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
}
}
-static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_enc(ctx, dst, src);
-}
-
-static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_dec(ctx, dst, src);
-}
-
static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int len)
{
@@ -924,7 +906,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
}
#endif
-static struct crypto_alg aesni_algs[] = { {
+static struct crypto_alg aesni_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
.cra_priority = 300,
@@ -941,24 +923,7 @@ static struct crypto_alg aesni_algs[] = { {
.cia_decrypt = aes_decrypt
}
}
-}, {
- .cra_name = "__aes",
- .cra_driver_name = "__aes-aesni",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = __aes_encrypt,
- .cia_decrypt = __aes_decrypt
- }
- }
-} };
+};
static struct skcipher_alg aesni_skciphers[] = {
{
@@ -1154,7 +1119,7 @@ static int __init aesni_init(void)
#endif
#endif
- err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ err = crypto_register_alg(&aesni_cipher_alg);
if (err)
return err;
@@ -1162,7 +1127,7 @@ static int __init aesni_init(void)
ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
if (err)
- goto unregister_algs;
+ goto unregister_cipher;
err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
aesni_simd_aeads);
@@ -1174,8 +1139,8 @@ static int __init aesni_init(void)
unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
-unregister_algs:
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+unregister_cipher:
+ crypto_unregister_alg(&aesni_cipher_alg);
return err;
}
@@ -1185,7 +1150,7 @@ static void __exit aesni_exit(void)
aesni_simd_aeads);
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ crypto_unregister_alg(&aesni_cipher_alg);
}
late_initcall(aesni_init);
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 8c1fcb6bad21..330db7a48af8 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -1,23 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Blowfish Cipher Algorithm (x86_64)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 3e0c07cc9124..cedfdba69ce3 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for assembler optimized version of Blowfish
*
@@ -7,22 +8,6 @@
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <crypto/algapi.h>
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index b66bbfa62f50..4be4c7c3ba27 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1,13 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* x86_64/AVX2/AES-NI assembler implementation of Camellia
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 95ba6956a7f6..23528bc18fc6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -1,23 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Camellia Cipher Algorithm (x86_64)
*
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index d4992e458f92..abf298c272dc 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <asm/crypto/camellia.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index d09f6521466a..0c22d84750a3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <asm/crypto/camellia.h>
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index dcd5e0f71b00..7c62db56ffe1 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for assembler optimized version of Camellia
*
@@ -5,22 +6,6 @@
*
* Camellia parts based on code by:
* Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <asm/unaligned.h>
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 86107c961bb4..dc55c3332fcc 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index d1ce49119da8..384ccb00f9e1 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -1,24 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for the AVX assembler implementation of the Cast5 Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <asm/crypto/glue_helper.h>
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 7f30b6f0d72c..4f0a7cdb94d9 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 18965c39305e..645f8f16815c 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for the AVX assembler implementation of the Cast6 Cipher
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
index 32903fd450af..831e4434fc20 100644
--- a/arch/x86/crypto/chacha-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* ChaCha 256-bit cipher algorithm, x64 AVX2 functions
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index c05a7a963dc3..2d86c7d6dc88 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* ChaCha 256-bit cipher algorithm, x64 SSSE3 functions
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 4967ad620775..388f95a4ec24 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
* including ChaCha20 (RFC7539)
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <crypto/algapi.h>
@@ -128,7 +124,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_simd_stream_xor(struct skcipher_walk *walk,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
int next_yield = 4096; /* bytes until next FPU yield */
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index a58fe217c856..eefa0862f309 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
@@ -9,20 +10,6 @@
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
* Kent Liu <kent.liu@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#include <linux/init.h>
#include <linux/module.h>
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 8e49ce117494..7fca43099a5f 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -1,17 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 5c610d4ef9fc..968386c21ef4 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for assembler optimized version of 3DES
*
@@ -7,17 +8,6 @@
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <crypto/algapi.h>
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index f94375a8dcd1..5d53effe8abe 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains accelerated part of ghash
@@ -10,10 +11,6 @@
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index e3f3e6fd9d65..ac76fe88ac4f 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains glue code.
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/err.h>
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index 02ee2308fb38..d08fc575ef7f 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -1,18 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S
index a53ac11dd385..d84508c85c13 100644
--- a/arch/x86/crypto/glue_helper-asm-avx2.S
+++ b/arch/x86/crypto/glue_helper-asm-avx2.S
@@ -1,13 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Shared glue code for 128bit block ciphers, AVX2 assembler macros
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index a78ef99a9981..901551445387 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Shared glue code for 128bit block ciphers
*
@@ -7,22 +8,6 @@
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index de182c460f82..5413fee33481 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AVX2 implementation of MORUS-1280
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index 679627a2a824..2d000d66ba4c 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for AVX2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index da5d2905db60..0eece772866b 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* SSE2 implementation of MORUS-1280
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index c35c0638d0bb..aada9d774293 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
index 30fc1bd98ec3..ffbde8b22838 100644
--- a/arch/x86/crypto/morus1280_glue.c
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 414db480250e..a60891101bbd 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* SSE2 implementation of MORUS-640
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 32da56b3bdad..8ef68134aef4 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
index 1dea33d84426..d8b5fd6cef29 100644
--- a/arch/x86/crypto/morus640_glue.c
+++ b/arch/x86/crypto/morus640_glue.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <crypto/internal/aead.h>
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 8457cdd47f75..8b341bc29d41 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index 6f0be7a86964..5578f846e622 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 6eb65b237b3c..4a1c05dce950 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Poly1305 authenticator algorithm, RFC7539, SIMD glue code
*
* Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <crypto/algapi.h>
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 2925077f8c6a..ddc51dbba3af 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index d67888f2a52a..37bc1d48106c 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* x86_64/AVX2 assembler optimized version of Serpent
*
@@ -6,12 +7,6 @@
* Based on AVX assembler implementation of Serpent by:
* Copyright © 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index d348f1553a79..e5c4a4690ca9 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Serpent Cipher 4-way parallel algorithm (i586/SSE2)
*
@@ -6,22 +7,6 @@
* Based on crypto/serpent.c by
* Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
* 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index acc066c7c6b2..5e0b3a3e97af 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
*
@@ -6,22 +7,6 @@
* Based on crypto/serpent.c by
* Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
* 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 03347b16ac9d..b871728e0b2f 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for x86_64/AVX2 assembler optimized version of Serpent
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 458567ecf76c..4a9a9f2ee1d8 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for AVX assembler versions of Serpent Cipher
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 3dafe137596a..5fdf1931d069 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for SSE2 assembler versions of Serpent Cipher
*
@@ -11,22 +12,6 @@
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 613d0bfc3d84..99c5b8c4dc38 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
* SSE3 instruction set extensions introduced in Intel Core Microarchitecture
@@ -21,11 +22,6 @@
*
* Converted to AT&T syntax and adapted for inclusion in the Linux kernel:
* Author: Mathias Krause <minipli@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 42f177afc33a..639d4c2fd6a8 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Cryptographic API.
*
@@ -11,12 +12,6 @@
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 73b471da3622..698b8f2a56e2 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 694ea4587ba7..290cc4e9a6fe 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
.file "twofish-i586-asm.S"
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index e7273a606a07..e495e07c7f1b 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -1,23 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Twofish Cipher 3-way parallel algorithm (x86_64)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index a350c990dc86..ecef2cb9f43f 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
.file "twofish-x86_64-asm.S"
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 66d989230d10..0dbf8e8b09d7 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for AVX assembler version of Twofish Cipher
*
@@ -5,22 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <linux/module.h>
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 571485502ec8..1dc9e29f221e 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -1,23 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Glue Code for 3-way parallel assembler optimized version of Twofish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
*/
#include <asm/crypto/glue_helper.h>
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
- * the original rbp.
- */
-.macro ENCODE_FRAME_POINTER ptregs_offset=0
-#ifdef CONFIG_FRAME_POINTER
- leaq 1+\ptregs_offset(%rsp), %rbp
-#endif
-.endm
-
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
@@ -358,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
.Lafter_call_\@:
#endif
.endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a986b3c8294c..536b574b6161 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* common.c - C code for kernel entry and exit
* Copyright (c) 2015 Andrew Lutomirski
- * GPL v2
*
* Based on asm and ptrace code by many authors. The code here originated
* in ptrace.c and signal.c.
@@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs)
struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
- bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ work = READ_ONCE(ti->flags);
- if (unlikely(work & _TIF_SYSCALL_EMU))
- emulated = true;
-
- if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- return -1L;
-
- if (emulated)
- return -1L;
+ if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
+ ret = tracehook_report_syscall_entry(regs);
+ if (ret || (work & _TIF_SYSCALL_EMU))
+ return -1L;
+ }
#ifdef CONFIG_SECCOMP
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
.macro TRACE_IRQS_IRET
@@ -203,9 +202,104 @@
.Lend_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+#define CS_FROM_ENTRY_STACK (1 << 31)
+#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_KERNEL (1 << 29)
+
+.macro FIXUP_FRAME
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 3*4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 4*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+#endif
+ testl $SEGMENT_RPL_MASK, 3*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+
+ orl $CS_FROM_KERNEL, 3*4(%esp)
+
+ /*
+ * When we're here from kernel mode, the (exception) stack looks like:
+ *
+ * 5*4(%esp) - <previous context>
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ *
+ * Let's build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 5 entries as gap:
+ *
+ * 12*4(%esp) - <previous context>
+ * 11*4(%esp) - gap / flags
+ * 10*4(%esp) - gap / cs
+ * 9*4(%esp) - gap / ip
+ * 8*4(%esp) - gap / orig_eax
+ * 7*4(%esp) - gap / gs / function
+ * 6*4(%esp) - ss
+ * 5*4(%esp) - sp
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ */
+
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ addl $6*4, (%esp) # point sp back at the previous context
+ pushl 6*4(%esp) # flags
+ pushl 6*4(%esp) # cs
+ pushl 6*4(%esp) # ip
+ pushl 6*4(%esp) # orig_eax
+ pushl 6*4(%esp) # gs / function
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+ testl $CS_FROM_KERNEL, 1*4(%esp)
+ jz .Lfinished_frame_\@
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+.Lfinished_frame_\@:
+.endm
+
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
cld
+.if \skip_gs == 0
PUSH_GS
+.endif
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
@@ -221,13 +315,13 @@
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
+.if \skip_gs == 0
SET_KERNEL_GS %edx
-
+.endif
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
SWITCH_TO_KERNEL_STACK
.endif
-
.endm
.macro SAVE_ALL_NMI cr3_reg:req
@@ -247,22 +341,6 @@
.Lend_\@:
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
- mov %esp, %ebp
- andl $0x7fffffff, %ebp
-#endif
-.endm
-
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -375,9 +453,6 @@
* switch to it before we do any copying.
*/
-#define CS_FROM_ENTRY_STACK (1 << 31)
-#define CS_FROM_USER_CR3 (1 << 30)
-
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +466,6 @@
* that register for the time this macro runs
*/
- /*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +823,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -765,18 +833,6 @@ ENTRY(resume_userspace)
jmp restore_all
END(ret_from_exception)
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz restore_all_kernel
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_kernel
- call preempt_schedule_irq
- jmp restore_all_kernel
-END(resume_kernel)
-#endif
-
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1019,6 +1075,7 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
+ IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1027,6 +1084,15 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lno_preempt
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lno_preempt
+ call preempt_schedule_irq
+.Lno_preempt:
+#endif
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1104,6 +1170,30 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ pushl $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_spurious
+ .align 8
+ .endr
+END(spurious_entries_start)
+
+common_spurious:
+ ASM_CLAC
+ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ call smp_spurious_interrupt
+ jmp ret_from_intr
+ENDPROC(common_spurious)
+#endif
+
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
@@ -1353,38 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
ENTRY(page_fault)
ASM_CLAC
- pushl $do_page_fault
- ALIGN
- jmp common_exception
+ pushl $0; /* %gs's slot on the stack */
+
+ SAVE_ALL switch_stacks=1 skip_gs=1
+
+ ENCODE_FRAME_POINTER
+ UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+
+ GET_CR2_INTO(%ecx) # might clobber %eax
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+ call do_page_fault
+ jmp ret_from_exception
END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- movl $(__USER_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl $(__KERNEL_PERCPU), %eax
- movl %eax, %fs
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- SWITCH_TO_KERNEL_STACK
+ SAVE_ALL switch_stacks=1 skip_gs=1
ENCODE_FRAME_POINTER
- cld
UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 11aa3b2afa4d..3f5a978a02a7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -8,7 +8,7 @@
*
* entry.S contains the system-call and fault low-level handling routines.
*
- * Some of this is documented in Documentation/x86/entry_64.txt
+ * Some of this is documented in Documentation/x86/entry_64.rst
*
* A note on terminology:
* - iret frame: Architecture defined interrupt frame from SS to RIP
@@ -375,6 +375,18 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ pushq $(~vector+0x80) /* Note: always in signed byte range */
+ jmp common_spurious
+ .align 8
+ vector=vector+1
+ .endr
+END(spurious_entries_start)
+
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
@@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry)
/* Interrupt entry/exit. */
- /*
- * The interrupt stubs push (~vector+0x80) onto the stack and
- * then jump to common_interrupt.
- */
+/*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_spurious/interrupt.
+ */
+common_spurious:
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call smp_spurious_interrupt /* rdi points to pt_regs */
+ jmp ret_from_intr
+END(common_spurious)
+_ASM_NOKPROBE(common_spurious)
+
+/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
@@ -842,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+ .if \paranoid
+ call paranoid_entry
+ /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+ .else
+ call error_entry
+ .endif
+ UNWIND_HINT_REGS
+
+ .if \read_cr2
+ /*
+ * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+ * intermediate storage as RDX can be clobbered in enter_from_user_mode().
+ * GET_CR2_INTO can clobber RAX.
+ */
+ GET_CR2_INTO(%r12);
+ .endif
+
+ .if \shift_ist != -1
+ TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
+ .else
+ TRACE_IRQS_OFF
+ .endif
+
+ .if \paranoid == 0
+ testb $3, CS(%rsp)
+ jz .Lfrom_kernel_no_context_tracking_\@
+ CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
+ .endif
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ .if \has_error_code
+ movq ORIG_RAX(%rsp), %rsi /* get error code */
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+ .else
+ xorl %esi, %esi /* no error code */
+ .endif
+
+ .if \shift_ist != -1
+ subq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \read_cr2
+ movq %r12, %rdx /* Move CR2 into 3rd argument */
+ .endif
+
+ call \do_sym
+
+ .if \shift_ist != -1
+ addq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \paranoid
+ /* this procedure expects "no swapgs" flag in ebx */
+ jmp paranoid_exit
+ .else
+ jmp error_exit
+ .endif
+
+.endm
+
/**
* idtentry - Generate an IDT entry stub
* @sym: Name of the generated entry point
- * @do_sym: C function to be called
- * @has_error_code: True if this IDT vector has an error code on the stack
- * @paranoid: non-zero means that this vector may be invoked from
+ * @do_sym: C function to be called
+ * @has_error_code: True if this IDT vector has an error code on the stack
+ * @paranoid: non-zero means that this vector may be invoked from
* kernel mode with user GSBASE and/or user CR3.
* 2 is special -- see below.
* @shift_ist: Set to an IST index if entries from kernel mode should
- * decrement the IST stack so that nested entries get a
+ * decrement the IST stack so that nested entries get a
* fresh stack. (This is for #DB, which has a nasty habit
- * of recursing.)
+ * of recursing.)
+ * @create_gap: create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
*
* idtentry generates an IDT stub that sets up a usable kernel context,
* creates struct pt_regs, and calls @do_sym. The stub has the following
@@ -878,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
/* Sanity check */
- .if \shift_ist != -1 && \paranoid == 0
+ .if \shift_ist != -1 && \paranoid != 1
.error "using shift_ist requires paranoid=1"
.endif
+ .if \create_gap && \paranoid
+ .error "using create_gap requires paranoid=0"
+ .endif
+
ASM_CLAC
.if \has_error_code == 0
@@ -912,47 +1004,7 @@ ENTRY(\sym)
.Lfrom_usermode_no_gap_\@:
.endif
- .if \paranoid
- call paranoid_entry
- .else
- call error_entry
- .endif
- UNWIND_HINT_REGS
- /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
- .if \paranoid
- .if \shift_ist != -1
- TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
- .else
- TRACE_IRQS_OFF
- .endif
- .endif
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
- .endif
-
- .if \shift_ist != -1
- subq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- call \do_sym
-
- .if \shift_ist != -1
- addq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- /* these procedures expect "no swapgs" flag in ebx */
- .if \paranoid
- jmp paranoid_exit
- .else
- jmp error_exit
- .endif
+ idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
.if \paranoid == 1
/*
@@ -961,21 +1013,9 @@ ENTRY(\sym)
* run in real process context if user_mode(regs).
*/
.Lfrom_usermode_switch_stack_\@:
- call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
+ idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
.endif
- call \do_sym
-
- jmp error_exit
- .endif
_ASM_NOKPROBE(\sym)
END(\sym)
.endm
@@ -985,7 +1025,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1142,6 +1182,11 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_ACRN_GUEST)
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ acrn_hv_callback_vector acrn_hv_vector_handler
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
@@ -1149,14 +1194,13 @@ idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN_PV
idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
-idtentry xenint3 do_int3 has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
+idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
#endif
#ifdef CONFIG_X86_MCE
@@ -1255,18 +1299,9 @@ ENTRY(error_entry)
movq %rax, %rsp /* switch stack */
ENCODE_FRAME_POINTER
pushq %r12
-
- /*
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks).
- */
- TRACE_IRQS_OFF
- CALL_enter_from_user_mode
ret
.Lerror_entry_done:
- TRACE_IRQS_OFF
ret
/*
@@ -1670,11 +1705,17 @@ nmi_restore:
iretq
END(nmi)
+#ifndef CONFIG_IA32_EMULATION
+/*
+ * This handles SYSCALL from 32-bit code. There is no way to program
+ * MSRs to fully disable 32-bit SYSCALL.
+ */
ENTRY(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+#endif
ENTRY(rewind_stack_do_exit)
UNWIND_HINT_FUNC
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ad968b7bac72..c00019abd076 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -438,3 +438,5 @@
431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
432 i386 fsmount sys_fsmount __ia32_sys_fsmount
433 i386 fspick sys_fspick __ia32_sys_fspick
+434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
+435 i386 clone3 sys_clone3 __ia32_sys_clone3
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b4e6f9e6204a..c29976eca4a8 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -355,6 +355,8 @@
431 common fsconfig __x64_sys_fsconfig
432 common fsmount __x64_sys_fsmount
433 common fspick __x64_sys_fspick
+434 common pidfd_open __x64_sys_pidfd_open
+435 common clone3 __x64_sys_clone3/ptregs
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index fee6bc79b987..cb3464525b37 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
- * Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
#include <asm/asm.h>
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index be36bf4e0957..cc20465b2867 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -1,9 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
* Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
- * Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
#include "calling.h"
@@ -12,9 +12,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
- .globl \name
- .type \name, @function
-\name:
+ ENTRY(\name)
pushq %rbp
movq %rsp, %rbp
@@ -35,6 +33,7 @@
call \func
jmp .L_restore
+ ENDPROC(\name)
_ASM_NOKPROBE(\name)
.endm
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 42fe42e82baf..8df549138193 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,6 +3,12 @@
# Building vDSO images for x86.
#
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
+ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
+include $(srctree)/lib/vdso/Makefile
+
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
@@ -50,7 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
-z max-page-size=4096
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
- $(call if_changed,vdso)
+ $(call if_changed,vdso_and_check)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
hostprogs-y += vdso2c
@@ -120,7 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
- $(call if_changed,vdso)
+ $(call if_changed,vdso_and_check)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -159,7 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/note.o \
$(obj)/vdso32/system_call.o \
$(obj)/vdso32/sigreturn.o
- $(call if_changed,vdso)
+ $(call if_changed,vdso_and_check)
#
# The DSO images are built using a special linker script.
@@ -170,11 +176,13 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
- -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+ $(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
+quiet_cmd_vdso_and_check = VDSO $@
+ cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
+
#
# Install the unstripped copies of vdso*.so. If our toolchain supports
# build-id, install .build-id links as well.
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 98c7d12b945c..d9ff616bb0f6 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -1,240 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
- *
* Fast user context implementation of clock_gettime, gettimeofday, and time.
*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Copyright 2019 ARM Limited
+ *
* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
*/
-
-#include <uapi/linux/time.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-#include <asm/unistd.h>
-#include <asm/msr.h>
-#include <asm/pvclock.h>
-#include <asm/mshyperv.h>
-#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
+#include <linux/types.h>
-#define gtod (&VVAR(vsyscall_gtod_data))
+#include "../../../../lib/vdso/gettimeofday.c"
-extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
-extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifndef BUILD_VDSO32
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- long ret;
- asm ("syscall" : "=a" (ret), "=m" (*ts) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
- "rcx", "r11");
- return ret;
+ return __cvdso_gettimeofday(tv, tz);
}
-#else
+int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+time_t __vdso_time(time_t *t)
{
- long ret;
-
- asm (
- "mov %%ebx, %%edx \n"
- "mov %[clock], %%ebx \n"
- "call __kernel_vsyscall \n"
- "mov %%edx, %%ebx \n"
- : "=a" (ret), "=m" (*ts)
- : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
- : "edx");
- return ret;
+ return __cvdso_time(t);
}
-#endif
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
-#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
-{
- return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
-}
-static notrace u64 vread_pvclock(void)
-{
- const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
- u32 version;
- u64 ret;
-
- /*
- * Note: The kernel and hypervisor must guarantee that cpu ID
- * number maps 1:1 to per-CPU pvclock time info.
- *
- * Because the hypervisor is entirely unaware of guest userspace
- * preemption, it cannot guarantee that per-CPU pvclock time
- * info is updated if the underlying CPU changes or that that
- * version is increased whenever underlying CPU changes.
- *
- * On KVM, we are guaranteed that pvti updates for any vCPU are
- * atomic as seen by *all* vCPUs. This is an even stronger
- * guarantee than we get with a normal seqlock.
- *
- * On Xen, we don't appear to have that guarantee, but Xen still
- * supplies a valid seqlock using the version field.
- *
- * We only do pvclock vdso timing at all if
- * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
- * mean that all vCPUs have matching pvti and that the TSC is
- * synced, so we can just look at vCPU 0's pvti.
- */
-
- do {
- version = pvclock_read_begin(pvti);
-
- if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
- return U64_MAX;
-
- ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
- } while (pvclock_read_retry(pvti, version));
-
- return ret;
-}
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(void)
-{
- const struct ms_hyperv_tsc_page *tsc_pg =
- (const struct ms_hyperv_tsc_page *)&hvclock_page;
+#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
+/* both 64-bit and x32 use these */
+extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
- return hv_read_tsc_page(tsc_pg);
-}
-#endif
-
-notrace static inline u64 vgetcyc(int mode)
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- if (mode == VCLOCK_TSC)
- return (u64)rdtsc_ordered();
-#ifdef CONFIG_PARAVIRT_CLOCK
- else if (mode == VCLOCK_PVCLOCK)
- return vread_pvclock();
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
- else if (mode == VCLOCK_HVCLOCK)
- return vread_hvclock();
-#endif
- return U64_MAX;
+ return __cvdso_clock_gettime(clock, ts);
}
-notrace static int do_hres(clockid_t clk, struct timespec *ts)
-{
- struct vgtod_ts *base = &gtod->basetime[clk];
- u64 cycles, last, sec, ns;
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- cycles = vgetcyc(gtod->vclock_mode);
- ns = base->nsec;
- last = gtod->cycle_last;
- if (unlikely((s64)cycles < 0))
- return vdso_fallback_gettime(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * gtod->mult;
- ns >>= gtod->shift;
- sec = base->sec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return 0;
-}
+int clock_gettime(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
-notrace static void do_coarse(clockid_t clk, struct timespec *ts)
+int __vdso_clock_getres(clockid_t clock,
+ struct __kernel_timespec *res)
{
- struct vgtod_ts *base = &gtod->basetime[clk];
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = base->sec;
- ts->tv_nsec = base->nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
+ return __cvdso_clock_getres(clock, res);
}
+int clock_getres(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+#else
+/* i386 only */
+extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
+
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- unsigned int msk;
-
- /* Sort out negative (CPU/FD) and invalid clocks */
- if (unlikely((unsigned int) clock >= MAX_CLOCKS))
- return vdso_fallback_gettime(clock, ts);
-
- /*
- * Convert the clockid to a bitmask and use it to check which
- * clocks are handled in the VDSO directly.
- */
- msk = 1U << clock;
- if (likely(msk & VGTOD_HRES)) {
- return do_hres(clock, ts);
- } else if (msk & VGTOD_COARSE) {
- do_coarse(clock, ts);
- return 0;
- }
- return vdso_fallback_gettime(clock, ts);
+ return __cvdso_clock_gettime32(clock, ts);
}
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct old_timespec32 *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- if (likely(tv != NULL)) {
- struct timespec *ts = (struct timespec *) tv;
-
- do_hres(CLOCK_REALTIME, ts);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- tz->tz_minuteswest = gtod->tz_minuteswest;
- tz->tz_dsttime = gtod->tz_dsttime;
- }
-
- return 0;
+ return __cvdso_clock_gettime(clock, ts);
}
-int gettimeofday(struct timeval *, struct timezone *)
- __attribute__((weak, alias("__vdso_gettimeofday")));
-/*
- * This will break when the xtime seconds get inaccurate, but that is
- * unlikely
- */
-notrace time_t __vdso_time(time_t *t)
-{
- /* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
+int clock_gettime64(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime64")));
- if (t)
- *t = result;
- return result;
+int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
+{
+ return __cvdso_clock_getres_time32(clock, res);
}
-time_t time(time_t *t)
- __attribute__((weak, alias("__vdso_time")));
+
+int clock_getres(clockid_t, struct old_timespec32 *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
+#endif
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce4cfa9..36b644e16272 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
__vdso_getcpu;
time;
__vdso_time;
+ clock_getres;
+ __vdso_clock_getres;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 42d4c89f990e..240626e7f55a 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -65,9 +65,6 @@ subsys_initcall(sysenter_setup);
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
-static const int zero;
-static const int one = 1;
-
static struct ctl_table abi_table2[] = {
{
.procname = "vsyscall32",
@@ -75,8 +72,8 @@ static struct ctl_table abi_table2[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (int *)&zero,
- .extra2 = (int *)&one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{}
};
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a81d32..c7720995ab1a 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,8 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
+ __vdso_clock_getres;
+ __vdso_clock_gettime64;
};
LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5c4a15..16a8050a4fb6 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
__vdso_gettimeofday;
__vdso_getcpu;
__vdso_time;
+ __vdso_clock_getres;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index f86ab0ae1777..b88a82bbc359 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
*
* Fast user context implementation of getcpu()
*/
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index babc4e7a519c..349a61d8bf34 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2007 Andi Kleen, SUSE Labs.
- * Subject to the GPL, v.2
*
* This contains most of the x86 vDSO kernel-side code.
*/
@@ -22,7 +22,7 @@
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
-#include <asm/mshyperv.h>
+#include <clocksource/hyperv_timer.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
index a9f4856f622a..93c1b3e949a7 100644
--- a/arch/x86/entry/vsyscall/Makefile
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -1,7 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the x86 low level vsyscall code
#
-obj-y := vsyscall_gtod.o
-
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index d9d81ad7a400..e7c596dea947 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,9 +42,11 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NONE } vsyscall_mode =
+static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
+#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
+ XONLY;
#else
EMULATE;
#endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
+ else if (!strcmp("xonly", str))
+ vsyscall_mode = XONLY;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -106,14 +110,15 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr);
return false;
} else {
return true;
}
}
-bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
@@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
long ret;
unsigned long orig_dx;
+ /* Write faults or kernel-privilege faults never get fixed up. */
+ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ return false;
+
+ if (!(error_code & X86_PF_INSTR)) {
+ /* Failed vsyscall read */
+ if (vsyscall_mode == EMULATE)
+ return false;
+
+ /*
+ * User code tried and failed to read the vsyscall page.
+ */
+ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
+ return false;
+ }
+
/*
* No point in checking CS -- the only way to get here is a user mode
* trap to a high address, which means that we're in 64-bit user code.
@@ -268,7 +289,7 @@ do_ret:
return true;
sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return true;
}
@@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
-static struct vm_area_struct gate_vma = {
+static struct vm_area_struct gate_vma __ro_after_init = {
.vm_start = VSYSCALL_ADDR,
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
.vm_page_prot = PAGE_READONLY_EXEC,
@@ -357,12 +378,20 @@ void __init map_vsyscall(void)
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- if (vsyscall_mode != NONE) {
+ /*
+ * For full emulation, the page needs to exist for real. In
+ * execute-only mode, there is no PTE at all backing the vsyscall
+ * page.
+ */
+ if (vsyscall_mode == EMULATE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
+ if (vsyscall_mode == XONLY)
+ gate_vma.vm_flags = VM_EXEC;
+
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
}
diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index c9596a9af159..2e203f3a25a7 100644
--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
@@ -1,9 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* vsyscall_emu_64.S: Vsyscall emulation page
*
* Copyright (c) 2011 Andy Lutomirski
- *
- * Subject to the GNU General Public License, version 2
*/
#include <linux/linkage.h>
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
deleted file mode 100644
index cfcdba082feb..000000000000
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Modified for x86 32 bit architecture by
- * Stefani Seibold <stefani@seibold.net>
- * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include <linux/timekeeper_internal.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-int vclocks_used __read_mostly;
-
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
-
-void update_vsyscall_tz(void)
-{
- vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
- vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- struct vgtod_ts *base;
- u64 nsec;
-
- /* Mark the new vclock used. */
- BUILD_BUG_ON(VCLOCK_MAX >= 32);
- WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
-
- gtod_write_begin(vdata);
-
- /* copy vsyscall data */
- vdata->vclock_mode = vclock_mode;
- vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mask = tk->tkr_mono.mask;
- vdata->mult = tk->tkr_mono.mult;
- vdata->shift = tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_REALTIME];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_TAI];
- base->sec = tk->xtime_sec + (s64)tk->tai_offset;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_MONOTONIC];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
- nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
- base->sec++;
- }
- base->nsec = nsec;
-
- base = &vdata->basetime[CLOCK_REALTIME_COARSE];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
- nsec += tk->wall_to_monotonic.tv_nsec;
- while (nsec >= NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- base->sec++;
- }
- base->nsec = nsec;
-
- gtod_write_end(vdata);
-}
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index b8ccdb5c9244..9e07f554333f 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,4 +1,5 @@
-obj-y += core.o
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += core.o probe.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index f15441b07dad..e7d35f60d53f 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 58a6993d7eb3..fb616203ce42 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2013 Advanced Micro Devices, Inc.
*
@@ -5,10 +6,6 @@
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
*
* Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) "perf/amd_iommu: " fmt
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
index 62e0702c4374..0e5c036fd7be 100644
--- a/arch/x86/events/amd/iommu.h
+++ b/arch/x86/events/amd/iommu.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 Advanced Micro Devices, Inc.
*
* Author: Steven Kinney <Steven.Kinney@amd.com>
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _PERF_EVENT_AMD_IOMMU_H_
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index c5ff084551c6..abef51320e3a 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Performance events - AMD Processor Power Reporting Mechanism
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Huang Rui <ray.huang@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/module.h>
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 79cfd3b30ceb..a6ea07f2aa84 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2013 Advanced Micro Devices, Inc.
*
* Author: Jacob Shin <jacob.shin@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/perf_event.h>
@@ -205,15 +202,22 @@ static int amd_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;
+ if (event->cpu < 0)
+ return -EINVAL;
+
/*
* SliceMask and ThreadMask need to be set for certain L3 events in
* Family 17h. For other events, the two fields do not affect the count.
*/
- if (l3_mask)
- hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
+ if (l3_mask && is_llc_event(event)) {
+ int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);
- if (event->cpu < 0)
- return -EINVAL;
+ if (smp_num_siblings > 1)
+ thread += cpu_data(event->cpu).apicid & 1;
+
+ hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
+ AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
+ }
uncore = event_to_amd_uncore(event);
if (!uncore)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..81b005e4c7d9 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event)
}
/* sample_regs_user never support XMM registers */
- if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+ if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK))
return -EINVAL;
/*
* Besides the general purpose registers, XMM registers may
* be collected in PEBS on some platforms, e.g. Icelake
*/
- if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
- if (x86_pmu.pebs_no_xmm_regs)
+ if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) {
+ if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
return -EINVAL;
if (!event->attr.precise_ip)
@@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = {
.attrs = NULL,
};
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
- struct device_attribute *d;
- struct perf_pmu_events_attr *pmu_attr;
- int offset = 0;
- int i, j;
-
- for (i = 0; attrs[i]; i++) {
- d = (struct device_attribute *)attrs[i];
- pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
- /* str trumps id */
- if (pmu_attr->event_str)
- continue;
- if (x86_pmu.event_map(i + offset))
- continue;
-
- for (j = i; attrs[j]; j++)
- attrs[j] = attrs[j + 1];
-
- /* Check the shifted attr. */
- i--;
-
- /*
- * event_map() is index based, the attrs array is organized
- * by increasing event index. If we shift the events, then
- * we need to compensate for the event_map(), otherwise
- * we are looking up the wrong event in the map
- */
- offset++;
- }
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
- struct attribute **new;
- int j, i;
-
- for (j = 0; a && a[j]; j++)
- ;
- for (i = 0; b && b[i]; i++)
- j++;
- j++;
-
- new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
- if (!new)
- return NULL;
-
- j = 0;
- for (i = 0; a && a[i]; i++)
- new[j++] = a[i];
- for (i = 0; b && b[i]; i++)
- new[j++] = b[i];
- new[j] = NULL;
-
- return new;
-}
-
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
@@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = {
NULL,
};
+/*
+ * Hide all undefined events (x86_pmu.event_map(idx) == 0)
+ * from the events_attr attributes.
+ */
+static umode_t
+is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+ /* str trumps id */
+ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
+}
+
static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
+ .is_visible = is_visible,
};
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
@@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void)
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
- if (x86_pmu.caps_attrs) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_caps_group.attrs = tmp;
- }
-
- if (x86_pmu.event_attrs)
- x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
- else
- filter_events(x86_pmu_events_group.attrs);
-
- if (x86_pmu.cpu_events) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
- if (!WARN_ON(!tmp))
- x86_pmu_events_group.attrs = tmp;
- }
-
- if (x86_pmu.attrs) {
- struct attribute **tmp;
- tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_attr_group.attrs = tmp;
- }
+ pmu.attr_update = x86_pmu.attr_update;
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
@@ -2179,7 +2105,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&mm->mmap_sem);
+ lockdep_assert_held_write(&mm->mmap_sem);
if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
@@ -2402,13 +2328,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
- if (perf_hw_regs(regs)) {
- if (perf_callchain_store(entry, regs->ip))
- return;
+ if (perf_callchain_store(entry, regs->ip))
+ return;
+
+ if (perf_hw_regs(regs))
unwind_start(&state, current, regs, NULL);
- } else {
+ else
unwind_start(&state, current, NULL, (void *)regs->sp);
- }
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 890a3fb5706f..5ee3fed881d3 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* BTS PMU driver for perf
* Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#undef DEBUG
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 546d13e436aa..9e911a96972b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Per core/cpu state
*
@@ -19,6 +20,7 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
+#include <asm/hypervisor.h>
#include "../perf_event.h"
@@ -2159,12 +2161,10 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
intel_pmu_disable_fixed(hwc);
- return;
- }
-
- x86_pmu_disable_event(event);
+ else
+ x86_pmu_disable_event(event);
/*
* Needs to be called after x86_pmu_disable_event,
@@ -3896,8 +3896,6 @@ static __initconst const struct x86_pmu core_pmu = {
.check_period = intel_pmu_check_period,
};
-static struct attribute *intel_pmu_attrs[];
-
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -3929,8 +3927,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
- .attrs = intel_pmu_attrs,
-
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
@@ -4054,6 +4050,13 @@ static bool check_msr(unsigned long msr, u64 mask)
u64 val_old, val_new, val_tmp;
/*
+ * Disable the check for real HW, so we don't
+ * mess with potentially enabled registers:
+ */
+ if (hypervisor_is_type(X86_HYPER_NATIVE))
+ return true;
+
+ /*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
@@ -4273,13 +4276,6 @@ static struct attribute *icl_tsx_events_attrs[] = {
NULL,
};
-static __init struct attribute **get_icl_events_attrs(void)
-{
- return boot_cpu_has(X86_FEATURE_RTM) ?
- merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
- icl_events_attrs;
-}
-
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4401,43 +4397,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644,
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
- NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
+ &dev_attr_allow_tsx_force_abort.attr,
NULL,
};
-static __init struct attribute **
-get_events_attrs(struct attribute **base,
- struct attribute **mem,
- struct attribute **tsx)
+static umode_t
+tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- struct attribute **attrs = base;
- struct attribute **old;
+ return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
+}
- if (mem && x86_pmu.pebs)
- attrs = merge_attr(attrs, mem);
+static umode_t
+pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.pebs ? attr->mode : 0;
+}
- if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
- old = attrs;
- attrs = merge_attr(attrs, tsx);
- if (old != base)
- kfree(old);
- }
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
- return attrs;
+static umode_t
+exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.version >= 2 ? attr->mode : 0;
}
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group group_events_td = {
+ .name = "events",
+};
+
+static struct attribute_group group_events_mem = {
+ .name = "events",
+ .is_visible = pebs_is_visible,
+};
+
+static struct attribute_group group_events_tsx = {
+ .name = "events",
+ .is_visible = tsx_is_visible,
+};
+
+static struct attribute_group group_caps_gen = {
+ .name = "caps",
+ .attrs = intel_pmu_caps_attrs,
+};
+
+static struct attribute_group group_caps_lbr = {
+ .name = "caps",
+ .attrs = lbr_attrs,
+ .is_visible = lbr_is_visible,
+};
+
+static struct attribute_group group_format_extra = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_format_extra_skl = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_default = {
+ .attrs = intel_pmu_attrs,
+ .is_visible = default_is_visible,
+};
+
+static const struct attribute_group *attr_update[] = {
+ &group_events_td,
+ &group_events_mem,
+ &group_events_tsx,
+ &group_caps_gen,
+ &group_caps_lbr,
+ &group_format_extra,
+ &group_format_extra_skl,
+ &group_default,
+ NULL,
+};
+
+static struct attribute *empty_attrs;
+
__init int intel_pmu_init(void)
{
- struct attribute **extra_attr = NULL;
- struct attribute **mem_attr = NULL;
- struct attribute **tsx_attr = NULL;
- struct attribute **to_free = NULL;
+ struct attribute **extra_skl_attr = &empty_attrs;
+ struct attribute **extra_attr = &empty_attrs;
+ struct attribute **td_attr = &empty_attrs;
+ struct attribute **mem_attr = &empty_attrs;
+ struct attribute **tsx_attr = &empty_attrs;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
struct extra_reg *er;
+ bool pmem = false;
int version, i;
char *name;
@@ -4595,7 +4659,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = slm_events_attrs;
+ td_attr = slm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
name = "silvermont";
@@ -4623,7 +4687,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
name = "goldmont";
@@ -4650,7 +4714,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
extra_attr = slm_format_attr;
@@ -4739,7 +4803,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4780,7 +4844,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4817,10 +4881,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
@@ -4859,10 +4923,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
@@ -4889,9 +4953,10 @@ __init int intel_pmu_init(void)
name = "knights-landing";
break;
+ case INTEL_FAM6_SKYLAKE_X:
+ pmem = true;
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_add_quirk(intel_pebs_isolation_quirk);
@@ -4919,27 +4984,28 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = hsw_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- to_free = extra_attr;
- x86_pmu.cpu_events = hsw_events_attrs;
+ extra_skl_attr = skl_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
- intel_pmu_pebs_data_source_skl(
- boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+ intel_pmu_pebs_data_source_skl(pmem);
if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
x86_pmu.flags |= PMU_FL_TFA;
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
name = "skylake";
break;
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_XEON_D:
+ pmem = true;
case INTEL_FAM6_ICELAKE_MOBILE:
+ case INTEL_FAM6_ICELAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4958,11 +5024,12 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = icl_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- x86_pmu.cpu_events = get_icl_events_attrs();
+ extra_skl_attr = skl_format_attr;
+ mem_attr = icl_events_attrs;
+ tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_skl(pmem);
pr_cont("Icelake events, ");
name = "icelake";
break;
@@ -4987,14 +5054,14 @@ __init int intel_pmu_init(void)
snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
- if (version >= 2 && extra_attr) {
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- extra_attr);
- WARN_ON(!x86_pmu.format_attrs);
- }
- x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
- mem_attr, tsx_attr);
+ group_events_td.attrs = td_attr;
+ group_events_mem.attrs = mem_attr;
+ group_events_tsx.attrs = tsx_attr;
+ group_format_extra.attrs = extra_attr;
+ group_format_extra_skl.attrs = extra_skl_attr;
+
+ x86_pmu.attr_update = attr_update;
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -5042,12 +5109,8 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
- x86_pmu.caps_attrs = intel_pmu_caps_attrs;
-
- if (x86_pmu.lbr_nr) {
- x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
+ if (x86_pmu.lbr_nr)
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
- }
/*
* Access extra MSR may cause #GP under certain circumstances.
@@ -5077,7 +5140,6 @@ __init int intel_pmu_init(void)
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
- kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 6072f92cb8ea..688592b34564 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -96,6 +96,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
@@ -144,25 +145,42 @@ enum perf_cstate_core_events {
PERF_CSTATE_CORE_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");
-static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
+static unsigned long core_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_core_c1);
+PMU_EVENT_GROUP(events, cstate_core_c3);
+PMU_EVENT_GROUP(events, cstate_core_c6);
+PMU_EVENT_GROUP(events, cstate_core_c7);
+
+static bool test_msr(int idx, void *data)
+{
+ return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr },
};
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+static struct attribute *attrs_empty[] = {
NULL,
};
+/*
+ * There are no default events, but we need to create
+ * the "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
static struct attribute_group core_events_attr_group = {
.name = "events",
- .attrs = core_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
@@ -211,31 +229,37 @@ enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
- NULL,
+PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");
+
+static unsigned long pkg_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_pkg_c2);
+PMU_EVENT_GROUP(events, cstate_pkg_c3);
+PMU_EVENT_GROUP(events, cstate_pkg_c6);
+PMU_EVENT_GROUP(events, cstate_pkg_c7);
+PMU_EVENT_GROUP(events, cstate_pkg_c8);
+PMU_EVENT_GROUP(events, cstate_pkg_c9);
+PMU_EVENT_GROUP(events, cstate_pkg_c10);
+
+static struct perf_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
static struct attribute_group pkg_events_attr_group = {
.name = "events",
- .attrs = pkg_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
@@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
- if (!core_msr[cfg].attr)
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
+ if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_core_cpu_mask,
@@ -298,11 +323,11 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
- if (!pkg_msr[cfg].attr)
+ if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(event->cpu));
+ topology_die_cpumask(event->cpu));
} else {
return -ENOENT;
}
@@ -385,7 +410,7 @@ static int cstate_cpu_exit(unsigned int cpu)
if (has_cstate_pkg &&
cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
@@ -414,15 +439,35 @@ static int cstate_cpu_init(unsigned int cpu)
* in the package cpu mask as the designated reader.
*/
target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(cpu));
+ topology_die_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
return 0;
}
+static const struct attribute_group *core_attr_update[] = { /* one entry per core_msr[] event group; file-local */
+ &group_cstate_core_c1,
+ &group_cstate_core_c3,
+ &group_cstate_core_c6,
+ &group_cstate_core_c7,
+ NULL,
+};
+
+static const struct attribute_group *pkg_attr_update[] = { /* one entry per pkg_msr[] event group; file-local */
+ &group_cstate_pkg_c2,
+ &group_cstate_pkg_c3,
+ &group_cstate_pkg_c6,
+ &group_cstate_pkg_c7,
+ &group_cstate_pkg_c8,
+ &group_cstate_pkg_c9,
+ &group_cstate_pkg_c10,
+ NULL,
+};
+
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
+ .attr_update = core_attr_update,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = {
static struct pmu cstate_pkg_pmu = {
.attr_groups = pkg_attr_groups,
+ .attr_update = pkg_attr_update,
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there are no available events.
- */
-static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
- struct perf_cstate_msr *msr,
- struct attribute **attrs)
-{
- bool found = false;
- unsigned int bit;
- u64 val;
-
- for (bit = 0; bit < max; bit++) {
- if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
- *attrs++ = &msr[bit].attr->attr.attr;
- found = true;
- } else {
- msr[bit].attr = NULL;
- }
- }
- *attrs = NULL;
-
- return found;
-}
-
static int __init cstate_probe(const struct cstate_model *cm)
{
/* SLM has different MSR for PKG C6 */
@@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
- has_cstate_core = cstate_probe_msr(cm->core_events,
- PERF_CSTATE_CORE_EVENT_MAX,
- core_msr, core_events_attrs);
+ core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
+ true, (void *) &cm->core_events);
- has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
- PERF_CSTATE_PKG_EVENT_MAX,
- pkg_msr, pkg_events_attrs);
+ pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
+ true, (void *) &cm->pkg_events);
+
+ has_cstate_core = !!core_msr_mask;
+ has_cstate_pkg = !!pkg_msr_mask;
return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
}
@@ -663,7 +686,13 @@ static int __init cstate_init(void)
}
if (has_cstate_pkg) {
- err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (topology_max_die_per_package() > 1) {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ "cstate_die", -1);
+ } else {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ cstate_pkg_pmu.name, -1);
+ }
if (err) {
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7a9f5dac5abe..2c8db2c19328 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -337,7 +337,7 @@ static int alloc_pebs_buffer(int cpu)
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
- void *buffer, *ibuffer, *cea;
+ void *buffer, *insn_buff, *cea;
if (!x86_pmu.pebs)
return 0;
@@ -351,12 +351,12 @@ static int alloc_pebs_buffer(int cpu)
* buffer then.
*/
if (x86_pmu.intel_cap.pebs_format < 2) {
- ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!ibuffer) {
+ insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!insn_buff) {
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
- per_cpu(insn_buffer, cpu) = ibuffer;
+ per_cpu(insn_buffer, cpu) = insn_buff;
}
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
@@ -684,7 +684,7 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
EVENT_CONSTRAINT_END
};
@@ -693,7 +693,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
@@ -701,7 +701,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
struct event_constraint intel_slm_pebs_event_constraints[] = {
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
@@ -726,7 +726,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -743,7 +743,7 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
EVENT_CONSTRAINT_END
};
@@ -752,7 +752,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -767,9 +767,9 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -783,9 +783,9 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -806,9 +806,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -829,9 +829,9 @@ struct event_constraint intel_bdw_pebs_event_constraints[] = {
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
@@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
pebs_data_cfg |= PEBS_DATACFG_GP;
if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
- (attr->sample_regs_intr & PEBS_XMM_REGS))
+ (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
pebs_data_cfg |= PEBS_DATACFG_XMMS;
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -1964,10 +1964,9 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
- if (x86_pmu.version <= 4) {
+ if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
- x86_pmu.pebs_no_xmm_regs = 1;
- }
+
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
char *pebs_qual = "";
@@ -2020,9 +2019,9 @@ void __init intel_ds_init(void)
PERF_SAMPLE_TIME;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
pebs_qual = "-baseline";
+ x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
} else {
/* Only basic record supported */
- x86_pmu.pebs_no_xmm_regs = 1;
x86_pmu.large_pebs_flags &=
~(PERF_SAMPLE_ADDR |
PERF_SAMPLE_TIME |
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 339d7628080c..d3dc2274ddd4 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel(R) Processor Trace PMU driver for perf
* Copyright (c) 2013-2014, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Intel PT is specified in the Intel Architecture Instruction Set Extensions
* Programming Reference:
* http://software.intel.com/en-us/intel-isa-extensions
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 269e15a9086c..63fe4063fbd6 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Intel(R) Processor Trace PMU driver for perf
* Copyright (c) 2013-2014, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Intel PT is specified in the Intel Architecture Instruction Set Extensions
* Programming Reference:
* http://software.intel.com/en-us/intel-isa-extensions
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 37ebf6fc5415..64ab51ffdf06 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Support Intel RAPL energy consumption counters
* Copyright (C) 2013 Google, Inc., Stephane Eranian
@@ -54,27 +55,28 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
-#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
-#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
-#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
-#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
-#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
-#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
-#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS 0x5
+enum perf_rapl_events {
+ PERF_RAPL_PP0 = 0, /* all cores */
+ PERF_RAPL_PKG, /* entire package */
+ PERF_RAPL_RAM, /* DRAM */
+ PERF_RAPL_PP1, /* gpu */
+ PERF_RAPL_PSYS, /* psys */
+
+ PERF_RAPL_MAX,
+ NR_RAPL_DOMAINS = PERF_RAPL_MAX,
+};
+
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
@@ -83,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"psys",
};
-/* Clients have PP0, PKG */
-#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
-#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT|\
- 1<<RAPL_IDX_PSYS_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -148,26 +123,32 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
- unsigned int maxpkg;
+ unsigned int maxdie;
struct rapl_pmu *pmus[];
};
+struct rapl_model { /* per-CPU-model RAPL description, referenced from the x86_cpu_id match table */
+ unsigned long events; /* bitmask of BIT(PERF_RAPL_*) events for this model */
+ bool apply_quirk; /* forwarded to rapl_check_hw_unit() */
+};
+
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
+static struct perf_msr rapl_msrs[];
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
- return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
+ return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -349,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, msr, ret = 0;
+ int bit, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
@@ -365,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event)
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
- /*
- * check event is known (determines counter)
- */
- switch (cfg) {
- case INTEL_RAPL_PP0:
- bit = RAPL_IDX_PP0_NRG_STAT;
- msr = MSR_PP0_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PKG:
- bit = RAPL_IDX_PKG_NRG_STAT;
- msr = MSR_PKG_ENERGY_STATUS;
- break;
- case INTEL_RAPL_RAM:
- bit = RAPL_IDX_RAM_NRG_STAT;
- msr = MSR_DRAM_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PP1:
- bit = RAPL_IDX_PP1_NRG_STAT;
- msr = MSR_PP1_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PSYS:
- bit = RAPL_IDX_PSYS_NRG_STAT;
- msr = MSR_PLATFORM_ENERGY_STATUS;
- break;
- default:
+ if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
- }
+
+ cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
+ bit = cfg - 1;
+
/* check event supported */
if (!(rapl_cntr_mask & (1 << bit)))
return -EINVAL;
@@ -406,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
- event->hw.event_base = msr;
+ event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -456,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
-static struct attribute *rapl_events_srv_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
+static struct attribute *attrs_empty[] = {
+ NULL,
+};
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_ram_unit),
+static struct attribute_group rapl_pmu_events_group = {
+ .name = "events",
+ .attrs = attrs_empty,
+};
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_ram_scale),
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+ &format_attr_event.attr,
NULL,
};
-static struct attribute *rapl_events_cln_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
-
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
+static struct attribute_group rapl_pmu_format_group = {
+ .name = "format",
+ .attrs = rapl_formats_attr,
+};
- EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
+static const struct attribute_group *rapl_attr_groups[] = {
+ &rapl_pmu_attr_group,
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
NULL,
};
-static struct attribute *rapl_events_hsw_attr[] = {
+static struct attribute *rapl_events_cores[] = {
EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
- EVENT_PTR(rapl_ram),
-
EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
- EVENT_PTR(rapl_ram_unit),
-
EVENT_PTR(rapl_cores_scale),
- EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- EVENT_PTR(rapl_ram_scale),
NULL,
};
-static struct attribute *rapl_events_skl_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_gpu),
- EVENT_PTR(rapl_ram),
- EVENT_PTR(rapl_psys),
+static struct attribute_group rapl_events_cores_group = {
+ .name = "events",
+ .attrs = rapl_events_cores,
+};
- EVENT_PTR(rapl_cores_unit),
+static struct attribute *rapl_events_pkg[] = {
+ EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_gpu_unit),
- EVENT_PTR(rapl_ram_unit),
- EVENT_PTR(rapl_psys_unit),
-
- EVENT_PTR(rapl_cores_scale),
EVENT_PTR(rapl_pkg_scale),
- EVENT_PTR(rapl_gpu_scale),
- EVENT_PTR(rapl_ram_scale),
- EVENT_PTR(rapl_psys_scale),
NULL,
};
-static struct attribute *rapl_events_knl_attr[] = {
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
+static struct attribute_group rapl_events_pkg_group = {
+ .name = "events",
+ .attrs = rapl_events_pkg,
+};
- EVENT_PTR(rapl_pkg_unit),
+static struct attribute *rapl_events_ram[] = {
+ EVENT_PTR(rapl_ram),
EVENT_PTR(rapl_ram_unit),
-
- EVENT_PTR(rapl_pkg_scale),
EVENT_PTR(rapl_ram_scale),
NULL,
};
-static struct attribute_group rapl_pmu_events_group = {
- .name = "events",
- .attrs = NULL, /* patched at runtime */
+static struct attribute_group rapl_events_ram_group = {
+ .name = "events",
+ .attrs = rapl_events_ram,
};
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
-static struct attribute *rapl_formats_attr[] = {
- &format_attr_event.attr,
+static struct attribute *rapl_events_gpu[] = {
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_gpu_scale),
NULL,
};
-static struct attribute_group rapl_pmu_format_group = {
- .name = "format",
- .attrs = rapl_formats_attr,
+static struct attribute_group rapl_events_gpu_group = {
+ .name = "events",
+ .attrs = rapl_events_gpu,
};
-static const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
- &rapl_pmu_format_group,
- &rapl_pmu_events_group,
+static struct attribute *rapl_events_psys[] = {
+ EVENT_PTR(rapl_psys),
+ EVENT_PTR(rapl_psys_unit),
+ EVENT_PTR(rapl_psys_scale),
NULL,
};
+static struct attribute_group rapl_events_psys_group = {
+ .name = "events",
+ .attrs = rapl_events_psys,
+};
+
+static bool test_msr(int idx, void *data)
+{
+ return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
+ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
+ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr },
+ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
+};
+
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -571,7 +532,7 @@ static int rapl_cpu_offline(unsigned int cpu)
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate rapl events to the new target */
if (target < nr_cpu_ids) {
@@ -598,14 +559,14 @@ static int rapl_cpu_online(unsigned int cpu)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
}
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
*/
- target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
@@ -632,7 +593,7 @@ static int rapl_check_hw_unit(bool apply_quirk)
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
if (apply_quirk)
- rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+ rapl_hw_unit[PERF_RAPL_RAM] = 16;
/*
* Calculate the timer rate:
@@ -668,23 +629,33 @@ static void cleanup_rapl_pmus(void)
{
int i;
- for (i = 0; i < rapl_pmus->maxpkg; i++)
+ for (i = 0; i < rapl_pmus->maxdie; i++)
kfree(rapl_pmus->pmus[i]);
kfree(rapl_pmus);
}
+const struct attribute_group *rapl_attr_update[] = { /* must list every group referenced by rapl_msrs[] */
+ &rapl_events_cores_group,
+ &rapl_events_pkg_group,
+ &rapl_events_ram_group,
+ &rapl_events_gpu_group,
+ &rapl_events_psys_group, /* group used by rapl_msrs[PERF_RAPL_PSYS] */
+ NULL,
+};
+
static int __init init_rapl_pmus(void)
{
- int maxpkg = topology_max_packages();
+ int maxdie = topology_max_packages() * topology_max_die_per_package();
size_t size;
- size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
rapl_pmus = kzalloc(size, GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
- rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->maxdie = maxdie;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
+ rapl_pmus->pmu.attr_update = rapl_attr_update;
rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
rapl_pmus->pmu.event_init = rapl_pmu_event_init;
rapl_pmus->pmu.add = rapl_pmu_event_add;
@@ -700,105 +671,96 @@ static int __init init_rapl_pmus(void)
#define X86_RAPL_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
-struct intel_rapl_init_fun {
- bool apply_quirk;
- int cntr_mask;
- struct attribute **attrs;
-};
-
-static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_CLN,
- .attrs = rapl_events_cln_attr,
+static struct rapl_model model_snb = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_PP1),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
- .apply_quirk = true,
- .cntr_mask = RAPL_IDX_SRV,
- .attrs = rapl_events_srv_attr,
+static struct rapl_model model_snbep = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_HSW,
- .attrs = rapl_events_hsw_attr,
+static struct rapl_model model_hsw = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM) |
+ BIT(PERF_RAPL_PP1),
+ .apply_quirk = false,
};
-static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_SRV,
- .attrs = rapl_events_srv_attr,
+static struct rapl_model model_hsx = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = true,
};
-static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
- .apply_quirk = true,
- .cntr_mask = RAPL_IDX_KNL,
- .attrs = rapl_events_knl_attr,
+static struct rapl_model model_knl = {
+ .events = BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM),
+ .apply_quirk = true,
};
-static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
- .apply_quirk = false,
- .cntr_mask = RAPL_IDX_SKL_CLN,
- .attrs = rapl_events_skl_attr,
+static struct rapl_model model_skl = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM) |
+ BIT(PERF_RAPL_PP1) |
+ BIT(PERF_RAPL_PSYS),
+ .apply_quirk = false,
};
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
-
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init),
+static const struct x86_cpu_id rapl_model_match[] __initconst = {
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl),
{},
};
-MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
- struct intel_rapl_init_fun *rapl_init;
- bool apply_quirk;
+ struct rapl_model *rm;
int ret;
- id = x86_match_cpu(rapl_cpu_match);
+ id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
- rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
- apply_quirk = rapl_init->apply_quirk;
- rapl_cntr_mask = rapl_init->cntr_mask;
- rapl_pmu_events_group.attrs = rapl_init->attrs;
+ rm = (struct rapl_model *) id->driver_data;
+ rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
+ false, (void *) &rm->events);
- ret = rapl_check_hw_unit(apply_quirk);
+ ret = rapl_check_hw_unit(rm->apply_quirk);
if (ret)
return ret;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fc40a1473058..3694a5d0703d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <asm/cpu_device_id.h>
@@ -7,6 +8,7 @@
static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
+struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
@@ -14,7 +16,7 @@ struct pci_driver *uncore_pci_driver;
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
-static int max_packages;
+static int max_dies;
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
@@ -27,7 +29,7 @@ struct event_constraint uncore_constraint_empty =
MODULE_LICENSE("GPL");
-static int uncore_pcibus_to_physid(struct pci_bus *bus)
+int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
int phys_id = -1;
@@ -100,13 +102,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
- return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
+ return dieid < max_dies ? pmu->boxes[dieid] : NULL;
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -118,6 +120,21 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve
return count;
}
+void uncore_mmio_exit_box(struct intel_uncore_box *box)
+{
+ if (box->io_addr)
+ iounmap(box->io_addr);
+}
+
+u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (!box->io_addr)
+ return 0;
+
+ return readq(box->io_addr + event->hw.event_base);
+}
+
/*
* generic get constraint function for shared match/mask registers.
*/
@@ -311,7 +328,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
uncore_pmu_init_hrtimer(box);
box->cpu = -1;
box->pci_phys_id = -1;
- box->pkgid = -1;
+ box->dieid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -826,10 +843,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
- int pkg;
+ int die;
- for (pkg = 0; pkg < max_packages; pkg++)
- kfree(pmu->boxes[pkg]);
+ for (die = 0; die < max_dies; die++)
+ kfree(pmu->boxes[die]);
kfree(pmu->boxes);
}
@@ -866,7 +883,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
if (!pmus)
return -ENOMEM;
- size = max_packages * sizeof(struct intel_uncore_box *);
+ size = max_dies * sizeof(struct intel_uncore_box *);
for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = setid ? i : -1;
@@ -936,20 +953,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
- int phys_id, pkg, ret;
+ int phys_id, die, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
- pkg = topology_phys_to_logical_pkg(phys_id);
- if (pkg < 0)
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
+ if (die < 0)
return -EINVAL;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+ uncore_extra_pci_dev[die].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
@@ -988,7 +1006,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
- if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL;
box = uncore_alloc_box(type, NUMA_NO_NODE);
@@ -1002,13 +1020,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
- box->pkgid = pkg;
+ box->dieid = die;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
- pmu->boxes[pkg] = box;
+ pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1)
return 0;
@@ -1016,7 +1034,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
ret = uncore_pmu_register(pmu);
if (ret) {
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
}
@@ -1027,16 +1045,17 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
- int i, phys_id, pkg;
+ int i, phys_id, die;
phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
- pkg = topology_phys_to_logical_pkg(phys_id);
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
- uncore_extra_pci_dev[pkg].dev[i] = NULL;
+ if (uncore_extra_pci_dev[die].dev[i] == pdev) {
+ uncore_extra_pci_dev[die].dev[i] = NULL;
break;
}
}
@@ -1049,7 +1068,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[box->pkgid] = NULL;
+ pmu->boxes[box->dieid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
@@ -1061,7 +1080,7 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;
- size = max_packages * sizeof(struct pci_extra_dev);
+ size = max_dies * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
ret = -ENOMEM;
@@ -1108,11 +1127,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
- int i, pkg;
+ int i, die;
- pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[die];
if (!box)
continue;
@@ -1140,18 +1159,33 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
-static int uncore_event_cpu_offline(unsigned int cpu)
+static void uncore_box_unref(struct intel_uncore_type **types, int id)
{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, pkg, target;
+ int i;
+
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[id];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static int uncore_event_cpu_offline(unsigned int cpu)
+{
+ int die, target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
goto unref;
/* Find a new cpu to collect uncore events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate uncore events to the new target */
if (target < nr_cpu_ids)
@@ -1160,25 +1194,19 @@ static int uncore_event_cpu_offline(unsigned int cpu)
target = -1;
uncore_change_context(uncore_msr_uncores, cpu, target);
+ uncore_change_context(uncore_mmio_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
unref:
/* Clear the references */
- pkg = topology_logical_package_id(cpu);
- for (; *types; types++) {
- type = *types;
- pmu = type->pmus;
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
- if (box && atomic_dec_return(&box->refcnt) == 0)
- uncore_box_exit(box);
- }
- }
+ die = topology_logical_die_id(cpu);
+ uncore_box_unref(uncore_msr_uncores, die);
+ uncore_box_unref(uncore_mmio_uncores, die);
return 0;
}
static int allocate_boxes(struct intel_uncore_type **types,
- unsigned int pkg, unsigned int cpu)
+ unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
struct intel_uncore_type *type;
@@ -1191,20 +1219,20 @@ static int allocate_boxes(struct intel_uncore_type **types,
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- if (pmu->boxes[pkg])
+ if (pmu->boxes[die])
continue;
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
goto cleanup;
box->pmu = pmu;
- box->pkgid = pkg;
+ box->dieid = die;
list_add(&box->active_list, &allocated);
}
}
/* Install them in the pmus */
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
- box->pmu->boxes[pkg] = box;
+ box->pmu->boxes[die] = box;
}
return 0;
@@ -1216,15 +1244,15 @@ cleanup:
return -ENOMEM;
}
-static int uncore_event_cpu_online(unsigned int cpu)
+static int uncore_box_ref(struct intel_uncore_type **types,
+ int id, unsigned int cpu)
{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, ret, pkg, target;
+ int i, ret;
- pkg = topology_logical_package_id(cpu);
- ret = allocate_boxes(types, pkg, cpu);
+ ret = allocate_boxes(types, id, cpu);
if (ret)
return ret;
@@ -1232,23 +1260,38 @@ static int uncore_event_cpu_online(unsigned int cpu)
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[id];
if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
+ return 0;
+}
+
+static int uncore_event_cpu_online(unsigned int cpu)
+{
+ int die, target, msr_ret, mmio_ret;
+
+ die = topology_logical_die_id(cpu);
+ msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
+ mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
+ if (msr_ret && mmio_ret)
+ return -ENOMEM;
/*
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
- target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
- uncore_change_context(uncore_msr_uncores, -1, cpu);
+ if (!msr_ret)
+ uncore_change_context(uncore_msr_uncores, -1, cpu);
+ if (!mmio_ret)
+ uncore_change_context(uncore_mmio_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu);
return 0;
}
@@ -1296,12 +1339,35 @@ err:
return ret;
}
+static int __init uncore_mmio_init(void)
+{
+ struct intel_uncore_type **types = uncore_mmio_uncores;
+ int ret;
+
+ ret = uncore_types_init(types, true);
+ if (ret)
+ goto err;
+
+ for (; *types; types++) {
+ ret = type_pmu_register(*types);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ uncore_types_exit(uncore_mmio_uncores);
+ uncore_mmio_uncores = empty_uncore;
+ return ret;
+}
+
+
#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
+ void (*mmio_init)(void);
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1372,6 +1438,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
.pci_init = skl_uncore_pci_init,
};
+static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
+ .cpu_init = snr_uncore_cpu_init,
+ .pci_init = snr_uncore_pci_init,
+ .mmio_init = snr_uncore_mmio_init,
+};
+
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
@@ -1399,6 +1471,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init),
{},
};
@@ -1408,7 +1483,7 @@ static int __init intel_uncore_init(void)
{
const struct x86_cpu_id *id;
struct intel_uncore_init_fun *uncore_init;
- int pret = 0, cret = 0, ret;
+ int pret = 0, cret = 0, mret = 0, ret;
id = x86_match_cpu(intel_uncore_match);
if (!id)
@@ -1417,7 +1492,7 @@ static int __init intel_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
- max_packages = topology_max_packages();
+ max_dies = topology_max_packages() * topology_max_die_per_package();
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_init->pci_init) {
@@ -1431,7 +1506,12 @@ static int __init intel_uncore_init(void)
cret = uncore_cpu_init();
}
- if (cret && pret)
+ if (uncore_init->mmio_init) {
+ uncore_init->mmio_init();
+ mret = uncore_mmio_init();
+ }
+
+ if (cret && pret && mret)
return -ENODEV;
/* Install hotplug callbacks to setup the targets for each package */
@@ -1445,6 +1525,7 @@ static int __init intel_uncore_init(void)
err:
uncore_types_exit(uncore_msr_uncores);
+ uncore_types_exit(uncore_mmio_uncores);
uncore_pci_exit();
return ret;
}
@@ -1454,6 +1535,7 @@ static void __exit intel_uncore_exit(void)
{
cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
uncore_types_exit(uncore_msr_uncores);
+ uncore_types_exit(uncore_mmio_uncores);
uncore_pci_exit();
}
module_exit(intel_uncore_exit);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79eb2e21e4f0..f36f7bebbc1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -2,6 +2,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/perf_event.h>
#include "../perf_event.h"
@@ -56,7 +57,10 @@ struct intel_uncore_type {
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
- unsigned msr_offset;
+ union {
+ unsigned msr_offset;
+ unsigned mmio_offset;
+ };
unsigned num_shared_regs:8;
unsigned single_fixed:1;
unsigned pair_ctr_ctl:1;
@@ -108,7 +112,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid; /* Logical package ID */
+ int dieid; /* Logical die ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
@@ -125,7 +129,7 @@ struct intel_uncore_box {
struct hrtimer hrtimer;
struct list_head list;
struct list_head active_list;
- void *io_addr;
+ void __iomem *io_addr;
struct intel_uncore_extra_reg shared_regs[0];
};
@@ -159,6 +163,7 @@ struct pci2phy_map {
};
struct pci2phy_map *__find_pci2phy_map(int segment);
+int uncore_pcibus_to_physid(struct pci_bus *bus);
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
@@ -190,6 +195,13 @@ static inline bool uncore_pmc_freerunning(int idx)
return idx == UNCORE_PMC_IDX_FREERUNNING;
}
+static inline
+unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->box_ctl +
+ box->pmu->type->mmio_offset * box->pmu->pmu_idx;
+}
+
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
{
return box->pmu->type->box_ctl;
@@ -330,7 +342,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
static inline
unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_fixed_ctl(box);
else
return uncore_msr_fixed_ctl(box);
@@ -339,7 +351,7 @@ unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
static inline
unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_fixed_ctr(box);
else
return uncore_msr_fixed_ctr(box);
@@ -348,7 +360,7 @@ unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
static inline
unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_event_ctl(box, idx);
else
return uncore_msr_event_ctl(box, idx);
@@ -357,7 +369,7 @@ unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
static inline
unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
{
- if (box->pci_dev)
+ if (box->pci_dev || box->io_addr)
return uncore_pci_perf_ctr(box, idx);
else
return uncore_msr_perf_ctr(box, idx);
@@ -419,6 +431,16 @@ static inline bool is_freerunning_event(struct perf_event *event)
(((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START);
}
+/* Check and reject invalid config */
+static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ if (is_freerunning_event(event))
+ return 0;
+
+ return -EINVAL;
+}
+
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -467,7 +489,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box)
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
- return (box->pkgid < 0);
+ return (box->dieid < 0);
}
static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
@@ -482,6 +504,9 @@ static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *ev
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_mmio_exit_box(struct intel_uncore_box *box);
+u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_event_start(struct perf_event *event, int flags);
@@ -497,6 +522,7 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct intel_uncore_type **uncore_mmio_uncores;
extern struct pci_driver *uncore_pci_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
@@ -528,6 +554,9 @@ int knl_uncore_pci_init(void);
void knl_uncore_cpu_init(void);
int skx_uncore_pci_init(void);
void skx_uncore_cpu_init(void);
+int snr_uncore_pci_init(void);
+void snr_uncore_cpu_init(void);
+void snr_uncore_mmio_init(void);
/* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index f8431819b3e1..dbaa1b088a30 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -3,27 +3,29 @@
#include "uncore.h"
/* Uncore IMC PCI IDs */
-#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
-#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
-#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904
-#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c
-#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900
-#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
-#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
-#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
-#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
-#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
-#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
-#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
-#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
-#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
-#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
-#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
-#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
+#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904
+#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900
+#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
+#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
+#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f
+#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910
+#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4
#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f
#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f
#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2
@@ -34,9 +36,15 @@
#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
+#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c
+#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d
+#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0
+#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34
+#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35
#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
+
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
@@ -420,11 +428,6 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}
-static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
-{
- iounmap(box->io_addr);
-}
-
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}
@@ -437,13 +440,6 @@ static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct per
static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{}
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
/*
* Keep the custom event_init() function compatible with old event
* encoding for free running counters.
@@ -570,13 +566,13 @@ static struct pmu snb_uncore_imc_pmu = {
static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
- .exit_box = snb_uncore_imc_exit_box,
+ .exit_box = uncore_mmio_exit_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = snb_uncore_imc_read_counter,
+ .read_counter = uncore_mmio_read_counter,
};
static struct intel_uncore_type snb_uncore_imc = {
@@ -682,6 +678,14 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
@@ -737,6 +741,26 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ },
};
@@ -807,6 +831,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */
IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */
IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */
+ IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */
+ IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */
IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */
IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */
IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */
@@ -821,6 +847,11 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
+ IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */
+ IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */
+ IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
+ IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
+ IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */
IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
{ /* end marker */ }
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10e04387f38..b10a5ec79e48 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -324,12 +324,77 @@
#define SKX_M2M_PCI_PMON_CTR0 0x200
#define SKX_M2M_PCI_PMON_BOX_CTL 0x258
+/* SNR Ubox */
+#define SNR_U_MSR_PMON_CTR0 0x1f98
+#define SNR_U_MSR_PMON_CTL0 0x1f91
+#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93
+#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94
+
+/* SNR CHA */
+#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff
+#define SNR_CHA_MSR_PMON_CTL0 0x1c01
+#define SNR_CHA_MSR_PMON_CTR0 0x1c08
+#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00
+#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05
+
+
+/* SNR IIO */
+#define SNR_IIO_MSR_PMON_CTL0 0x1e08
+#define SNR_IIO_MSR_PMON_CTR0 0x1e01
+#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00
+#define SNR_IIO_MSR_OFFSET 0x10
+#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff
+
+/* SNR IRP */
+#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8
+#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1
+#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0
+#define SNR_IRP_MSR_OFFSET 0x10
+
+/* SNR M2PCIE */
+#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58
+#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51
+#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50
+#define SNR_M2PCIE_MSR_OFFSET 0x10
+
+/* SNR PCU */
+#define SNR_PCU_MSR_PMON_CTL0 0x1ef1
+#define SNR_PCU_MSR_PMON_CTR0 0x1ef8
+#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0
+#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc
+
+/* SNR M2M */
+#define SNR_M2M_PCI_PMON_CTL0 0x468
+#define SNR_M2M_PCI_PMON_CTR0 0x440
+#define SNR_M2M_PCI_PMON_BOX_CTL 0x438
+#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
+
+/* SNR PCIE3 */
+#define SNR_PCIE3_PCI_PMON_CTL0 0x508
+#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
+#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4
+
+/* SNR IMC */
+#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
+#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
+#define SNR_IMC_MMIO_PMON_CTL0 0x40
+#define SNR_IMC_MMIO_PMON_CTR0 0x8
+#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800
+#define SNR_IMC_MMIO_OFFSET 0x4000
+#define SNR_IMC_MMIO_SIZE 0x4000
+#define SNR_IMC_MMIO_BASE_OFFSET 0xd0
+#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF
+#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8
+#define SNR_IMC_MMIO_MEM0_MASK 0x7FF
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -343,11 +408,14 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43");
+DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47");
DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46");
+DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9");
DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
@@ -1058,8 +1126,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = box->pkgid;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+ int die = box->dieid;
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx];
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
@@ -3585,6 +3653,7 @@ static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = {
static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = {
.read_counter = uncore_msr_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
};
static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = {
@@ -3967,3 +4036,535 @@ int skx_uncore_pci_init(void)
}
/* end of SKX uncore support */
+
+/* SNR uncore support */
+
+/*
+ * SNR UBOX PMON unit: a single box with two 48-bit general-purpose counters
+ * plus one 48-bit fixed counter, driven through the IVB-EP MSR ops and
+ * exposing the IVB-EP format attributes.
+ */
+static struct intel_uncore_type snr_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNR_U_MSR_PMON_CTR0,
+ .event_ctl = SNR_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+/*
+ * sysfs "format" attributes for the SNR CHA PMU. Uses the SNR-specific
+ * extended umask (umask_ext2) and TID filter (filter_tid5) layouts.
+ */
+static struct attribute *snr_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext2.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid5.attr,
+ NULL,
+};
+/* Attribute group wrapping the CHA format attributes above. */
+static const struct attribute_group snr_uncore_chabox_format_group = {
+ .name = "format",
+ .attrs = snr_uncore_cha_formats_attr,
+};
+
+/*
+ * hw_config callback for SNR CHA events.
+ *
+ * Every event claims extra register 0. Its value is the TID filter portion
+ * of attr.config1, and its MSR is SNR_C0_MSR_PMON_BOX_FILTER0 advanced by
+ * the type's msr_offset once per pmu_idx. Always returns 0.
+ */
+static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *filter = &event->hw.extra_reg;
+
+	filter->idx = 0;
+	filter->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID;
+	filter->reg = SNR_C0_MSR_PMON_BOX_FILTER0 +
+		      box->pmu->type->msr_offset * box->pmu->pmu_idx;
+
+	return 0;
+}
+
+/*
+ * Enable a CHA event: write the filter MSR first when the event owns an
+ * extra register, then write the event control MSR with the enable bit set.
+ */
+static void snr_cha_enable_event(struct intel_uncore_box *box,
+				 struct perf_event *event)
+{
+	struct hw_perf_event_extra *filter = &event->hw.extra_reg;
+
+	if (filter->idx != EXTRA_REG_NONE)
+		wrmsrl(filter->reg, filter->config);
+
+	wrmsrl(event->hw.config_base, event->hw.config | SNBEP_PMON_CTL_EN);
+}
+
+/*
+ * CHA box operations: generic SNB-EP/IVB-EP MSR helpers for box handling,
+ * with SNR-specific event enable and hw_config to program the TID filter.
+ */
+static struct intel_uncore_ops snr_uncore_chabox_ops = {
+ .init_box = ivbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = snr_cha_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = snr_cha_hw_config,
+};
+
+/* SNR CHA PMON type: six boxes, four 48-bit counters each, MSR based. */
+static struct intel_uncore_type snr_uncore_chabox = {
+ .name = "cha",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_CHA_MSR_PMON_CTL0,
+ .perf_ctr = SNR_CHA_MSR_PMON_CTR0,
+ .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_CBO_MSR_OFFSET,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT,
+ .ops = &snr_uncore_chabox_ops,
+ .format_group = &snr_uncore_chabox_format_group,
+};
+
+/*
+ * sysfs "format" attributes for the SNR IIO PMU. Uses the SNR-specific
+ * channel mask (ch_mask2) and fc mask (fc_mask2) bit layouts.
+ */
+static struct attribute *snr_uncore_iio_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh9.attr,
+ &format_attr_ch_mask2.attr,
+ &format_attr_fc_mask2.attr,
+ NULL,
+};
+
+/* Attribute group wrapping the IIO format attributes above. */
+static const struct attribute_group snr_uncore_iio_format_group = {
+ .name = "format",
+ .attrs = snr_uncore_iio_formats_attr,
+};
+
+/* SNR IIO PMON type: five boxes, four 48-bit counters each, MSR based. */
+static struct intel_uncore_type snr_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_IIO_MSR_PMON_CTL0,
+ .perf_ctr = SNR_IIO_MSR_PMON_CTR0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_IIO_MSR_OFFSET,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &snr_uncore_iio_format_group,
+};
+
+/* SNR IRP PMON type: five boxes, two 48-bit counters each, MSR based. */
+static struct intel_uncore_type snr_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_IRP0_MSR_PMON_CTL0,
+ .perf_ctr = SNR_IRP0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_IRP_MSR_OFFSET,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+/* SNR M2PCIe PMON type: five boxes, four 48-bit counters each, MSR based. */
+static struct intel_uncore_type snr_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 48,
+ .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0,
+ .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0,
+ .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_M2PCIE_MSR_OFFSET,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+/*
+ * hw_config callback for the SNR PCU.
+ *
+ * Event selects 0xb-0xe use the PCU filter register; the extra-register
+ * index is (event select - 0xb). Only the slice of attr.config1 belonging
+ * to that index is kept. Other events are left without an extra register.
+ * Always returns 0.
+ *
+ * NOTE(review): the slice is taken as a byte at bit offset idx * 8, on the
+ * assumption that the filter bands are four consecutive 8-bit fields. This
+ * matches how the constraint helpers installed in snr_uncore_pcu_ops are
+ * presumed to index the shared register - confirm against the uncore PMON
+ * specification. The previous mask, 0xff << idx, shifted by single bits and
+ * so picked the wrong bits for indices 1-3.
+ */
+static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+	if (ev_sel >= 0xb && ev_sel <= 0xe) {
+		reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER;
+		reg1->idx = ev_sel - 0xb;
+		/* 64-bit constant: 0xff << 24 would overflow a signed int. */
+		reg1->config = event->attr.config1 &
+			       (0xffULL << (reg1->idx * 8));
+	}
+	return 0;
+}
+
+/*
+ * PCU box operations: the common IVB-EP MSR ops plus SNR hw_config and the
+ * SNB-EP constraint helpers that arbitrate the shared filter register.
+ */
+static struct intel_uncore_ops snr_uncore_pcu_ops = {
+ IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+ .hw_config = snr_pcu_hw_config,
+ .get_constraint = snbep_pcu_get_constraint,
+ .put_constraint = snbep_pcu_put_constraint,
+};
+
+/*
+ * SNR PCU PMON type: one box with four 48-bit counters and one shared
+ * register (num_shared_regs).
+ */
+static struct intel_uncore_type snr_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNR_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snr_uncore_pcu_ops,
+ .format_group = &skx_uncore_pcu_format_group,
+};
+
+/* Index of each free-running counter group in snr_iio_freerunning[]. */
+enum perf_uncore_snr_iio_freerunning_type_id {
+ SNR_IIO_MSR_IOCLK,
+ SNR_IIO_MSR_BW_IN,
+
+ SNR_IIO_FREERUNNING_TYPE_MAX,
+};
+
+/*
+ * Per-group description of the IIO free-running counters.
+ * NOTE(review): the positional fields are presumably { counter base,
+ * counter offset, box offset, number of counters, counter width in bits } -
+ * confirm against struct freerunning_counters.
+ */
+static struct freerunning_counters snr_iio_freerunning[] = {
+ [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 },
+ [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 },
+};
+
+/*
+ * Named events for the IIO free-running counters: one clock event and eight
+ * inbound-bandwidth events (ports 0-7), each with a scale and a "MiB" unit.
+ * The table ends with an all-zero entry.
+ */
+static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = {
+ /* Free-Running IIO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH IN Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+/*
+ * SNR IIO free-running PMU type: nine counters per box (1 clock + 8
+ * bandwidth, per the table above) across five boxes. Reuses the SKX
+ * free-running ops and format group.
+ */
+static struct intel_uncore_type snr_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 9,
+ .num_boxes = 5,
+ .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = snr_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = snr_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+/* NULL-terminated list of all MSR-accessed uncore PMU types on SNR. */
+static struct intel_uncore_type *snr_msr_uncores[] = {
+ &snr_uncore_ubox,
+ &snr_uncore_chabox,
+ &snr_uncore_iio,
+ &snr_uncore_irp,
+ &snr_uncore_m2pcie,
+ &snr_uncore_pcu,
+ &snr_uncore_iio_free_running,
+ NULL,
+};
+
+/* Install the SNR MSR uncore type list as the active uncore_msr_uncores. */
+void snr_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = snr_msr_uncores;
+}
+
+/*
+ * Initialise an M2M PCI box: set UNCORE_BOX_FLAG_CTL_OFFS8 in the box flags,
+ * then write IVBEP_PMON_BOX_CTL_INT to the box control register in PCI
+ * config space.
+ */
+static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+	/* Looked up before the flag is set, as in the original ordering. */
+	int ctl_reg = uncore_pci_box_ctl(box);
+
+	__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+	pci_write_config_dword(box->pci_dev, ctl_reg, IVBEP_PMON_BOX_CTL_INT);
+}
+
+/*
+ * M2M box operations: SNR-specific init_box, everything else reuses the
+ * SNB-EP PCI helpers.
+ */
+static struct intel_uncore_ops snr_m2m_uncore_pci_ops = {
+ .init_box = snr_m2m_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+/* sysfs "format" attributes for the SNR M2M PMU (extended umask_ext3). */
+static struct attribute *snr_m2m_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext3.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+/* Attribute group wrapping the M2M format attributes above. */
+static const struct attribute_group snr_m2m_uncore_format_group = {
+ .name = "format",
+ .attrs = snr_m2m_uncore_formats_attr,
+};
+
+/* SNR M2M PMON type: one box with four 48-bit counters, PCI based. */
+static struct intel_uncore_type snr_uncore_m2m = {
+ .name = "m2m",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_M2M_PCI_PMON_CTR0,
+ .event_ctl = SNR_M2M_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
+ .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
+ .ops = &snr_m2m_uncore_pci_ops,
+ .format_group = &snr_m2m_uncore_format_group,
+};
+
+/* SNR PCIe3 PMON type: one box with four 48-bit counters, PCI based. */
+static struct intel_uncore_type snr_uncore_pcie3 = {
+ .name = "pcie3",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
+ .event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
+ .ops = &ivbep_uncore_pci_ops,
+ .format_group = &ivbep_uncore_format_group,
+};
+
+/* Indices into snr_pci_uncores[], also encoded in the PCI id driver_data. */
+enum {
+ SNR_PCI_UNCORE_M2M,
+ SNR_PCI_UNCORE_PCIE3,
+};
+
+/* NULL-terminated list of all PCI-accessed uncore PMU types on SNR. */
+static struct intel_uncore_type *snr_pci_uncores[] = {
+ [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
+ [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
+ NULL,
+};
+
+/*
+ * PCI devices that carry SNR uncore PMON blocks.
+ * NOTE(review): the UNCORE_PCI_DEV_FULL_DATA() arguments are presumably
+ * (device number, function number, uncore type index, box index) - confirm
+ * against the macro definition.
+ */
+static const struct pci_device_id snr_uncore_pci_ids[] = {
+ { /* M2M */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0),
+ },
+ { /* PCIe3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+/* PCI driver descriptor; only the name and id table are set here. */
+static struct pci_driver snr_uncore_pci_driver = {
+ .name = "snr_uncore",
+ .id_table = snr_uncore_pci_ids,
+};
+
+/*
+ * Set up SNR PCI uncore support: build the PCI-bus-to-physical-id map from
+ * the UBOX device, then publish the SNR PCI uncore types and driver.
+ *
+ * Returns 0 on success, or the error from snbep_pci2phy_map_init(), in which
+ * case nothing is published.
+ */
+int snr_uncore_pci_init(void)
+{
+	int err;
+
+	/* SNR UBOX DID */
+	err = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID,
+				     SKX_GIDNIDMAP, true);
+	if (!err) {
+		uncore_pci_uncores = snr_pci_uncores;
+		uncore_pci_driver = &snr_uncore_pci_driver;
+	}
+
+	return err;
+}
+
+/*
+ * Find the Intel PCI device 0x3451 whose bus maps to logical package @id.
+ *
+ * Devices whose bus has no physical id, or whose physical id has no logical
+ * package, are skipped. Returns the matching device as handed back by
+ * pci_get_device(), or NULL once the device list is exhausted.
+ */
+static struct pci_dev *snr_uncore_get_mc_dev(int id)
+{
+	struct pci_dev *dev = NULL;
+
+	for (;;) {
+		int phys, logical;
+
+		dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, dev);
+		if (dev == NULL)
+			return NULL;
+
+		phys = uncore_pcibus_to_physid(dev->bus);
+		if (phys < 0)
+			continue;
+
+		logical = topology_phys_to_logical_pkg(phys);
+		if (logical >= 0 && logical == id)
+			return dev;
+	}
+}
+
+/*
+ * Map and initialise an MMIO-based SNR uncore box.
+ *
+ * The MMIO address is assembled from two PCI config dwords of the device
+ * returned by snr_uncore_get_mc_dev(): the masked BASE field shifted left
+ * by 23 and the masked MEM0 field shifted left by 12, plus the box control
+ * offset. SNR_IMC_MMIO_SIZE bytes are then ioremap()ed and
+ * IVBEP_PMON_BOX_CTL_INT is written to the start of the mapping.
+ *
+ * On any failure box->io_addr is left unset/NULL; the other MMIO callbacks
+ * check for that before touching the hardware.
+ */
+static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
+	unsigned int box_ctl = uncore_mmio_box_ctl(box);
+	resource_size_t addr;
+	u32 pci_dword;
+
+	if (!pdev)
+		return;
+
+	/*
+	 * Widen before shifting: pci_dword is 32 bits, so shifting it by 23
+	 * in 32-bit arithmetic would drop the upper bits of the base.
+	 */
+	pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
+	addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
+
+	pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword);
+	addr |= ((resource_size_t)pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
+
+	/*
+	 * pci_get_device() returned pdev with a reference held; it is only
+	 * needed for the two config reads above, so release it here.
+	 */
+	pci_dev_put(pdev);
+
+	addr += box_ctl;
+
+	box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE);
+	if (!box->io_addr)
+		return;
+
+	writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
+}
+
+/*
+ * Freeze an MMIO box by setting SNBEP_PMON_BOX_CTL_FRZ in the register at
+ * the start of the mapping. Does nothing when the box was never mapped.
+ */
+static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+	if (box->io_addr)
+		writel(readl(box->io_addr) | SNBEP_PMON_BOX_CTL_FRZ,
+		       box->io_addr);
+}
+
+/*
+ * Unfreeze an MMIO box by clearing SNBEP_PMON_BOX_CTL_FRZ in the register
+ * at the start of the mapping. Does nothing when the box was never mapped.
+ */
+static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+	if (box->io_addr)
+		writel(readl(box->io_addr) & ~SNBEP_PMON_BOX_CTL_FRZ,
+		       box->io_addr);
+}
+
+/*
+ * Enable an event on an MMIO box: write the event's config with
+ * SNBEP_PMON_CTL_EN set to its control register, located at config_base
+ * within the mapping. Does nothing when the box was never mapped.
+ */
+static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box,
+					 struct perf_event *event)
+{
+	if (box->io_addr)
+		writel(event->hw.config | SNBEP_PMON_CTL_EN,
+		       box->io_addr + event->hw.config_base);
+}
+
+/*
+ * Disable an event on an MMIO box: write the event's config, without the
+ * enable bit ORed in, to its control register at config_base within the
+ * mapping. Does nothing when the box was never mapped.
+ */
+static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box,
+					  struct perf_event *event)
+{
+	if (box->io_addr)
+		writel(event->hw.config,
+		       box->io_addr + event->hw.config_base);
+}
+
+/*
+ * Operations for MMIO-based SNR uncore boxes: SNR-specific map/freeze/event
+ * helpers plus the generic MMIO exit_box and read_counter.
+ */
+static struct intel_uncore_ops snr_uncore_mmio_ops = {
+ .init_box = snr_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = snr_uncore_mmio_disable_box,
+ .enable_box = snr_uncore_mmio_enable_box,
+ .disable_event = snr_uncore_mmio_disable_event,
+ .enable_event = snr_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+/*
+ * Named events for the SNR IMC PMU: clockticks plus CAS read/write counts,
+ * the latter with a scale and a "MiB" unit. The table ends with an all-zero
+ * entry.
+ */
+static struct uncore_event_desc snr_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+/*
+ * SNR IMC PMON type: two boxes, each with four 48-bit general counters and
+ * one 48-bit fixed counter, accessed over MMIO (boxes spaced by
+ * mmio_offset).
+ */
+static struct intel_uncore_type snr_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .event_descs = snr_uncore_imc_events,
+ .perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
+ .event_ctl = SNR_IMC_MMIO_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL,
+ .mmio_offset = SNR_IMC_MMIO_OFFSET,
+ .ops = &snr_uncore_mmio_ops,
+ .format_group = &skx_uncore_format_group,
+};
+
+/* Index of each free-running counter group in snr_imc_freerunning[]. */
+enum perf_uncore_snr_imc_freerunning_type_id {
+ SNR_IMC_DCLK,
+ SNR_IMC_DDR,
+
+ SNR_IMC_FREERUNNING_TYPE_MAX,
+};
+
+/*
+ * Per-group description of the IMC free-running counters.
+ * NOTE(review): the positional fields are presumably { counter base,
+ * counter offset, box offset, number of counters, counter width in bits } -
+ * confirm against struct freerunning_counters.
+ */
+static struct freerunning_counters snr_imc_freerunning[] = {
+ [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 },
+ [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 },
+};
+
+/*
+ * Named events for the IMC free-running counters: one DCLK event and the
+ * DDR read/write events, the latter with a scale and a "MiB" unit.
+ *
+ * Like every other uncore_event_desc table in this file, the array must end
+ * with an all-zero entry so that code walking it knows where to stop.
+ */
+static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
+	INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
+
+	INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
+	INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
+	INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
+	INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
+	{ /* end: all zeroes */ },
+};
+
+/*
+ * Operations for the IMC free-running counters: the box is mapped/unmapped
+ * like the regular MMIO boxes; there are no enable/disable callbacks, only
+ * counter reads and the generic free-running hw_config.
+ */
+static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = {
+ .init_box = snr_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+/*
+ * SNR IMC free-running PMU type: one box with three counters (1 DCLK + 2
+ * DDR, per snr_imc_freerunning[]). Reuses the SKX IIO free-running format
+ * group.
+ */
+static struct intel_uncore_type snr_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 1,
+ .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = snr_imc_freerunning,
+ .ops = &snr_uncore_imc_freerunning_ops,
+ .event_descs = snr_uncore_imc_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+/* NULL-terminated list of all MMIO-accessed uncore PMU types on SNR. */
+static struct intel_uncore_type *snr_mmio_uncores[] = {
+ &snr_uncore_imc,
+ &snr_uncore_imc_free_running,
+ NULL,
+};
+
+/* Install the SNR MMIO uncore type list as the active uncore_mmio_uncores. */
+void snr_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = snr_mmio_uncores;
+}
+
+/* end of SNR uncore support */
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index f3f4c2263501..9431447541e9 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
+#include <linux/sysfs.h>
#include <linux/nospec.h>
#include <asm/intel-family.h>
+#include "probe.h"
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -12,32 +14,30 @@ enum perf_msr_id {
PERF_MSR_PTSC = 5,
PERF_MSR_IRPERF = 6,
PERF_MSR_THERM = 7,
- PERF_MSR_THERM_SNAP = 8,
- PERF_MSR_THERM_UNIT = 9,
PERF_MSR_EVENT_MAX,
};
-static bool test_aperfmperf(int idx)
+static bool test_aperfmperf(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
-static bool test_ptsc(int idx)
+static bool test_ptsc(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_PTSC);
}
-static bool test_irperf(int idx)
+static bool test_irperf(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_IRPERF);
}
-static bool test_therm_status(int idx)
+static bool test_therm_status(int idx, void *data)
{
return boot_cpu_has(X86_FEATURE_DTHERM);
}
-static bool test_intel(int idx)
+static bool test_intel(int idx, void *data)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
@@ -98,37 +98,51 @@ static bool test_intel(int idx)
return false;
}
-struct perf_msr {
- u64 msr;
- struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
+PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
+PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" );
+PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" );
+PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" );
+PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" );
+PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" );
+PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" );
+
+static unsigned long msr_mask;
+
+PMU_EVENT_GROUP(events, aperf);
+PMU_EVENT_GROUP(events, mperf);
+PMU_EVENT_GROUP(events, pperf);
+PMU_EVENT_GROUP(events, smi);
+PMU_EVENT_GROUP(events, ptsc);
+PMU_EVENT_GROUP(events, irperf);
+
+static struct attribute *attrs_therm[] = {
+ &attr_therm.attr.attr,
+ &attr_therm_snap.attr.attr,
+ &attr_therm_unit.attr.attr,
+ NULL,
};
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" );
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" );
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" );
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" );
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" );
-PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" );
-PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" );
+static struct attribute_group group_therm = {
+ .name = "events",
+ .attrs = attrs_therm,
+};
static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
- [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
- [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
- [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, },
- [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, },
- [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, },
+ [PERF_MSR_TSC] = { .no_check = true, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, },
+ [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, },
+ [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, },
+ [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, },
};
-static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+static struct attribute *events_attrs[] = {
+ &attr_tsc.attr.attr,
NULL,
};
@@ -153,6 +167,17 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
+/*
+ * Optional event groups, one per probed MSR, handed to the PMU through
+ * pmu_msr.attr_update. Only referenced from this file, so keep it static to
+ * avoid exporting a generic symbol name.
+ */
+static const struct attribute_group *attr_update[] = {
+	&group_aperf,
+	&group_mperf,
+	&group_pperf,
+	&group_smi,
+	&group_ptsc,
+	&group_irperf,
+	&group_therm,
+	NULL,
+};
+
static int msr_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
@@ -169,7 +194,7 @@ static int msr_event_init(struct perf_event *event)
cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
- if (!msr[cfg].attr)
+ if (!(msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.idx = -1;
@@ -252,32 +277,17 @@ static struct pmu pmu_msr = {
.stop = msr_event_stop,
.read = msr_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .attr_update = attr_update,
};
static int __init msr_init(void)
{
- int i, j = 0;
-
if (!boot_cpu_has(X86_FEATURE_TSC)) {
pr_cont("no MSR PMU driver.\n");
return 0;
}
- /* Probe the MSRs. */
- for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
- u64 val;
-
- /* Virt sucks; you cannot tell if a R/O MSR is present :/ */
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining MSRs in the sysfs attrs. */
- for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
+ msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL);
perf_pmu_register(&pmu_msr, "msr", -1);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a6ac2f4f76fc..8751008fc170 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -121,24 +121,6 @@ struct amd_nb {
(1ULL << PERF_REG_X86_R14) | \
(1ULL << PERF_REG_X86_R15))
-#define PEBS_XMM_REGS \
- ((1ULL << PERF_REG_X86_XMM0) | \
- (1ULL << PERF_REG_X86_XMM1) | \
- (1ULL << PERF_REG_X86_XMM2) | \
- (1ULL << PERF_REG_X86_XMM3) | \
- (1ULL << PERF_REG_X86_XMM4) | \
- (1ULL << PERF_REG_X86_XMM5) | \
- (1ULL << PERF_REG_X86_XMM6) | \
- (1ULL << PERF_REG_X86_XMM7) | \
- (1ULL << PERF_REG_X86_XMM8) | \
- (1ULL << PERF_REG_X86_XMM9) | \
- (1ULL << PERF_REG_X86_XMM10) | \
- (1ULL << PERF_REG_X86_XMM11) | \
- (1ULL << PERF_REG_X86_XMM12) | \
- (1ULL << PERF_REG_X86_XMM13) | \
- (1ULL << PERF_REG_X86_XMM14) | \
- (1ULL << PERF_REG_X86_XMM15))
-
/*
* Per register state.
*/
@@ -631,14 +613,11 @@ struct x86_pmu {
int attr_rdpmc_broken;
int attr_rdpmc;
struct attribute **format_attrs;
- struct attribute **event_attrs;
- struct attribute **caps_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
- struct attribute **cpu_events;
+ const struct attribute_group **attr_update;
unsigned long attr_freeze_on_smi;
- struct attribute **attrs;
/*
* CPU Hotplug hooks
@@ -668,8 +647,7 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
- pebs_no_isolation :1,
- pebs_no_xmm_regs :1;
+ pebs_no_isolation :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
@@ -905,8 +883,6 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);
-struct attribute **merge_attr(struct attribute **a, struct attribute **b);
-
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
new file mode 100644
index 000000000000..c2ede2f3b277
--- /dev/null
+++ b/arch/x86/events/probe.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bits.h>
+#include "probe.h"
+
+/*
+ * is_visible callback that reports mode 0 for every attribute; installed by
+ * perf_msr_probe() on groups whose MSR could not be validated.
+ */
+static umode_t
+not_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return 0;
+}
+
+/*
+ * perf_msr_probe - probe a table of MSRs and report which ones are usable
+ * @msr:  table of MSR descriptors
+ * @cnt:  number of entries in @msr; must be in 1 .. BITS_PER_LONG - 1
+ * @zero: when false, an MSR that currently reads as 0 is treated as absent
+ * @data: opaque cookie passed through to each entry's ->test() callback
+ *
+ * Entries flagged ->no_check are reported as available without probing.
+ * Every other entry first has its attribute group hidden; the group is made
+ * visible again only when the optional ->test() callback passes, the MSR
+ * can be read with rdmsrl_safe(), and (unless @zero) the value is non-zero.
+ *
+ * Returns a bitmask with bit N set for each available entry N, or 0 when
+ * @cnt is out of range.
+ */
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
+{
+	unsigned long avail = 0;
+	unsigned int bit;
+	u64 val;
+
+	/*
+	 * Reject negative counts too: @cnt is compared against an unsigned
+	 * index below, where a negative value would turn into a huge bound.
+	 */
+	if (cnt <= 0 || cnt >= BITS_PER_LONG)
+		return 0;
+
+	for (bit = 0; bit < cnt; bit++) {
+		if (!msr[bit].no_check) {
+			struct attribute_group *grp = msr[bit].grp;
+
+			/* An entry without a group cannot be exposed; skip it. */
+			if (!grp)
+				continue;
+
+			grp->is_visible = not_visible;
+
+			if (msr[bit].test && !msr[bit].test(bit, data))
+				continue;
+			/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
+			if (rdmsrl_safe(msr[bit].msr, &val))
+				continue;
+			/* Disable zero counters if requested. */
+			if (!zero && !val)
+				continue;
+
+			grp->is_visible = NULL;
+		}
+		avail |= BIT(bit);
+	}
+
+	return avail;
+}
+EXPORT_SYMBOL_GPL(perf_msr_probe);
diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h
new file mode 100644
index 000000000000..4c8e0afc5fb5
--- /dev/null
+++ b/arch/x86/events/probe.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_X86_EVENTS_PROBE_H__
+#define __ARCH_X86_EVENTS_PROBE_H__
+#include <linux/sysfs.h>
+
+/*
+ * One entry in a table handed to perf_msr_probe().
+ */
+struct perf_msr {
+ /* MSR address read with rdmsrl_safe() while probing. */
+ u64 msr;
+ /* Attribute group whose is_visible hook is toggled by the probe. */
+ struct attribute_group *grp;
+ /* Optional extra check; called with the entry index and caller data. */
+ bool (*test)(int idx, void *data);
+ /* When true the entry is reported available without being probed. */
+ bool no_check;
+};
+
+/*
+ * Probe @cnt entries of @msr and return a bitmask of the usable ones.
+ * When @zero is false, an MSR that reads as 0 is treated as unavailable.
+ * @data is passed through to each entry's ->test() callback.
+ */
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data);
+
+/*
+ * Define attrs_<_name>[]: a NULL-terminated attribute array holding the
+ * single attribute attr_<_name>, which must already be defined.
+ */
+#define __PMU_EVENT_GROUP(_name) \
+static struct attribute *attrs_##_name[] = { \
+ &attr_##_name.attr.attr, \
+ NULL, \
+}
+
+/*
+ * Define group_<_name>: an attribute group named #_grp that contains only
+ * attr_<_name>. Also emits the attrs_<_name>[] array via __PMU_EVENT_GROUP().
+ */
+#define PMU_EVENT_GROUP(_grp, _name) \
+__PMU_EVENT_GROUP(_name); \
+static struct attribute_group group_##_name = { \
+ .name = #_grp, \
+ .attrs = attrs_##_name, \
+}
+
+#endif /* __ARCH_X86_EVENTS_PROBE_H__ */
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 1c11f9420a82..89b1f74d3225 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index e4ba467a9fc6..0d258688c8cf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -1,20 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* X86 specific Hyper-V initialization code.
*
* Copyright (C) 2016, Microsoft, Inc.
*
* Author : K. Y. Srinivasan <kys@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
*/
#include <linux/efi.h>
@@ -27,64 +17,13 @@
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/clockchips.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
-
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return tsc_pg;
-}
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-
-static u64 read_hv_clock_tsc(struct clocksource *arg)
-{
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick == U64_MAX)
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_tsc = {
- .name = "hyperv_clocksource_tsc_page",
- .rating = 400,
- .read = read_hv_clock_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-#endif
-
-static u64 read_hv_clock_msr(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_msr = {
- .name = "hyperv_clocksource_msr",
- .rating = 400,
- .read = read_hv_clock_msr,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
+#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
@@ -121,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
- if (!*hvp)
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ /*
+ * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+ * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+ * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+ * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+ * not be stopped in the case of CPU offlining and the VM will hang.
+ */
+ if (!*hvp) {
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+ PAGE_KERNEL);
+ }
if (*hvp) {
u64 val;
@@ -353,42 +301,8 @@ void __init hyperv_init(void)
x86_init.pci.arch_init = hv_pci_init;
- /*
- * Register Hyper-V specific clocksource.
- */
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
- union hv_x64_msr_hypercall_contents tsc_msr;
-
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
- if (!tsc_pg)
- goto register_msr_cs;
-
- hyperv_cs = &hyperv_cs_tsc;
-
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- tsc_msr.enable = 1;
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
-
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
-
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- return;
- }
-register_msr_cs:
-#endif
- /*
- * For 32 bit guests just use the MSR based mechanism for reading
- * the partition counter.
- */
-
- hyperv_cs = &hyperv_cs_msr;
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-
+ /* Register Hyper-V specific clocksource */
+ hv_init_clocksource();
return;
remove_cpuhp_state:
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index cd4339bae066..d13b352b2aa7 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the ia32 kernel emulation subsystem.
#
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 3c135084e1eb..9bb71abd66bd 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* a.out loader for x86-64
*
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 629d1ee05599..1cee10091b9f 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
/* Create the ucontext. */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
+ if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index a43212036257..21790307121e 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -237,6 +237,18 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
unsigned long, newsp, int __user *, parent_tidptr,
unsigned long, tls_val, int __user *, child_tidptr)
{
- return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr,
- tls_val);
+ struct kernel_clone_args args = {
+ .flags = (clone_flags & ~CSIGNAL),
+ .pidfd = parent_tidptr,
+ .child_tid = child_tidptr,
+ .parent_tid = parent_tidptr,
+ .exit_signal = (clone_flags & CSIGNAL),
+ .stack = newsp,
+ .tls = tls_val,
+ };
+
+ if (!legacy_clone_args_valid(&args))
+ return -EINVAL;
+
+ return _do_fork(&args);
}
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index eebd05942e6c..8b52bc5ddf69 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
generated-y += syscalls_32.h
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 1b010a859b8b..9aff97f0de7f 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* X86 specific ACPICA environments and implementation
*
* Copyright (C) 2014, Intel Corporation
* Author: Lv Zheng <lv.zheng@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _ASM_X86_ACENV_H
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2f01eb4d6208..aac686e1e005 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -1,27 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_ACPI_H
#define _ASM_X86_ACPI_H
/*
* Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <acpi/pdc_intel.h>
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
new file mode 100644
index 000000000000..4adb13f08af7
--- /dev/null
+++ b/arch/x86/include/asm/acrn.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ACRN_H
+#define _ASM_X86_ACRN_H
+
+extern void acrn_hv_callback_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_acrn_hv_callback_vector acrn_hv_callback_vector
+#endif
+
+extern void acrn_hv_vector_handler(struct pt_regs *regs);
+#endif /* _ASM_X86_ACRN_H */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index 0acbac299e49..99bb207fc04c 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare
*
* (C) Copyright 2009 Intel Corporation
* Author: Jacob Pan (jacob.jun.pan@intel.com)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* Note:
*/
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 130e81e10fc7..e647aa095867 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_APIC_H
#define _ASM_X86_APIC_H
@@ -48,11 +49,11 @@ static inline void generic_apic_probe(void)
#ifdef CONFIG_X86_LOCAL_APIC
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
extern int local_apic_timer_c2_ok;
extern int disable_apic;
-extern unsigned int lapic_timer_frequency;
+extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
@@ -154,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr)
extern int apic_force_enable(unsigned long addr);
#endif
-extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
@@ -174,6 +174,7 @@ extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
extern void lapic_online(void);
extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
@@ -187,6 +188,7 @@ static inline void init_bsp_APIC(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
@@ -273,7 +275,6 @@ struct irq_data;
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Generic APIC sub-arch data struct.
*
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 3ac991d81e74..af45e1452f09 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* This file is part of the Linux kernel.
*
* Copyright (c) 2011-2014, Intel Corporation
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#ifndef ASM_X86_ARCHRANDOM_H
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6a5b0ec460da..52cfaecb13f9 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -9,7 +9,7 @@
/* An 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
@@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless);
* the old value.
*/
-static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
- long long n)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
@@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
* Atomically xchgs the value of @v to @n and returns
* the old value.
*/
-static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
- long long o;
+ s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
alternative_atomic64(xchg, "=&A" (o),
@@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
*
* Atomically sets the value of @v to @n.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
@@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i)
*
* Atomically reads the value of @v and returns it.
*/
-static inline long long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
}
@@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + *@v
*/
-static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
/*
* Other variants with different arithmetic operators:
*/
-static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
return i;
}
-static inline long long arch_atomic64_inc_return(atomic64_t *v)
+static inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(inc_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
-static inline long long arch_atomic64_dec_return(atomic64_t *v)
+static inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(dec_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
@@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
*
* Atomically adds @i to @v.
*/
-static inline long long arch_atomic64_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns non-zero if the add was done, zero otherwise.
*/
-static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
@@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(dec_if_positive, "=&A" (r),
"S" (v) : "ecx", "memory");
return r;
@@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void arch_atomic64_and(long long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
@@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_or(long long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
@@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_xor(long long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
@@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
return old;
}
-static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
c = old;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..95c6ceac66b9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -17,7 +17,7 @@
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
-static inline long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE((v)->counter);
}
@@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i)
*
* Atomically adds @i to @v.
*/
-static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_inc arch_atomic64_inc
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_dec arch_atomic64_dec
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
+static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
@@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
-static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
return arch_atomic64_add_return(-i, v);
}
-static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
-static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
-static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
return try_cmpxchg(&v->counter, old, new);
}
-static inline long arch_atomic64_xchg(atomic64_t *v, long new)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
}
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "andq %1,%0"
: "+m" (v->counter)
@@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_or(long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "orq %1,%0"
: "+m" (v->counter)
@@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_xor(long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "xorq %1,%0"
: "+m" (v->counter)
@@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 14de0432d288..84f848c2541a 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -80,8 +80,8 @@ do { \
})
/* Atomic operations are already serializing on x86 */
-#define __smp_mb__before_atomic() barrier()
-#define __smp_mb__after_atomic() barrier()
+#define __smp_mb__before_atomic() do { } while (0)
+#define __smp_mb__after_atomic() do { } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 8e790ec219a5..ba15d53c1ca7 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -49,23 +49,8 @@
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
static __always_inline void
-set_bit(long nr, volatile unsigned long *addr)
+arch_set_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr)
}
}
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___set_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
static __always_inline void
-clear_bit(long nr, volatile unsigned long *addr)
+arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr)
}
}
-/*
- * clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and implies release semantics before the memory
- * operation. It can be used for an unlock.
- */
-static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
- clear_bit(nr, addr);
+ arch_clear_bit(nr, addr);
}
-static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
@@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
+#define arch_clear_bit_unlock_is_negative_byte \
+ arch_clear_bit_unlock_is_negative_byte
-// Let everybody know we have it
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
-
-/*
- * __clear_bit_unlock - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * __clear_bit() is non-atomic and implies release semantics before the memory
- * operation. It can be used for an unlock if no other CPUs can concurrently
- * modify other bits in the word.
- */
-static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- __clear_bit(nr, addr);
+ arch___clear_bit(nr, addr);
}
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch___change_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __always_inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void
+arch_change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
}
}
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
-/**
- * test_and_set_bit_lock - Set a bit and return its old value for lock
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This is the same as test_and_set_bit on x86.
- */
static __always_inline bool
-test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
- return test_and_set_bit(nr, addr);
+ return arch_test_and_set_bit(nr, addr);
}
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_set_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
return oldbit;
}
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- *
+/*
* Note: the operation is performed atomically with respect to
* the local CPU, but not other CPUs. Portable code should not
* rely on this behaviour.
@@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
return oldbit;
}
-/* WARNING: non atomic and it can be reordered! */
-static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch___test_and_change_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
return oldbit;
}
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool
+arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
@@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
return oldbit;
}
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static bool test_bit(int nr, const volatile unsigned long *addr);
-#endif
-
-#define test_bit(nr, addr) \
+#define arch_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? constant_test_bit((nr), (addr)) \
: variable_test_bit((nr), (addr)))
@@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops-instrumented.h>
+
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index f6f6ef436599..101eb944f13c 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -24,7 +24,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
* IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear
* this field. The purpose of this field is to guarantee
* compliance with the x86 boot spec located in
- * Documentation/x86/boot.txt . That spec says that the
+ * Documentation/x86/boot.rst . That spec says that the
* *whole* structure should be cleared, after which only the
* portion defined by struct setup_header (boot_params->hdr)
* should be copied in.
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index a8303ebe089f..facd374a1bf7 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Derived from include/asm-powerpc/iommu.h
*
@@ -5,20 +6,6 @@
*
* Author: Jon Mason <jdmason@us.ibm.com>
* Author: Muli Ben-Yehuda <muli@il.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_X86_CALGARY_H
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d337c51f7e6..58acda503817 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,8 +22,8 @@ enum cpuid_leafs
CPUID_LNX_3,
CPUID_7_0_EBX,
CPUID_D_1_EAX,
- CPUID_F_0_EDX,
- CPUID_F_1_EDX,
+ CPUID_LNX_4,
+ CPUID_7_1_EAX,
CPUID_8000_0008_EBX,
CPUID_6_EAX,
CPUID_8000_000A_EDX,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 75f27ee2c263..998c2cc08363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -239,12 +239,14 @@
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
@@ -269,13 +271,19 @@
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0xf, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -322,6 +330,7 @@
#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 9e27fa05a7ae..4c95c365058a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -536,7 +536,7 @@ static inline void __fpregs_load_activate(void)
struct fpu *fpu = &current->thread.fpu;
int cpu = smp_processor_id();
- if (WARN_ON_ONCE(current->mm == NULL))
+ if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
return;
if (!fpregs_state_valid(fpu, cpu)) {
@@ -567,11 +567,11 @@ static inline void __fpregs_load_activate(void)
* otherwise.
*
* The FPU context is only stored/restored for a user task and
- * ->mm is used to distinguish between kernel and user threads.
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
*/
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
+ if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7e42b285c856..c6136d79f8c0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
-void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..296b346184b2 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -22,6 +22,35 @@
pop %_ASM_BP
.endm
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+ leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just clearing the MSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original ebp.
+ */
+.macro ENCODE_FRAME_POINTER
+ mov %esp, %ebp
+ andl $0x7fffffff, %ebp
+.endm
+#endif /* CONFIG_X86_64 */
+
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
@@ -30,12 +59,32 @@
#define FRAME_END "pop %" _ASM_BP "\n"
+#ifdef CONFIG_X86_64
+#define ENCODE_FRAME_POINTER \
+ "lea 1(%rsp), %rbp\n\t"
+#else /* !CONFIG_X86_64 */
+#define ENCODE_FRAME_POINTER \
+ "movl %esp, %ebp\n\t" \
+ "andl $0x7fffffff, %ebp\n\t"
+#endif /* CONFIG_X86_64 */
+
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
+#ifdef __ASSEMBLY__
+
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+.endm
+
+#else /* !__ASSEMBLY */
+
+#define ENCODE_FRAME_POINTER
+
+#endif
+
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index 7cd73552a4e8..3c7267ef4a9e 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Geode definitions
* Copyright (C) 2006, Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
*/
#ifndef _ASM_X86_GEODE_H
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d9069bb26c7f..07533795b8d2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
unsigned int irq_hv_callback_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV)
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..6352dee37cda 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
struct irq_data;
-struct hpet_dev;
+struct hpet_channel;
struct irq_domain;
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
-extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
+extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
extern int hpet_assign_irq(struct irq_domain *domain,
- struct hpet_dev *dev, int dev_num);
+ struct hpet_channel *hc, int dev_num);
#ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..cbd97e22d2f3 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -150,8 +150,11 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
+extern char spurious_entries_start[];
+
#define VECTOR_UNUSED NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN ((void *)~0UL)
+#define VECTOR_RETRIGGERED ((void *)~1UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cdf44aa9a501..af78cd72b8f3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+/*
+ * The Hyper-V TimeRefCount register and the TSC
+ * page provide a guest VM clock with 100ns tick rate
+ */
+#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
+
typedef struct _HV_REFERENCE_TSC_PAGE {
__u32 tsc_sequence;
__u32 res1;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..e41cbf2ec41d 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,7 @@ enum x86_hypervisor_type {
X86_HYPER_XEN_HVM,
X86_HYPER_KVM,
X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
};
#ifdef CONFIG_HYPERVISOR_GUEST
@@ -52,8 +53,20 @@ struct hypervisor_x86 {
/* runtime callbacks */
struct x86_hyper_runtime runtime;
+
+ /* ignore nopv parameter */
+ bool ignore_nopv;
};
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
+extern struct hypervisor_x86 x86_hyper_xen_hvm;
+
+extern bool nopv;
extern enum x86_hypervisor_type x86_hyper_type;
extern void init_hypervisor_platform(void);
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index ebea2c9d2cdc..0d1dbf235679 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* imr.h: Isolated Memory Region API
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _IMR_H
#define _IMR_H
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 1c78580e58be..4cf2ad521f65 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INAT_H
#define _ASM_X86_INAT_H
/*
* x86 instruction attributes
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
#include <asm/inat_types.h>
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
index cb3c20ce39cf..b047efa9ddc2 100644
--- a/arch/x86/include/asm/inat_types.h
+++ b/arch/x86/include/asm/inat_types.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INAT_TYPES_H
#define _ASM_X86_INAT_TYPES_H
/*
* x86 instruction attributes
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
/* Instruction attributes */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index c2c01f84df75..154f27be8bfc 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_INSN_H
#define _ASM_X86_INSN_H
/*
* x86 instruction analysis
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2009
*/
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9f15384c504a..0278aa66ef62 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,7 +52,11 @@
#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_FAM6_ICELAKE_X 0x6A
+#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
+#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+#define INTEL_FAM6_ICELAKE_NNPI 0x9D
/* "Small Core" Processors (Atom) */
@@ -73,6 +77,7 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+
#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 52f815a80539..8e5af119dc2d 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* intel-mid.h: Intel MID specific setup code
*
* (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _ASM_X86_INTEL_MID_H
#define _ASM_X86_INTEL_MID_H
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 85029b58d0cd..214394860632 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -1,17 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Intel SOC Telemetry Driver Header File
* Copyright (C) 2015, Intel Corporation.
* All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#ifndef INTEL_TELEMETRY_H
#define INTEL_TELEMETRY_H
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index a06a9f8294ea..6bed97ff6db2 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -165,7 +165,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
{
return (unsigned int)virt_to_phys(address);
}
-#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page))
#define isa_bus_to_virt phys_to_virt
/*
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 363e33eb6ec1..2a7b3211ee7a 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_IOMAP_H
#define _ASM_X86_IOMAP_H
/*
* Copyright © 2008 Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#include <linux/fs.h>
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index a4fe16e42b7b..f73076be546a 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_IPI_H
#define _ASM_X86_IPI_H
@@ -5,7 +6,6 @@
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Generic APIC InterProcessor Interrupt code.
*
@@ -17,7 +17,6 @@
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
* (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 2002,2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
*/
#include <asm/hw_irq.h>
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 8f3bee821e6c..187ce59aea28 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return this_cpu_read(irq_regs);
+ return __this_cpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
struct pt_regs *old_regs;
old_regs = get_irq_regs();
- this_cpu_write(irq_regs, new_regs);
+ __this_cpu_write(irq_regs, new_regs);
return old_regs;
}
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 67ed72f31cc2..4bc985f1e2e4 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* This header file contains the interface of the interrupt remapping code to
* the x86 interrupt management code.
*/
diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h
index c9803f1a2033..7ede2731dc92 100644
--- a/arch/x86/include/asm/ist.h
+++ b/arch/x86/include/asm/ist.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Include file for the interface to IST BIOS
* Copyright 2002 Andy Grover <andrew.grover@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#ifndef _ASM_X86_IST_H
#define _ASM_X86_IST_H
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..06c3cc22a058 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
+#define HAVE_JUMP_LABEL_BATCH
+
#define JUMP_LABEL_NOP_SIZE 5
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5e7d6b46de97 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -71,22 +71,6 @@ struct kimage;
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
- newregs->sp = (unsigned long)&(oldregs->sp);
- asm volatile("xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->ss));
-#endif
-}
-
-/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
* mode exception.
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
- crash_fixup_ss_esp(newregs, oldregs);
} else {
#ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index c8cec1b39b88..5dc909d9ad81 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_KPROBES_H
#define _ASM_X86_KPROBES_H
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* See arch/x86/kernel/kprobes.c for x86 kprobes history.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 450d69a1e6fa..8282b8d41209 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Kernel-based Virtual Machine driver for Linux
*
* This header defines architecture specific interfaces, x86 version
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#ifndef _ASM_X86_KVM_HOST_H
@@ -689,6 +686,7 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
+ u64 msr_ia32_power_ctl;
u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@@ -755,6 +753,8 @@ struct kvm_vcpu_arch {
struct gfn_to_hva_cache data;
} pv_eoi;
+ u64 msr_kvm_poll_control;
+
/*
* Indicate whether the access faults on its page table in guest
* which is set when fix page fault and used to detect unhandeable
@@ -882,6 +882,7 @@ struct kvm_arch {
bool mwait_in_guest;
bool hlt_in_guest;
bool pause_in_guest;
+ bool cstate_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -929,6 +930,8 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+
+ struct kvm_pmu_event_filter *pmu_event_filter;
};
struct kvm_vm_stat {
@@ -999,7 +1002,7 @@ struct kvm_x86_ops {
int (*disabled_by_bios)(void); /* __init */
int (*hardware_enable)(void);
void (*hardware_disable)(void);
- void (*check_processor_compatibility)(void *rtn);
+ int (*check_processor_compatibility)(void);/* __init */
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
@@ -1113,7 +1116,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
@@ -1493,25 +1496,29 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+asmlinkage void __noreturn kvm_spurious_fault(void);
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
- * Trap the fault and ignore the instruction if that happens.
+ * Usually after catching the fault we just panic; during reboot
+ * instead the instruction is ignored.
*/
-asmlinkage void kvm_spurious_fault(void);
-
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
- "666: " insn "\n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n" \
- "667: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
- "jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
- "jmp kvm_spurious_fault \n\t" \
- ".popsection \n\t" \
- _ASM_EXTABLE(666b, 667b)
+#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+ "666: \n\t" \
+ insn "\n\t" \
+ "jmp 668f \n\t" \
+ "667: \n\t" \
+ "call kvm_spurious_fault \n\t" \
+ "668: \n\t" \
+ ".pushsection .fixup, \"ax\" \n\t" \
+ "700: \n\t" \
+ cleanup_insn "\n\t" \
+ "cmpb $0, kvm_rebooting\n\t" \
+ "je 667b \n\t" \
+ "jmp 668b \n\t" \
+ ".popsection \n\t" \
+ _ASM_EXTABLE(666b, 700b)
#define __kvm_handle_fault_on_reboot(insn) \
____kvm_handle_fault_on_reboot(insn, "")
@@ -1532,7 +1539,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index a66f6706c2de..1fde1ab6559e 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* livepatch.h - x86-specific Kernel Live Patching Core
*
* Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
* Copyright (C) 2014 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ASM_X86_LIVEPATCH_H
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 616f8e637bc3..0c196c47d621 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef __X86_MEM_ENCRYPT_H__
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c20..e78c7db87801 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -59,6 +59,7 @@ typedef struct {
#define INIT_MM_CONTEXT(mm) \
.context = { \
.ctx_id = 1, \
+ .lock = __MUTEX_INITIALIZER(mm.context.lock), \
}
void leave_mm(int cpu);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..2ef31cc8c529 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,84 +3,15 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/atomic.h>
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
-#define VP_INVAL U32_MAX
-
-struct ms_hyperv_info {
- u32 features;
- u32 misc_features;
- u32 hints;
- u32 nested_features;
- u32 max_vp_index;
- u32 max_lp_index;
-};
-
-extern struct ms_hyperv_info ms_hyperv;
-
-
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
void *data);
-/*
- * Generate the guest ID.
- */
-
-static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
- __u64 d_info2)
-{
- __u64 guest_id = 0;
-
- guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48);
- guest_id |= (d_info1 << 48);
- guest_id |= (kernel_version << 16);
- guest_id |= d_info2;
-
- return guest_id;
-}
-
-
-/* Free the message slot and signal end-of-message if required */
-static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
-{
- /*
- * On crash we're reading some other CPU's message page and we need
- * to be careful: this other CPU may already had cleared the header
- * and the host may already had delivered some other message there.
- * In case we blindly write msg->header.message_type we're going
- * to lose it. We can still lose a message of the same type but
- * we count on the fact that there can only be one
- * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
- * on crash.
- */
- if (cmpxchg(&msg->header.message_type, old_msg_type,
- HVMSG_NONE) != old_msg_type)
- return;
-
- /*
- * Make sure the write to MessageType (ie set to
- * HVMSG_NONE) happens before we read the
- * MessagePending and EOMing. Otherwise, the EOMing
- * will not deliver any more messages since there is
- * no empty slot
- */
- mb();
-
- if (msg->header.message_flags.msg_pending) {
- /*
- * This will cause message queue rescan to
- * possibly deliver another msg from the
- * hypervisor
- */
- wrmsrl(HV_X64_MSR_EOM, 0);
- }
-}
-
#define hv_init_timer(timer, tick) \
wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
#define hv_init_timer_config(timer, val) \
@@ -97,6 +28,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
+#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0)
+
#define hv_get_synint_state(int_num, val) \
rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
#define hv_set_synint_state(int_num, val) \
@@ -105,19 +38,23 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+#define hv_get_time_ref_count(val) \
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
+
+#define hv_get_reference_tsc(val) \
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_reference_tsc(val) \
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_clocksource_vdso(val) \
+ ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
+#define hv_get_raw_timer() rdtsc_ordered()
+
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
void hyperv_vector_handler(struct pt_regs *regs);
-void hv_setup_vmbus_irq(void (*handler)(void));
-void hv_remove_vmbus_irq(void);
-
-void hv_setup_kexec_handler(void (*handler)(void));
-void hv_remove_kexec_handler(void);
-void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
-void hv_remove_crash_handler(void);
/*
* Routines for stimer0 Direct Mode handling.
@@ -125,15 +62,12 @@ void hv_remove_crash_handler(void);
*/
void hv_stimer0_vector_handler(struct pt_regs *regs);
void hv_stimer0_callback_vector(void);
-int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
-void hv_remove_stimer0_irq(int irq);
static inline void hv_enable_stimer0_percpu_irq(int irq) {}
static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
-extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
@@ -272,14 +206,6 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
return status;
}
-/*
- * Hypervisor's notion of virtual processor ID is different from
- * Linux' notion of CPU ID. This information can only be retrieved
- * in the context of the calling CPU. Setup a map for easy access
- * to this information.
- */
-extern u32 *hv_vp_index;
-extern u32 hv_max_vp_index;
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -290,63 +216,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
return hv_vp_assist_page[cpu];
}
-/**
- * hv_cpu_number_to_vp_number() - Map CPU to VP.
- * @cpu_number: CPU number in Linux terms
- *
- * This function returns the mapping between the Linux processor
- * number and the hypervisor's virtual processor number, useful
- * in making hypercalls and such that talk about specific
- * processors.
- *
- * Return: Virtual processor number in Hyper-V terms
- */
-static inline int hv_cpu_number_to_vp_number(int cpu_number)
-{
- return hv_vp_index[cpu_number];
-}
-
-static inline int cpumask_to_vpset(struct hv_vpset *vpset,
- const struct cpumask *cpus)
-{
- int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (hv_max_vp_index / 64 >= 64)
- return 0;
-
- /*
- * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
- * structs are not cleared between calls, we risk flushing unneeded
- * vCPUs otherwise.
- */
- for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
- vpset->bank_contents[vcpu_bank] = 0;
-
- /*
- * Some banks may end up being empty but this is acceptable.
- */
- for_each_cpu(cpu, cpus) {
- vcpu = hv_cpu_number_to_vp_number(cpu);
- if (vcpu == VP_INVAL)
- return -1;
- vcpu_bank = vcpu / 64;
- vcpu_offset = vcpu % 64;
- __set_bit(vcpu_offset, (unsigned long *)
- &vpset->bank_contents[vcpu_bank]);
- if (vcpu_bank >= nr_bank)
- nr_bank = vcpu_bank + 1;
- }
- vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
- return nr_bank;
-}
-
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-void hyperv_report_panic(struct pt_regs *regs, long err);
-void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
-bool hv_is_hyperv_initialized(void);
-void hyperv_cleanup(void);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
@@ -369,8 +240,6 @@ static inline void hv_apic_init(void) {}
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
-static inline bool hv_is_hyperv_initialized(void) { return false; }
-static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
@@ -387,73 +256,7 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_HYPERV_TSCPAGE
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- u64 scale, offset;
- u32 sequence;
-
- /*
- * The protocol for reading Hyper-V TSC page is specified in Hypervisor
- * Top-Level Functional Specification ver. 3.0 and above. To get the
- * reference time we must do the following:
- * - READ ReferenceTscSequence
- * A special '0' value indicates the time source is unreliable and we
- * need to use something else. The currently published specification
- * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
- * instead of '0' as the special value, see commit c35b82ef0294.
- * - ReferenceTime =
- * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
- * - READ ReferenceTscSequence again. In case its value has changed
- * since our first reading we need to discard ReferenceTime and repeat
- * the whole sequence as the hypervisor was updating the page in
- * between.
- */
- do {
- sequence = READ_ONCE(tsc_pg->tsc_sequence);
- if (!sequence)
- return U64_MAX;
- /*
- * Make sure we read sequence before we read other values from
- * TSC page.
- */
- smp_rmb();
-
- scale = READ_ONCE(tsc_pg->tsc_scale);
- offset = READ_ONCE(tsc_pg->tsc_offset);
- *cur_tsc = rdtsc_ordered();
-
- /*
- * Make sure we read sequence after we read all other values
- * from TSC page.
- */
- smp_rmb();
-
- } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
-
- return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
- u64 cur_tsc;
- return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
-}
+#include <asm-generic/mshyperv.h>
-#else
-static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return NULL;
-}
-
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- BUG();
- return U64_MAX;
-}
-#endif
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 979ef971cc78..6b4fc2788078 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,6 +61,15 @@
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+#define MSR_IA32_UMWAIT_CONTROL 0xe1
+#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
+#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
+/*
+ * The time field occupies bits [31:2], but it is interpreted as a full
+ * 32-bit value whose bits [1:0] are zero.
+ */
+#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index eb0f80ce8524..e28f8b723b5c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
-
trace_hardirqs_on();
+
+ mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index c2bf1de5d901..6fe76282aceb 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -9,12 +9,10 @@
struct olpc_platform_t {
int flags;
uint32_t boardrev;
- int ecver;
};
#define OLPC_F_PRESENT 0x01
#define OLPC_F_DCON 0x02
-#define OLPC_F_EC_WIDE_SCI 0x04
#ifdef CONFIG_OLPC
@@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev)
return olpc_platform_info.boardrev >= rev;
}
-extern void olpc_ec_wakeup_set(u16 value);
-extern void olpc_ec_wakeup_clear(u16 value);
-extern bool olpc_ec_wakeup_available(void);
-
-extern int olpc_ec_mask_write(u16 bits);
-extern int olpc_ec_sci_query(u16 *sci_value);
-
#else
static inline int machine_is_olpc(void)
@@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void)
return 0;
}
-static inline void olpc_ec_wakeup_set(u16 value) { }
-static inline void olpc_ec_wakeup_clear(u16 value) { }
-
-static inline bool olpc_ec_wakeup_available(void)
-{
- return false;
-}
-
#endif
#ifdef CONFIG_OLPC_XO1_PM
@@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value);
extern int pci_olpc_init(void);
-/* SCI source values */
-
-#define EC_SCI_SRC_EMPTY 0x00
-#define EC_SCI_SRC_GAME 0x01
-#define EC_SCI_SRC_BATTERY 0x02
-#define EC_SCI_SRC_BATSOC 0x04
-#define EC_SCI_SRC_BATERR 0x08
-#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */
-#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */
-#define EC_SCI_SRC_ACPWR 0x40
-#define EC_SCI_SRC_BATCRIT 0x80
-#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */
-#define EC_SCI_SRC_ALL 0x1FF
-
/* GPIO assignments */
#define OLPC_GPIO_MIC_AC 1
diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
index 91c8d868424d..241631282e43 100644
--- a/arch/x86/include/asm/orc_lookup.h
+++ b/arch/x86/include/asm/orc_lookup.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ORC_LOOKUP_H
#define _ORC_LOOKUP_H
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 46f516dd80ce..6e060907c163 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ORC_TYPES_H
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 793c14c372cb..288b065955b7 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -48,7 +48,7 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 52
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25c38a05c1c..dce26f1d13e1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
static inline unsigned long read_cr2(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr2);
+ return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
}
static inline void write_cr2(unsigned long x)
@@ -746,6 +746,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
PV_RESTORE_ALL_CALLER_REGS \
FRAME_END \
"ret;" \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
/* Get a reference to a callee-save function */
@@ -909,13 +910,7 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
)
-#endif
-
-#define GET_CR2_INTO_RAX \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
-#ifdef CONFIG_PARAVIRT_XXL
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
@@ -929,9 +924,19 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX \
+ PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \
+ )
+
+#endif /* CONFIG_PARAVIRT_XXL */
-#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
#else /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2474e434a6f7..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -88,7 +88,7 @@ struct pv_init_ops {
* the number of bytes of code generated, as we nop pad the
* rest in generic code.
*/
- unsigned (*patch)(u8 type, void *insnbuf,
+ unsigned (*patch)(u8 type, void *insn_buff,
unsigned long addr, unsigned len);
} __no_randomize_layout;
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
void (*exit_mmap)(struct mm_struct *mm);
#ifdef CONFIG_PARAVIRT_XXL
- unsigned long (*read_cr2)(void);
+ struct paravirt_callee_save read_cr2;
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
@@ -370,18 +370,11 @@ extern struct paravirt_patch_template pv_ops;
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-#define DEF_NATIVE(ops, name, code) \
- __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
+unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
+unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
- unsigned long addr, unsigned len);
-
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
- const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
+unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
int paravirt_disable_iospace(void);
@@ -679,8 +672,8 @@ u64 _paravirt_ident_64(u64);
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 instrtype; /* type of this instruction */
+ u8 *instr; /* original instructions */
+ u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..2278797c769d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,7 +87,7 @@
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
-#define percpu_to_op(op, var, val) \
+#define percpu_to_op(qual, op, var, val) \
do { \
typedef typeof(var) pto_T__; \
if (0) { \
@@ -97,22 +97,22 @@ do { \
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_arg(0) \
+ asm qual (op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pto_T__)(val))); \
break; \
case 2: \
- asm(op "w %1,"__percpu_arg(0) \
+ asm qual (op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 4: \
- asm(op "l %1,"__percpu_arg(0) \
+ asm qual (op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 8: \
- asm(op "q %1,"__percpu_arg(0) \
+ asm qual (op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "re" ((pto_T__)(val))); \
break; \
@@ -124,7 +124,7 @@ do { \
* Generate a percpu add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(var, val) \
+#define percpu_add_op(qual, var, val) \
do { \
typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
@@ -138,41 +138,41 @@ do { \
switch (sizeof(var)) { \
case 1: \
if (pao_ID__ == 1) \
- asm("incb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addb %1, "__percpu_arg(0) \
+ asm qual ("addb %1, "__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pao_T__)(val))); \
break; \
case 2: \
if (pao_ID__ == 1) \
- asm("incw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addw %1, "__percpu_arg(0) \
+ asm qual ("addw %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 4: \
if (pao_ID__ == 1) \
- asm("incl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addl %1, "__percpu_arg(0) \
+ asm qual ("addl %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 8: \
if (pao_ID__ == 1) \
- asm("incq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addq %1, "__percpu_arg(0) \
+ asm qual ("addq %1, "__percpu_arg(0) \
: "+m" (var) \
: "re" ((pao_T__)(val))); \
break; \
@@ -180,27 +180,27 @@ do { \
} \
} while (0)
-#define percpu_from_op(op, var) \
+#define percpu_from_op(qual, op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm volatile(op "b "__percpu_arg(1)",%0"\
+ asm qual (op "b "__percpu_arg(1)",%0" \
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm volatile(op "w "__percpu_arg(1)",%0"\
+ asm qual (op "w "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm volatile(op "l "__percpu_arg(1)",%0"\
+ asm qual (op "l "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm volatile(op "q "__percpu_arg(1)",%0"\
+ asm qual (op "q "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
@@ -238,23 +238,23 @@ do { \
pfo_ret__; \
})
-#define percpu_unary_op(op, var) \
+#define percpu_unary_op(qual, op, var) \
({ \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(0) \
+ asm qual (op "b "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(0) \
+ asm qual (op "w "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(0) \
+ asm qual (op "l "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(0) \
+ asm qual (op "q "__percpu_arg(0) \
: "+m" (var)); \
break; \
default: __bad_percpu_size(); \
@@ -264,27 +264,27 @@ do { \
/*
* Add return operation
*/
-#define percpu_add_return_op(var, val) \
+#define percpu_add_return_op(qual, var, val) \
({ \
typeof(var) paro_ret__ = val; \
switch (sizeof(var)) { \
case 1: \
- asm("xaddb %0, "__percpu_arg(1) \
+ asm qual ("xaddb %0, "__percpu_arg(1) \
: "+q" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 2: \
- asm("xaddw %0, "__percpu_arg(1) \
+ asm qual ("xaddw %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 4: \
- asm("xaddl %0, "__percpu_arg(1) \
+ asm qual ("xaddl %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 8: \
- asm("xaddq %0, "__percpu_arg(1) \
+ asm qual ("xaddq %0, "__percpu_arg(1) \
: "+re" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
@@ -299,13 +299,13 @@ do { \
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
-#define percpu_xchg_op(var, nval) \
+#define percpu_xchg_op(qual, var, nval) \
({ \
typeof(var) pxo_ret__; \
typeof(var) pxo_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("\n\tmov "__percpu_arg(1)",%%al" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
"\n1:\tcmpxchgb %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -313,7 +313,7 @@ do { \
: "memory"); \
break; \
case 2: \
- asm("\n\tmov "__percpu_arg(1)",%%ax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \
"\n1:\tcmpxchgw %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -321,7 +321,7 @@ do { \
: "memory"); \
break; \
case 4: \
- asm("\n\tmov "__percpu_arg(1)",%%eax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
"\n1:\tcmpxchgl %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -329,7 +329,7 @@ do { \
: "memory"); \
break; \
case 8: \
- asm("\n\tmov "__percpu_arg(1)",%%rax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
"\n1:\tcmpxchgq %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -345,32 +345,32 @@ do { \
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
-#define percpu_cmpxchg_op(var, oval, nval) \
+#define percpu_cmpxchg_op(qual, var, oval, nval) \
({ \
typeof(var) pco_ret__; \
typeof(var) pco_old__ = (oval); \
typeof(var) pco_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("cmpxchgb %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgb %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "q" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 2: \
- asm("cmpxchgw %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgw %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 4: \
- asm("cmpxchgl %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgl %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 8: \
- asm("cmpxchgq %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgq %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
@@ -391,58 +391,70 @@ do { \
*/
#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val)
+
+/*
+ * raw_cpu_xchg() can use a load-store since it is not required to be
+ * IRQ-safe.
+ */
+#define raw_percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__ = raw_cpu_read(var); \
+ raw_cpu_write(var, (nval)); \
+ pxo_ret__; \
+})
+
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -466,23 +478,23 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a281e61ec60c..29aa7859bdee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,9 @@
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
#ifdef CONFIG_PARAVIRT_XXL
@@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp;
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
extern pgtable_t pte_alloc_one(struct mm_struct *);
-/* Should really implement gc for free page table pages. This could be
- done with a reference count in struct page. */
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index f8b1ad2c3828..e3633795fb22 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
__pteval_swp_offset(pte)))
-#define gup_get_pte gup_get_pte
-/*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atomically,
- * but that is not possible (without expensive cmpxchg8b) on PAE. What
- * we do have is the guarantee that a PTE will only either go from not
- * present to present, or present to not present or both -- it will not
- * switch to a completely different present page without a TLB flush in
- * between; something that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- *
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- *
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees 'l' iff pte_high
- * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have gotten rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. Because get_user_pages_fast() only operates on present ptes
- * we're safe.
- */
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
- pte_t pte;
-
- do {
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- } while (unlikely(pte.pte_low != ptep->pte_low));
-
- return pte;
-}
-
#include <asm/pgtable-invert.h>
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5e0509b41986..0bc530c4eb13 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -271,7 +271,7 @@ static inline int has_transparent_hugepage(void)
return boot_cpu_has(X86_FEATURE_PSE);
}
-#ifdef __HAVE_ARCH_PTE_DEVMAP
+#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pmd_devmap(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
@@ -732,7 +732,7 @@ static inline int pte_present(pte_t a)
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}
-#ifdef __HAVE_ARCH_PTE_DEVMAP
+#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pte_devmap(pte_t a)
{
return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 4fe9e7fc74d3..c78da8eda8f2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -106,6 +106,6 @@ do { \
* with only a host target support using a 32-bit type for internal
* representation.
*/
-#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
#endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 0bb566315621..4990d26dfc73 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
- unsigned long len, end;
-
- len = (unsigned long)nr_pages << PAGE_SHIFT;
- end = start + len;
- if (end < start)
- return false;
if (end >> __VIRTUAL_MASK_SHIFT)
return false;
return true;
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 88bca456da99..52e5f5f2240d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -103,7 +103,7 @@ extern unsigned int ptrs_per_p4d;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/*
- * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ * See Documentation/x86/x86_64/mm.rst for a description of the memory map.
*
* Be very careful vs. KASLR when changing anything here. The KASLR address
* range must not overlap with anything except the KASAN shadow area, which
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d6ff0bbdb394..b5e49e6bac63 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -103,7 +103,6 @@
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
-#define __HAVE_ARCH_PTE_DEVMAP
#else
#define _PAGE_NX (_AT(pteval_t, 0))
#define _PAGE_DEVMAP (_AT(pteval_t, 0))
diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h
index 059823bb8af7..16b9f220bdeb 100644
--- a/arch/x86/include/asm/platform_sst_audio.h
+++ b/arch/x86/include/asm/platform_sst_audio.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* platform_sst_audio.h: sst audio platform data header file
*
@@ -5,11 +6,6 @@
* Author: Jeeja KP <jeeja.kp@intel.com>
* Omair Mohammed Abdullah <omair.m.abdullah@intel.com>
* Vinod Koul <vinod.koul@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _PLATFORM_SST_AUDIO_H_
#define _PLATFORM_SST_AUDIO_H_
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c34a35c78618..6e0a3b43d027 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,7 +105,7 @@ struct cpuinfo_x86 {
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
- u16 x86_max_cores;
+ u16 x86_max_cores;
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
@@ -117,6 +117,8 @@ struct cpuinfo_x86 {
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
+ u16 cpu_die_id;
+ u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
@@ -144,7 +146,8 @@ enum cpuid_regs_idx {
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_NUM 11
#define X86_VENDOR_UNKNOWN 0xff
@@ -738,6 +741,7 @@ extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
+extern void cr4_init(void);
static inline unsigned long get_debugctlmsr(void)
{
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 1d081ac1cd69..b716d291d0d4 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Definitions for Device tree / OpenFirmware handling on X86
*
* based on arch/powerpc/include/asm/prom.h which is
* Copyright (C) 1996-2005 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_X86_PROM_H
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..332eb3525867 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -98,12 +98,10 @@ struct cpuinfo_x86;
struct task_struct;
extern unsigned long profile_pc(struct pt_regs *regs);
-#define profile_pc profile_pc
extern unsigned long
convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code);
+extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);
static inline unsigned long regs_return_value(struct pt_regs *regs)
@@ -166,20 +164,37 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-#endif
-#define GET_IP(regs) ((regs)->ip)
-#define GET_FP(regs) ((regs)->bp)
-#define GET_USP(regs) ((regs)->sp)
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->ip;
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->ip = val;
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return regs->bp;
+}
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
-#include <asm-generic/ptrace.h>
+static inline void user_stack_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->sp = val;
+}
/* Query offset/name of register from its name/offset */
extern int regs_query_register_offset(const char *name);
@@ -201,14 +216,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
#ifdef CONFIG_X86_32
- /*
- * Traps from the kernel do not save sp and ss.
- * Use the helper function to retrieve sp.
- */
- if (offset == offsetof(struct pt_regs, sp) &&
- regs->cs == __KERNEL_CS)
- return kernel_stack_pointer(regs);
-
/* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +241,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
- return ((addr & ~(THREAD_SIZE - 1)) ==
- (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+ return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
}
/**
@@ -249,7 +255,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *addr = (unsigned long *)regs->sp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index b6033680d458..19b695ff2c68 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
/* some helper functions for xen and kvm pv clock sources */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 8ea1cfdbeabc..71b32f2570ab 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -13,4 +13,6 @@ extern char __end_rodata_aligned[];
extern char __end_rodata_hpage_align[];
#endif
+extern char __end_of_kernel_reserve[];
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index f94a7d0ddd49..27c47d183f4b 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Supervisor Mode Access Prevention support
*
* Copyright (C) 2012 Intel Corporation
* Author: H. Peter Anvin <hpa@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _ASM_X86_SMAP_H
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index da545df207b2..e1356a3b8223 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
@@ -162,7 +163,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..219be88a59d2 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
#ifdef __KERNEL__
#include <asm/nops.h>
+#include <asm/processor-flags.h>
+#include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,8 @@
*/
extern unsigned long __force_order;
+void native_write_cr0(unsigned long val);
+
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
@@ -23,11 +27,6 @@ static inline unsigned long native_read_cr0(void)
return val;
}
-static inline void native_write_cr0(unsigned long val)
-{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
-}
-
static inline unsigned long native_read_cr2(void)
{
unsigned long val;
@@ -72,10 +71,7 @@ static inline unsigned long native_read_cr4(void)
return val;
}
-static inline void native_write_cr4(unsigned long val)
-{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
-}
+void native_write_cr4(unsigned long val);
#ifdef CONFIG_X86_64
static inline unsigned long native_read_cr8(void)
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a8d0cdf48616..14db05086bbf 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,7 +78,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
if (regs)
- return (unsigned long *)kernel_stack_pointer(regs);
+ return (unsigned long *)regs->sp;
if (task == current)
return __builtin_frame_address(0);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index b05ad16174e5..2dc4a021beea 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Access to user system call parameters and results
*
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- *
* See asm-generic/syscall.h for descriptions of what we must do here.
*/
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 9fa979dd0d9d..91b7b6e1a115 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,10 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* syscalls.h - Linux syscall interfaces (arch-specific)
*
* Copyright (c) 2008 Jaswinder Singh Rajput
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
*/
#ifndef _ASM_X86_SYSCALLS_H
diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h
index 2aeb3e25579c..9834eef7f034 100644
--- a/arch/x86/include/asm/sysfb.h
+++ b/arch/x86/include/asm/sysfb.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ARCH_X86_KERNEL_SYSFB_H
#define _ARCH_X86_KERNEL_SYSFB_H
/*
* Generic System Framebuffers on x86
* Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
index 7a6677c1a715..6ed2deacf1d0 100644
--- a/arch/x86/include/asm/tce.h
+++ b/arch/x86/include/asm/tce.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file is derived from asm-powerpc/tce.h.
*
@@ -5,20 +6,6 @@
*
* Author: Muli Ben-Yehuda <muli@il.ibm.com>
* Author: Jon Mason <jdmason@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_X86_TCE_H
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 880b5515b1d6..70c09967a999 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
+/*
+ * Currently, the max observed size in the kernel code is
+ * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
+ * Raise it if needed.
+ */
+#define POKE_MAX_OPCODE_SIZE 5
+
+struct text_poke_loc {
+ void *detour;
+ void *addr;
+ size_t len;
+ const char opcode[POKE_MAX_OPCODE_SIZE];
+};
+
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -51,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5
-#ifdef CONFIG_X86_64
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -69,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func);
}
-#endif /* CONFIG_X86_64 */
#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..8ac563abb567 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,6 +7,7 @@
extern void hpet_time_init(void);
extern void time_init(void);
+extern bool pit_timer_init(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..4b14d2318251 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
+#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
+#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef CONFIG_SMP
+#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+extern unsigned int __max_die_per_package;
+
+static inline int topology_max_die_per_package(void)
+{
+ return __max_die_per_package;
+}
+
extern int __max_smt_threads;
static inline int topology_max_smt_threads(void)
@@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void)
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
#else
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int
+topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_phys_to_logical_die(unsigned int die,
+ unsigned int cpu) { return 0; }
+static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7d6f3f3fad78..b25e633033c3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,7 @@ asmlinkage void simd_coprocessor_error(void);
asmlinkage void xen_divide_error(void);
asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
-asmlinkage void xen_xenint3(void);
+asmlinkage void xen_int3(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
#ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
void __init trap_init(void);
#endif
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c82abd6e4ca3..9c4435307ff8 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
})
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+static inline bool pagefault_disabled(void);
+# define WARN_ON_IN_IRQ() \
+ WARN_ON_ONCE(!in_task() && !pagefault_disabled())
#else
# define WARN_ON_IN_IRQ()
#endif
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 146859efd83c..097589753fec 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -54,5 +54,6 @@
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
# define __ARCH_WANT_SYS_CLONE
+# define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index d8bfa98fca98..678fb546f0a7 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_UPROBES_H
#define _ASM_UPROBES_H
/*
* User-space Probes (UProbes) for x86
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2008-2011
* Authors:
* Srikar Dronamraju
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 8cfccc3cbbf4..6e7caf65fa40 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_UV_BIOS_H
#define _ASM_X86_UV_BIOS_H
/*
* UV BIOS layer definitions.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Russ Anderson <rja@sgi.com>
*/
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..ae91429129a6
--- /dev/null
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * Copyright (C) 2019 ARM Limited.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <asm/pvclock.h>
+#include <clocksource/hyperv_timer.h>
+
+#define __vdso_data (VVAR(_vdso_data))
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+/*
+ * Declare the memory-mapped vclock data pages. These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const. The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+/*
+ * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page hvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifndef BUILD_VDSO32
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+#else
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
+ : "memory", "edx");
+
+ return ret;
+}
+
+static __always_inline long
+clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+static u64 vread_pvclock(void)
+{
+ const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
+ u32 version;
+ u64 ret;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that the
+ * version is increased whenever underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+
+ do {
+ version = pvclock_read_begin(pvti);
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
+
+ ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
+ } while (pvclock_read_retry(pvti, version));
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static u64 vread_hvclock(void)
+{
+ return hv_read_tsc_page(&hvclock_page);
+}
+#endif
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ if (clock_mode == VCLOCK_TSC)
+ return (u64)rdtsc_ordered();
+ /*
+ * For any memory-mapped vclock type, we need to make sure that gcc
+ * doesn't cleverly hoist a load before the mode check. Otherwise we
+ * might end up touching the memory-mapped page even if the vclock in
+ * question isn't enabled, which will segfault. Hence the barriers.
+ */
+#ifdef CONFIG_PARAVIRT_CLOCK
+ if (clock_mode == VCLOCK_PVCLOCK) {
+ barrier();
+ return vread_pvclock();
+ }
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ if (clock_mode == VCLOCK_HVCLOCK) {
+ barrier();
+ return vread_hvclock();
+ }
+#endif
+ return U64_MAX;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return __vdso_data;
+}
+
+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0026ab2123ce
--- /dev/null
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+int vclocks_used __read_mostly;
+
+DEFINE_VVAR(struct vdso_data, _vdso_data);
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__x86_get_k_vdso_data(void)
+{
+ return _vdso_data;
+}
+#define __arch_get_k_vdso_data __x86_get_k_vdso_data
+
+static __always_inline
+int __x86_get_clock_mode(struct timekeeper *tk)
+{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
+ return vclock_mode;
+}
+#define __arch_get_clock_mode __x86_get_clock_mode
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..a2638c6124ed 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -3,7 +3,9 @@
#define _ASM_X86_VGTOD_H
#include <linux/compiler.h>
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
#include <uapi/linux/time.h>
@@ -13,81 +15,10 @@ typedef u64 gtod_long_t;
typedef unsigned long gtod_long_t;
#endif
-/*
- * There is one of these objects in the vvar page for each
- * vDSO-accelerated clockid. For high-resolution clocks, this encodes
- * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
- * clocks, this encodes the actual time.
- *
- * To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
- */
-struct vgtod_ts {
- u64 sec;
- u64 nsec;
-};
-
-#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
-#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
-
-/*
- * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
- * so be carefull by modifying this structure.
- */
-struct vsyscall_gtod_data {
- unsigned int seq;
-
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- struct vgtod_ts basetime[VGTOD_BASES];
-
- int tz_minuteswest;
- int tz_dsttime;
-};
-extern struct vsyscall_gtod_data vsyscall_gtod_data;
-
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
-static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
-{
- unsigned int ret;
-
-repeat:
- ret = READ_ONCE(s->seq);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
- smp_rmb();
- return ret;
-}
-
-static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
- unsigned int start)
-{
- smp_rmb();
- return unlikely(s->seq != start);
-}
-
-static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
-{
- ++s->seq;
- smp_wmb();
-}
-
-static inline void gtod_write_end(struct vsyscall_gtod_data *s)
-{
- smp_wmb();
- ++s->seq;
-}
-
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 1fc7a0d1e877..9aad0e0876fb 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* CPU virtualization extensions handling
*
* This should carry the code for handling CPU virtualization extensions
@@ -8,9 +9,6 @@
* Copyright (C) 2008, Red Hat Inc.
*
* Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
*/
#ifndef _ASM_X86_VIRTEX_H
#define _ASM_X86_VIRTEX_H
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4e4133e86484..a39136b0d509 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -1,25 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* vmx.h: VMX Architecture related definitions
* Copyright (c) 2004, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
* A few random additions are:
* Copyright (C) 2006 Qumranet
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
- *
*/
#ifndef VMX_H
#define VMX_H
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..ab60a71a8dcb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
return false;
}
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 3f32dfc2ab73..32f5d9a0b90e 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* vvar.h: Shared vDSO/kernel variable declarations
* Copyright (c) 2011 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
*
* A handful of variables are accessible (read-only) from userspace
* code in the vsyscall page and the vdso. They are declared here.
@@ -32,19 +32,20 @@
extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name __attribute__((visibility("hidden")));
+ extern type vvar_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden")));
#define VVAR(name) (vvar_ ## name)
#define DEFINE_VVAR(type, name) \
- type name \
+ type name[CS_BASES] \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
#endif
/* DECLARE_VVAR(offset, type, name) */
-DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
+DECLARE_VVAR(128, struct vdso_data, _vdso_data)
#undef DECLARE_VVAR
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b85a7c54c6a1..ac0934189017 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -301,6 +301,8 @@ extern struct x86_apic_ops x86_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
+extern bool bool_x86_init_noop(void);
+extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
#endif
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 39171b3646bb..42e1245af0d8 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -44,14 +44,14 @@ static inline uint32_t xen_cpuid_base(void)
}
#ifdef CONFIG_XEN
-extern bool xen_hvm_need_lapic(void);
+extern bool __init xen_hvm_need_lapic(void);
-static inline bool xen_x2apic_para_available(void)
+static inline bool __init xen_x2apic_para_available(void)
{
return xen_hvm_need_lapic();
}
#else
-static inline bool xen_x2apic_para_available(void)
+static inline bool __init xen_x2apic_para_available(void)
{
return (xen_cpuid_base() != 0);
}
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 45c8605467f1..2ee95a7769e6 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_XOR_H
#define _ASM_X86_XOR_H
/*
* Optimized RAID-5 checksumming functions for SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 635eac543922..67ceb790e639 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_X86_XOR_32_H
#define _ASM_X86_XOR_32_H
/*
* Optimized RAID-5 checksumming functions for MMX.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 22a7b1870a31..d61ddf3d052b 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H
@@ -8,11 +9,6 @@
* Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifdef CONFIG_AS_AVX
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 59b5ad310f78..39606a856d3b 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
generated-y += unistd_32.h
generated-y += unistd_64.h
generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 60733f137e9a..c895df5482c5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -29,6 +29,8 @@
#define XLF_EFI_HANDOVER_32 (1<<2)
#define XLF_EFI_HANDOVER_64 (1<<3)
#define XLF_EFI_KEXEC (1<<4)
+#define XLF_5LEVEL (1<<5)
+#define XLF_5LEVEL_ENABLED (1<<6)
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7a0e64ccd6ff..503d3f42da16 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,10 +378,14 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+
+#define KVM_STATE_NESTED_FORMAT_VMX 0
+#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
@@ -390,9 +394,16 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+
+struct kvm_vmx_nested_state_data {
+ __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
+struct kvm_vmx_nested_state_hdr {
__u64 vmxon_pa;
- __u64 vmcs_pa;
+ __u64 vmcs12_pa;
struct {
__u16 flags;
@@ -401,24 +412,38 @@ struct kvm_vmx_nested_state {
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
- /* KVM_STATE_* flags */
__u16 flags;
-
- /* 0 for VMX, 1 for SVM. */
__u16 format;
-
- /* 128 for SVM, 128 + VMCS size for VMX. */
__u32 size;
union {
- /* VMXON, VMCS */
- struct kvm_vmx_nested_state vmx;
+ struct kvm_vmx_nested_state_hdr vmx;
/* Pad the header to 128 bytes. */
__u8 pad[120];
- };
+ } hdr;
- __u8 data[0];
+ /*
+ * Define data region as 0 bytes to preserve backwards-compatibility
+ * to old definition of kvm_nested_state in order to avoid changing
+ * KVM_{GET,PUT}_NESTED_STATE ioctl values.
+ */
+ union {
+ struct kvm_vmx_nested_state_data vmx[0];
+ } data;
+};
+
+/* for KVM_CAP_PMU_EVENT_FILTER */
+struct kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[0];
};
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec1a316..2a8e0b6b9805 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,6 +29,8 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
+#define KVM_FEATURE_POLL_CONTROL 12
+#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_HINTS_REALTIME 0
@@ -47,6 +49,7 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_POLL_CONTROL 0x4b564d05
struct kvm_steal_time {
__u64 steal;
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index ac67bbea10ca..7c9d2bb3833b 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -52,4 +52,7 @@ enum perf_event_x86_regs {
/* These include both GPRs and XMMX registers */
PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
+
+#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..f0b0c90dd398 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,7 +146,6 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
-#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ce1b5cc360a2..3578ad248bc9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
ifdef CONFIG_FRAME_POINTER
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index bb8d300fecbd..c22fb55abcfd 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Arch-specific APEI-related functions.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <acpi/apei.h>
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9fc92e4539d8..17b33ef604f3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1,26 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* boot.c - Architecture-Specific Low-Level ACPI Boot Support
*
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <linux/init.h>
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c
index 6fb478bf82fd..b961de569e7e 100644
--- a/arch/x86/kernel/acpi/cppc_msr.c
+++ b/arch/x86/kernel/acpi/cppc_msr.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* cppc_msr.c: MSR Interface for CPPC
* Copyright (c) 2016, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#include <acpi/cppc_acpi.h>
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index cb6e076a6d39..caf2edccbad2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2005 Intel Corporation
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
@@ -63,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
c->x86_stepping >= 0x0e))
flags->bm_check = 1;
}
+
+ if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+ /*
+ * All Zhaoxin CPUs that support C3 share cache.
+ * And caches should not be flushed by software while
+ * entering C3 type state.
+ */
+ flags->bm_check = 1;
+ /*
+ * On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
+ * So, set bm_control to zero to indicate that ARB_DISABLE
+ * is not required while entering C3 type state.
+ */
+ flags->bm_control = 0;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 4203d4f0c68d..e95e95960156 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,9 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
.text
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
-# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
+# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>
.code32
ALIGN
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 510fa12aab73..b0715c3ac18d 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
.text
#include <linux/linkage.h>
#include <asm/segment.h>
@@ -7,7 +8,7 @@
#include <asm/asm-offsets.h>
#include <asm/frame.h>
-# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
+# Copyright 2003 Pavel Machek <pavel@suse.cz>
.code64
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7b9b49dfc05a..ccd32013c47a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "SMP alternatives: " fmt
#include <linux/module.h>
@@ -13,6 +14,7 @@
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
+#include <linux/bsearch.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -276,7 +278,7 @@ static inline bool is_jmp(const u8 opcode)
}
static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
u8 *next_rip, *tgt_rip;
s32 n_dspl, o_dspl;
@@ -285,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
if (a->replacementlen != 5)
return;
- o_dspl = *(s32 *)(insnbuf + 1);
+ o_dspl = *(s32 *)(insn_buff + 1);
/* next_rip of the replacement JMP */
next_rip = repl_insn + a->replacementlen;
@@ -311,9 +313,9 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
two_byte_jmp:
n_dspl -= 2;
- insnbuf[0] = 0xeb;
- insnbuf[1] = (s8)n_dspl;
- add_nops(insnbuf + 2, 3);
+ insn_buff[0] = 0xeb;
+ insn_buff[1] = (s8)n_dspl;
+ add_nops(insn_buff + 2, 3);
repl_len = 2;
goto done;
@@ -321,8 +323,8 @@ two_byte_jmp:
five_byte_jmp:
n_dspl -= 5;
- insnbuf[0] = 0xe9;
- *(s32 *)&insnbuf[1] = n_dspl;
+ insn_buff[0] = 0xe9;
+ *(s32 *)&insn_buff[1] = n_dspl;
repl_len = 5;
@@ -369,7 +371,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
{
struct alt_instr *a;
u8 *instr, *replacement;
- u8 insnbuf[MAX_PATCH_LEN];
+ u8 insn_buff[MAX_PATCH_LEN];
DPRINTK("alt table %px, -> %px", start, end);
/*
@@ -382,11 +384,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* order.
*/
for (a = start; a < end; a++) {
- int insnbuf_sz = 0;
+ int insn_buff_sz = 0;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
if (!boot_cpu_has(a->cpuid)) {
if (a->padlen > 1)
@@ -404,8 +406,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
- memcpy(insnbuf, replacement, a->replacementlen);
- insnbuf_sz = a->replacementlen;
+ memcpy(insn_buff, replacement, a->replacementlen);
+ insn_buff_sz = a->replacementlen;
/*
* 0xe8 is a relative jump; fix the offset.
@@ -413,24 +415,24 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* Instruction length is checked before the opcode to avoid
* accessing uninitialized bytes for zero-length replacements.
*/
- if (a->replacementlen == 5 && *insnbuf == 0xe8) {
- *(s32 *)(insnbuf + 1) += replacement - instr;
+ if (a->replacementlen == 5 && *insn_buff == 0xe8) {
+ *(s32 *)(insn_buff + 1) += replacement - instr;
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
- *(s32 *)(insnbuf + 1),
- (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+ *(s32 *)(insn_buff + 1),
+ (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
}
if (a->replacementlen && is_jmp(replacement[0]))
- recompute_jump(a, instr, replacement, insnbuf);
+ recompute_jump(a, instr, replacement, insn_buff);
if (a->instrlen > a->replacementlen) {
- add_nops(insnbuf + a->replacementlen,
+ add_nops(insn_buff + a->replacementlen,
a->instrlen - a->replacementlen);
- insnbuf_sz += a->instrlen - a->replacementlen;
+ insn_buff_sz += a->instrlen - a->replacementlen;
}
- DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
+ DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insnbuf, insnbuf_sz);
+ text_poke_early(instr, insn_buff, insn_buff_sz);
}
}
@@ -592,33 +594,119 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
struct paravirt_patch_site *p;
- char insnbuf[MAX_PATCH_LEN];
+ char insn_buff[MAX_PATCH_LEN];
for (p = start; p < end; p++) {
unsigned int used;
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
- memcpy(insnbuf, p->instr, p->len);
- used = pv_ops.init.patch(p->instrtype, insnbuf,
- (unsigned long)p->instr, p->len);
+ memcpy(insn_buff, p->instr, p->len);
+ used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
/* Pad the rest with nops */
- add_nops(insnbuf + used, p->len - used);
- text_poke_early(p->instr, insnbuf, p->len);
+ add_nops(insn_buff + used, p->len - used);
+ text_poke_early(p->instr, insn_buff, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
+/*
+ * Self-test for the INT3 based CALL emulation code.
+ *
+ * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
+ * properly and that there is a stack gap between the INT3 frame and the
+ * previous context. Without this gap doing a virtual PUSH on the interrupted
+ * stack would corrupt the INT3 IRET frame.
+ *
+ * See entry_{32,64}.S for more details.
+ */
+
+/*
+ * We define the int3_magic() function in assembly to control the calling
+ * convention such that we can 'call' it from assembly.
+ */
+
+extern void int3_magic(unsigned int *ptr); /* defined in asm */
+
+asm (
+" .pushsection .init.text, \"ax\", @progbits\n"
+" .type int3_magic, @function\n"
+"int3_magic:\n"
+" movl $1, (%" _ASM_ARG1 ")\n"
+" ret\n"
+" .size int3_magic, .-int3_magic\n"
+" .popsection\n"
+);
+
+extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
+
+static int __init
+int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!regs || user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (val != DIE_INT3)
+ return NOTIFY_DONE;
+
+ if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
+ return NOTIFY_DONE;
+
+ int3_emulate_call(regs, (unsigned long)&int3_magic);
+ return NOTIFY_STOP;
+}
+
+static void __init int3_selftest(void)
+{
+ static __initdata struct notifier_block int3_exception_nb = {
+ .notifier_call = int3_exception_notify,
+ .priority = INT_MAX-1, /* last */
+ };
+ unsigned int val = 0;
+
+ BUG_ON(register_die_notifier(&int3_exception_nb));
+
+ /*
+ * Basically: int3_magic(&val); but really complicated :-)
+ *
+ * Stick the address of the INT3 instruction into int3_selftest_ip,
+ * then trigger the INT3, padded with NOPs to match a CALL instruction
+ * length.
+ */
+ asm volatile ("1: int3; nop; nop; nop; nop\n\t"
+ ".pushsection .init.data,\"aw\"\n\t"
+ ".align " __ASM_SEL(4, 8) "\n\t"
+ ".type int3_selftest_ip, @object\n\t"
+ ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
+ "int3_selftest_ip:\n\t"
+ __ASM_SEL(.long, .quad) " 1b\n\t"
+ ".popsection\n\t"
+ : ASM_CALL_CONSTRAINT
+ : __ASM_SEL_RAW(a, D) (&val)
+ : "memory");
+
+ BUG_ON(val != 1);
+
+ unregister_die_notifier(&int3_exception_nb);
+}
+
void __init alternative_instructions(void)
{
- /* The patching is not fully atomic, so try to avoid local interruptions
- that might execute the to be patched code.
- Other CPUs are not running. */
+ int3_selftest();
+
+ /*
+ * The patching is not fully atomic, so try to avoid local
+ * interruptions that might execute the to be patched code.
+ * Other CPUs are not running.
+ */
stop_nmi();
/*
@@ -643,10 +731,11 @@ void __init alternative_instructions(void)
_text, _etext);
}
- if (!uniproc_patched || num_possible_cpus() == 1)
+ if (!uniproc_patched || num_possible_cpus() == 1) {
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
+ }
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
@@ -847,81 +936,133 @@ static void do_sync_core(void *info)
sync_core();
}
-static bool bp_patching_in_progress;
-static void *bp_int3_handler, *bp_int3_addr;
+static struct bp_patching_desc {
+ struct text_poke_loc *vec;
+ int nr_entries;
+} bp_patching;
+
+static int patch_cmp(const void *key, const void *elt)
+{
+ struct text_poke_loc *tp = (struct text_poke_loc *) elt;
+
+ if (key < tp->addr)
+ return -1;
+ if (key > tp->addr)
+ return 1;
+ return 0;
+}
+NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
+ struct text_poke_loc *tp;
+ unsigned char int3 = 0xcc;
+ void *ip;
+
/*
* Having observed our INT3 instruction, we now must observe
- * bp_patching_in_progress.
+ * bp_patching.nr_entries.
*
- * in_progress = TRUE INT3
+ * nr_entries != 0 INT3
* WMB RMB
- * write INT3 if (in_progress)
+ * write INT3 if (nr_entries)
*
- * Idem for bp_int3_handler.
+ * Idem for other elements in bp_patching.
*/
smp_rmb();
- if (likely(!bp_patching_in_progress))
+ if (likely(!bp_patching.nr_entries))
return 0;
- if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ if (user_mode(regs))
return 0;
- /* set up the specified breakpoint handler */
- regs->ip = (unsigned long) bp_int3_handler;
+ /*
+ * Discount the sizeof(int3). See text_poke_bp_batch().
+ */
+ ip = (void *) regs->ip - sizeof(int3);
+
+ /*
+ * Skip the binary search if there is a single member in the vector.
+ */
+ if (unlikely(bp_patching.nr_entries > 1)) {
+ tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+ sizeof(struct text_poke_loc),
+ patch_cmp);
+ if (!tp)
+ return 0;
+ } else {
+ tp = bp_patching.vec;
+ if (tp->addr != ip)
+ return 0;
+ }
+
+ /* set up the specified breakpoint detour */
+ regs->ip = (unsigned long) tp->detour;
return 1;
}
NOKPROBE_SYMBOL(poke_int3_handler);
/**
- * text_poke_bp() -- update instructions on live kernel on SMP
- * @addr: address to patch
- * @opcode: opcode of new instruction
- * @len: length to copy
- * @handler: address to jump to when the temporary breakpoint is hit
+ * text_poke_bp_batch() -- update instructions on live kernel on SMP
+ * @tp: vector of instructions to patch
+ * @nr_entries: number of entries in the vector
*
* Modify multi-byte instruction by using int3 breakpoint on SMP.
* We completely avoid stop_machine() here, and achieve the
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - add a int3 trap to the address that will be patched
+ * - For each entry in the vector:
+ * - add an int3 trap to the address that will be patched
* - sync cores
- * - update all but the first byte of the patched range
+ * - For each entry in the vector:
+ * - update all but the first byte of the patched range
* - sync cores
- * - replace the first byte (int3) by the first byte of
- * replacing opcode
+ * - For each entry in the vector:
+ * - replace the first byte (int3) by the first byte of
+ * replacing opcode
* - sync cores
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
+ int patched_all_but_first = 0;
unsigned char int3 = 0xcc;
-
- bp_int3_handler = handler;
- bp_int3_addr = (u8 *)addr + sizeof(int3);
- bp_patching_in_progress = true;
+ unsigned int i;
lockdep_assert_held(&text_mutex);
+ bp_patching.vec = tp;
+ bp_patching.nr_entries = nr_entries;
+
/*
* Corresponding read barrier in int3 notifier for making sure the
- * in_progress and handler are correctly ordered wrt. patching.
+ * nr_entries and handler are correctly ordered wrt. patching.
*/
smp_wmb();
- text_poke(addr, &int3, sizeof(int3));
+ /*
+ * First step: add an int3 trap to the address that will be patched.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, &int3, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
- if (len - sizeof(int3) > 0) {
- /* patch all but the first byte */
- text_poke((char *)addr + sizeof(int3),
- (const char *) opcode + sizeof(int3),
- len - sizeof(int3));
+ /*
+ * Second step: update all but the first byte of the patched range.
+ */
+ for (i = 0; i < nr_entries; i++) {
+ if (tp[i].len - sizeof(int3) > 0) {
+ text_poke((char *)tp[i].addr + sizeof(int3),
+ (const char *)tp[i].opcode + sizeof(int3),
+ tp[i].len - sizeof(int3));
+ patched_all_but_first++;
+ }
+ }
+
+ if (patched_all_but_first) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -930,14 +1071,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
on_each_cpu(do_sync_core, NULL, 1);
}
- /* patch the first byte */
- text_poke(addr, opcode, sizeof(int3));
+ /*
+ * Third step: replace the first byte (int3) by the first byte of
+ * replacing opcode.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
*/
- bp_patching_in_progress = false;
+ bp_patching.vec = NULL;
+ bp_patching.nr_entries = 0;
}
+/**
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @handler: address to jump to when the temporary breakpoint is hit
+ *
+ * Update a single instruction with the vector on the stack, avoiding
+ * dynamically allocated memory. This function should be used when it is
+ * not possible to allocate memory.
+ */
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+{
+ struct text_poke_loc tp = {
+ .detour = handler,
+ .addr = addr,
+ .len = len,
+ };
+
+ if (len > POKE_MAX_OPCODE_SIZE) {
+ WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
+ return;
+ }
+
+ memcpy((void *)tp.opcode, opcode, len);
+
+ text_poke_bp_batch(&tp, 1);
+}
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index bf7f13ea3c64..a585ea6f686a 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Dynamic DMA mapping support for AMD Hammer.
*
@@ -8,7 +9,6 @@
* See Documentation/DMA-API-HOWTO.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU General Public License v2 only.
*/
#include <linux/types.h>
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index cc51275c8759..d63e63b7d1d9 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared support code for AMD K8 northbridges and derivates.
- * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -71,7 +72,7 @@ static const struct pci_device_id hygon_root_ids[] = {
{}
};
-const struct pci_device_id hygon_nb_misc_ids[] = {
+static const struct pci_device_id hygon_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 65721dc73bd8..5da106f84e84 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* apb_timer.c: Driver for Langwell APB timers
*
* (C) Copyright 2009 Intel Corporation
* Author: Jacob Pan (jacob.jun.pan@intel.com)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* Note:
* Langwell is the south complex of Intel Moorestown MID platform. There are
* eight external timers in total that can be used by the operating system.
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ab6af775f06c..f5291362da1a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Local APIC handling, local APIC timers
*
@@ -182,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
-unsigned int apic_verbosity;
+int apic_verbosity;
int pic_mode;
@@ -194,7 +195,7 @@ static struct resource lapic_resource = {
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
-unsigned int lapic_timer_frequency = 0;
+unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
@@ -500,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
- __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
+ __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
return 0;
}
@@ -804,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
static int __init lapic_init_clockevent(void)
{
- if (!lapic_timer_frequency)
+ if (!lapic_timer_period)
return -1;
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
TICK_NSEC, lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
@@ -820,6 +821,33 @@ static int __init lapic_init_clockevent(void)
return 0;
}
+bool __init apic_needs_pit(void)
+{
+ /*
+ * If the frequencies are not known, PIT is required for both TSC
+ * and apic timer calibration.
+ */
+ if (!tsc_khz || !cpu_khz)
+ return true;
+
+ /* Is there an APIC at all? */
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return true;
+
+ /* Deadline timer is based on TSC so no further PIT action required */
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+
+ /* APIC timer disabled? */
+ if (disable_apic_timer)
+ return true;
+ /*
+ * The APIC timer frequency is known already, no PIT calibration
+ * required. If unknown, let the PIT be initialized.
+ */
+ return lapic_timer_period == 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -838,7 +866,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
+ lapic_timer_period);
/*
* Direct calibration methods must have an always running
* local APIC timer, no need for broadcast timer.
@@ -883,13 +911,13 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
- lapic_timer_frequency);
+ lapic_timer_period);
if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -900,13 +928,13 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
"%u.%04u MHz.\n",
- lapic_timer_frequency / (1000000 / HZ),
- lapic_timer_frequency % (1000000 / HZ));
+ lapic_timer_period / (1000000 / HZ),
+ lapic_timer_period % (1000000 / HZ));
/*
* Do a sanity check on the APIC calibration result
*/
- if (lapic_timer_frequency < (1000000 / HZ)) {
+ if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
@@ -1350,6 +1378,8 @@ void __init init_bsp_APIC(void)
apic_write(APIC_LVT1, value);
}
+static void __init apic_bsp_setup(bool upmode);
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
@@ -1463,7 +1493,8 @@ static void apic_pending_intr_clear(void)
if (queued) {
if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ max_loops = (long long)cpu_khz << 10;
+ max_loops -= ntsc - tsc;
} else {
max_loops--;
}
@@ -2039,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
entering_irq();
trace_spurious_apic_entry(vector);
+ inc_irq_stat(irq_spurious_count);
+
+ /*
+ * If this is a spurious interrupt then do not acknowledge
+ */
+ if (vector == SPURIOUS_APIC_VECTOR) {
+ /* See SDM vol 3 */
+ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+ smp_processor_id());
+ goto out;
+ }
+
/*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
+ * If it is a vectored one, verify it's set in the ISR. If set,
+ * acknowledge it.
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
- if (v & (1 << (vector & 0x1f)))
+ if (v & (1 << (vector & 0x1f))) {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ vector, smp_processor_id());
ack_APIC_irq();
-
- inc_irq_stat(irq_spurious_count);
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
- "should never happen.\n", vector, smp_processor_id());
-
+ } else {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ vector, smp_processor_id());
+ }
+out:
trace_spurious_apic_exit(vector);
exiting_irq();
}
@@ -2414,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
/**
* apic_bsp_setup - Setup function for local apic and io-apic
* @upmode: Force UP mode (for APIC_init_uniprocessor)
- *
- * Returns:
- * apic_id of BSP APIC
*/
-void __init apic_bsp_setup(bool upmode)
+static void __init apic_bsp_setup(bool upmode)
{
connect_bsp_APIC();
if (upmode)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0005c284a5c5..bbdca603f94a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Flat APIC subarch code.
*
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
int cpu = smp_processor_id();
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -58,6 +58,7 @@
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
+#include <asm/time.h>
#include <asm/i8259.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
return ret;
}
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it, it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+ raw_spin_lock(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. Its
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */
+ if (rentry.irr && rentry.trigger) {
+ *state = true;
+ break;
+ }
+ }
+ raw_spin_unlock(&ioapic_lock);
+ return 0;
+}
+
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_eoi = ioapic_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_eoi = ioapic_ir_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -2083,6 +2130,9 @@ static inline void __init check_timer(void)
unsigned long flags;
int no_pin1 = 0;
+ if (!global_clock_event)
+ return;
+
local_irq_save(flags);
/*
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 72a94401f9e0..7f7533462474 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Support of MSI, HPET and DMAR interrupts.
*
@@ -5,10 +6,6 @@
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Convert to hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/mm.h>
#include <linux/interrupt.h>
@@ -373,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
return d;
}
-int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
+int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
int dev_num)
{
struct irq_alloc_info info;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.hpet_data = dev;
+ info.hpet_data = hc;
info.hpet_id = hpet_dev_id(domain);
info.hpet_index = dev_num;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 47ff2976c292..1492799b8f43 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Default generic APIC driver. This handles up to 8 CPUs.
*
* Copyright 2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
*
* Generic x86 APIC driver probe layer.
*/
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index c303054b90b5..e6560a02eb46 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
*
* Generic APIC sub-arch probe layer.
*
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3173e07d3791..fdacb864c3dd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Local APIC related interfaces to support IOAPIC, MSI, etc.
*
@@ -5,10 +6,6 @@
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Enable support of hierarchical irqdomains
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
#include <linux/irq.h>
@@ -343,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd)
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
apicd->prev_cpu);
- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
apicd->vector = 0;
@@ -352,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd)
if (!vector)
return;
- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 7685444a106b..609e499387a1 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
cpumask_copy(tmpmsk, mask);
/* If IPI should not be sent to self, clear current CPU */
if (apic_dest != APIC_DEST_ALLINC)
- cpumask_clear_cpu(smp_processor_id(), tmpmsk);
+ __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f7151cd03cb0..660270359d39 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*-
* APM BIOS driver for Linux
* Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
@@ -5,16 +6,6 @@
* Initial development of this driver was funded by NEC Australia P/L
* and NEC Corporation
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
* October 1995, Rik Faith (faith@cs.unc.edu):
* Minor enhancements and updates (to the patch set) for 1.3.x
* Documentation
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,7 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
@@ -77,6 +76,7 @@ static void __used common(void)
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+ OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
#endif
BLANK();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1796d2bdcaaa..d7a1e5a9331c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -24,17 +24,22 @@ obj-y += match.o
obj-y += bugs.o
obj-y += aperfmperf.o
obj-y += cpuid-deps.o
+obj-y += umwait.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o
+ifdef CONFIG_CPU_SUP_INTEL
+obj-y += intel.o intel_pconfig.o
+obj-$(CONFIG_PM) += intel_epb.o
+endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
+obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
@@ -44,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+obj-$(CONFIG_ACRN_GUEST) += acrn.o
ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
@@ -51,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@
cpufeature = $(src)/../../include/asm/cpufeatures.h
-targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
endif
-clean-files += capflags.c
+targets += capflags.c
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
new file mode 100644
index 000000000000..676022e71791
--- /dev/null
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN detection support
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ *
+ * Jason Chen CJ <jason.cj.chen@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <asm/acrn.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/hypervisor.h>
+#include <asm/irq_regs.h>
+
+static uint32_t __init acrn_detect(void)
+{
+ return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+}
+
+static void __init acrn_init_platform(void)
+{
+ /* Setup the IDT for ACRN hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
+}
+
+static bool acrn_x2apic_available(void)
+{
+ /*
+ * x2apic is not supported for now. Future enablement will have to check
+ * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
+ * guest.
+ */
+ return false;
+}
+
+static void (*acrn_intr_handler)(void);
+
+__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ /*
+ * The hypervisor requires that the APIC EOI should be acked.
+ * If the APIC EOI is not acked, the APIC ISR bit for the
+ * HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it
+ * will block the interrupt whose vector is lower than
+ * HYPERVISOR_CALLBACK_VECTOR.
+ */
+ entering_ack_irq();
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (acrn_intr_handler)
+ acrn_intr_handler();
+
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
+const __initconst struct hypervisor_x86 x86_hyper_acrn = {
+ .name = "ACRN",
+ .detect = acrn_detect,
+ .type = X86_HYPER_ACRN,
+ .init.init_platform = acrn_init_platform,
+ .init.x2apic_available = acrn_x2apic_available,
+};
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fb6a64bd765f..8d4e50428b68 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/elf.h>
@@ -823,8 +824,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
- /* Fix erratum 1076: CPB feature bit not being set in CPUID. */
- if (!cpu_has(c, X86_FEATURE_CPB))
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID.
+ * Always set it, except when running under a hypervisor.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
}
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 64d5aec24203..e2f319dc992d 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* x86 APERF/MPERF KHz calculation for
* /sys/.../cpufreq/scaling_cur_freq
*
* Copyright (C) 2017 Intel Corp.
* Author: Len Brown <len.brown@intel.com>
- *
- * This file is licensed under GPLv2.
*/
#include <linux/delay.h>
@@ -14,6 +13,7 @@
#include <linux/percpu.h>
#include <linux/cpufreq.h>
#include <linux/smp.h>
+#include <linux/sched/isolation.h>
#include "cpu.h"
@@ -86,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
return per_cpu(samples.khz, cpu);
}
@@ -102,9 +105,12 @@ void arch_freq_prepare_all(void)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ continue;
if (!aperfmperf_snapshot_cpu(cpu, now, false))
wait = true;
+ }
if (wait)
msleep(APERFMPERF_REFRESH_DELAY_MS);
@@ -118,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
return per_cpu(samples.khz, cpu);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
}
/*
+ * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+ * bit in the mask to allow guests to use the mitigation even in the
+ * case where the host does not enable it.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ }
+
+ /*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
}
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 395d46f78582..c7503be92f35 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
- } else if (c->x86 == 0x17 &&
- c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d7f55ad2dfb1..11472178e17f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5
@@ -365,6 +366,77 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); /* off until setup_cr_pinning() enables it */
+static unsigned long cr4_pinned_bits __ro_after_init; /* CR4 bits that native_write_cr4() re-asserts */
+
+void native_write_cr0(unsigned long val) /* Write @val to CR0, re-asserting X86_CR0_WP when CR pinning is on. */
+{
+ unsigned long bits_missing = 0; /* becomes X86_CR0_WP if the caller's value lacked it */
+
+set_register: /* re-entered after the missing bit has been ORed into val */
+ asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+
+ if (static_branch_likely(&cr_pinning)) { /* key is enabled by setup_cr_pinning() */
+ if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+ bits_missing = X86_CR0_WP;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+ }
+}
+EXPORT_SYMBOL(native_write_cr0);
+
+void native_write_cr4(unsigned long val) /* Write @val to CR4, re-asserting cr4_pinned_bits when CR pinning is on. */
+{
+ unsigned long bits_missing = 0; /* pinned bits that were clear in the caller's value */
+
+set_register: /* re-entered after the missing bits have been ORed into val */
+ asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); /* NOTE(review): memory operand is cr4_pinned_bits here but __force_order in native_write_cr0() - confirm intent */
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+ bits_missing = ~val & cr4_pinned_bits;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+ bits_missing);
+ }
+}
+EXPORT_SYMBOL(native_write_cr4);
+
+void cr4_init(void) /* Build this CPU's CR4 value, write it, and seed the per-CPU shadow copy. */
+{
+ unsigned long cr4 = __read_cr4(); /* start from the current register contents */
+
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (static_branch_likely(&cr_pinning)) /* only true once setup_cr_pinning() has run */
+ cr4 |= cr4_pinned_bits;
+
+ __write_cr4(cr4);
+
+ /* Initialize cr4 shadow for this CPU. */
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+}
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record which of SMEP/SMAP/UMIP are set in the CR4 shadow
+ * and enable CR pinning. Called at the end of identify_boot_cpu().
+ */
+static void __init setup_cr_pinning(void)
+{
+ unsigned long mask;
+
+ mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); /* the only CR4 bits eligible for pinning */
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; /* taken from the CR4 shadow, not the register */
+ static_key_enable(&cr_pinning.key); /* from here on native_write_cr0()/native_write_cr4() enforce the pins */
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -800,6 +872,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
+static void init_cqm(struct cpuinfo_x86 *c) /* Fill c->x86_cache_max_rmid and c->x86_cache_occ_scale from CPUID leaf 0xF. */
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1; /* both fields are set to -1 when CQM_LLC is absent */
+ c->x86_cache_occ_scale = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx; /* assigned only here and on the early-return path above */
+ }
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -822,6 +918,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
c->x86_capability[CPUID_7_EDX] = edx;
+
+ /* Check valid sub-leaf index before accessing it */
+ if (eax >= 1) {
+ cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_7_1_EAX] = eax;
+ }
}
/* Extended state features: level 0x0000000d */
@@ -831,33 +933,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* Additional Intel-defined flags: level 0x0000000F */
- if (c->cpuid_level >= 0x0000000F) {
-
- /* QoS sub-leaf, EAX=0Fh, ECX=0 */
- cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_0_EDX] = edx;
-
- if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = ebx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_1_EDX] = edx;
-
- if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
- ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
- (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
- } else {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- }
- }
-
/* AMD-defined flags: level 0x80000001 */
eax = cpuid_eax(0x80000000);
c->extended_cpuid_level = eax;
@@ -888,6 +963,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
+ init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1298,6 +1374,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
cpu, apicid, c->initial_apicid);
}
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
+ BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
c->logical_proc_id = 0;
#endif
@@ -1463,6 +1540,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu();
#endif
cpu_detect_tlb(&boot_cpu_data);
+ setup_cr_pinning();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1697,12 +1775,6 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
-
if (cpu)
load_ucode_ap();
@@ -1797,12 +1869,6 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
-
show_ucode_info_early();
pr_info("Initializing CPU#%d\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab..b5353244749b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -20,6 +20,7 @@ struct cpuid_dep {
* but it's difficult to tell that to the init reference checker.
*/
static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_FXSR, X86_FEATURE_FPU },
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMX, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{}
};
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 479ca4728de0..553bfbfc3a1b 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -26,13 +26,6 @@
#include <asm/processor.h>
#include <asm/hypervisor.h>
-extern const struct hypervisor_x86 x86_hyper_vmware;
-extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_pv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
-extern const struct hypervisor_x86 x86_hyper_kvm;
-extern const struct hypervisor_x86 x86_hyper_jailhouse;
-
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PV
@@ -49,11 +42,22 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_JAILHOUSE_GUEST
&x86_hyper_jailhouse,
#endif
+#ifdef CONFIG_ACRN_GUEST
+ &x86_hyper_acrn,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
EXPORT_SYMBOL(x86_hyper_type);
+bool __initdata nopv; /* set by "nopv"; detect_hypervisor_vendor() then skips entries without ignore_nopv */
+static __init int parse_nopv(char *arg) /* @arg is not examined; presence of the option is enough */
+{
+ nopv = true;
+ return 0;
+}
+early_param("nopv", parse_nopv);
+
static inline const struct hypervisor_x86 * __init
detect_hypervisor_vendor(void)
{
@@ -61,6 +65,9 @@ detect_hypervisor_vendor(void)
uint32_t pri, max_pri = 0;
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
+ if (unlikely(nopv) && !(*p)->ignore_nopv)
+ continue;
+
pri = (*p)->detect();
if (pri > max_pri) {
max_pri = pri;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f17c1a714779..8d6d92ebeb54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+/*
+ * Processors which have self-snooping capability can handle conflicting
+ * memory types across CPUs by snooping their own cache. However, there
+ * exist CPU models in which having conflicting memory types still leads
+ * to unpredictable behavior, machine check errors, or hangs. Clear this
+ * feature to prevent its use on machines with known errata.
+ */
+static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_model) { /* NOTE(review): only the model is compared; family 6 is presumably guaranteed by the caller - confirm */
+ case INTEL_FAM6_CORE_YONAH:
+ case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_G:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_SANDYBRIDGE:
+ setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); /* shared by every case above; other models are left untouched */
+ }
+}
+
static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
check_mpx_erratum(c);
+ check_memory_type_self_snoop_errata(c);
/*
* Get the number of SMT siblings early from the extended topology
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index ebb14a26f117..f4dd73396f28 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -97,7 +97,6 @@ static void intel_epb_restore(void)
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
}
-#ifdef CONFIG_PM
static struct syscore_ops intel_epb_syscore_ops = {
.suspend = intel_epb_save,
.resume = intel_epb_restore,
@@ -194,25 +193,6 @@ static int intel_epb_offline(unsigned int cpu)
return 0;
}
-static inline void register_intel_ebp_syscore_ops(void)
-{
- register_syscore_ops(&intel_epb_syscore_ops);
-}
-#else /* !CONFIG_PM */
-static int intel_epb_online(unsigned int cpu)
-{
- intel_epb_restore();
- return 0;
-}
-
-static int intel_epb_offline(unsigned int cpu)
-{
- return intel_epb_save();
-}
-
-static inline void register_intel_ebp_syscore_ops(void) {}
-#endif
-
static __init int intel_epb_init(void)
{
int ret;
@@ -226,7 +206,7 @@ static __init int intel_epb_init(void)
if (ret < 0)
goto err_out_online;
- register_intel_ebp_syscore_ops();
+ register_syscore_ops(&intel_epb_syscore_ops);
return 0;
err_out_online:
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index d904aafe6409..6ea7fdc82f3c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1,8 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* (c) 2005-2016 Advanced Micro Devices, Inc.
- * Your use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
*
* Written by Jacob Shin - AMD, Inc.
* Maintained by: Borislav Petkov <bp@alien8.de>
@@ -101,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
- [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
static const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -199,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
+/* Per-CPU bitmap of banks (bit N = bank N) that have more than MCA_MISC0 available. */
+static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -208,6 +204,28 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
+static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) /* Set @bank's bit in @cpu's smca_misc_banks_map when MISC0 advertises extra MISC registers. */
+{
+ u32 low, high;
+
+ /*
+ * For SMCA enabled processors, BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+ */
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) /* nonzero return: leave the map untouched */
+ return;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return;
+
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) /* the MSR reads take no CPU argument; the visible caller chain passes smp_processor_id() as @cpu */
+ return;
+
+ if (low & MASK_BLKPTR_LO)
+ per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); /* bits are only ever set here, never cleared */
+
+}
+
static void smca_configure(unsigned int bank, unsigned int cpu)
{
unsigned int i, hwid_mcatype;
@@ -245,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
+ smca_set_misc_banks_map(bank, cpu);
+
/* Return early if this bank was already initialized. */
if (smca_banks[bank].hwid)
return;
@@ -455,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int bank, unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
- u32 low, high;
- u32 addr = 0;
-
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
- /* Check our cache first: */
- if (smca_bank_addrs[bank][block] != -1)
- return smca_bank_addrs[bank][block];
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- goto out;
-
- if (!(low & MCI_CONFIG_MCAX))
- goto out;
-
- if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+ return 0;
-out:
- smca_bank_addrs[bank][block] = addr;
- return addr;
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
- unsigned int bank, unsigned int block)
+ unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
u32 addr = 0, offset = 0;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
if (mce_flags.smca)
- return smca_get_block_address(bank, block);
+ return smca_get_block_address(bank, block, cpu);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -626,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id();
+ u32 low = 0, high = 0, address = 0;
int offset = -1;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
disable_err_thresholding(c, bank);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block, cpu);
if (!address)
break;
@@ -975,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank)
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
log_error_deferred(bank);
}
@@ -1016,7 +1016,7 @@ static void amd_threshold_interrupt(void)
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
unsigned int bank, cpu = smp_processor_id();
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1203,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
u32 low, high;
int err;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return 0;
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1254,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block, cpu);
if (!address)
return 0;
@@ -1437,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
threshold_remove_bank(cpu, bank);
@@ -1458,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
if (bp)
return 0;
- bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
+ bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
GFP_KERNEL);
if (!bp)
return -ENOMEM;
per_cpu(threshold_banks, cpu) = bp;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
err = threshold_create_bank(cpu, bank);
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index c038e5c00a59..af8d37962586 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Bridge between MCE and APEI
*
@@ -13,19 +14,6 @@
*
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/export.h>
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5112a50e6486..743370ee4983 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Machine check handler.
*
@@ -64,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count);
-struct mce_bank *mce_banks __read_mostly;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); /* per-CPU bank count, written by __mcheck_cpu_cap_init() */
+
+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+ bool init; /* initialise bank? */
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); /* one fixed-size bank table per CPU */
+
+#define ATTR_LEN 16 /* size of mce_bank_dev.attrname, which holds "bank%d" */
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank_dev {
+ struct device_attribute attr; /* device attribute */
+ char attrname[ATTR_LEN]; /* attribute name, formatted as "bank%d" by mce_init_banks() */
+ u8 bank; /* bank number; index into the per-CPU mce_banks_array */
+};
+static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
+
struct mce_vendor_flags mce_flags __read_mostly;
struct mca_config mca_cfg __read_mostly = {
@@ -674,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
bool error_seen = false;
struct mce m;
int i;
@@ -685,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (flags & MCP_TIMESTAMP)
m.tsc = rdtsc();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -787,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
m->status = mce_rdmsrl(msr_ops.status(i));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1067,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (test_bit(i, toclear))
mce_wrmsrl(msr_ops.status(i), 0);
}
@@ -1121,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i;
- for (i = 0; i < cfg->banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1329,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_enable();
if (kill_it || do_memory_failure(&m))
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
local_irq_disable();
ist_end_non_atomic();
} else {
@@ -1462,27 +1481,29 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int __mcheck_cpu_mce_banks_init(void)
+static void __mcheck_cpu_mce_banks_init(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u8 n_banks = this_cpu_read(mce_num_banks);
int i;
- mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
- if (!mce_banks)
- return -ENOMEM;
-
- for (i = 0; i < MAX_NR_BANKS; i++) {
+ for (i = 0; i < n_banks; i++) {
struct mce_bank *b = &mce_banks[i];
+ /*
+ * Init them all, __mcheck_cpu_apply_quirks() is going to apply
+ * the required vendor quirks before
+ * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ */
b->ctl = -1ULL;
b->init = 1;
}
- return 0;
}
/*
* Initialize Machine Checks for a CPU.
*/
-static int __mcheck_cpu_cap_init(void)
+static void __mcheck_cpu_cap_init(void)
{
u64 cap;
u8 b;
@@ -1490,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (WARN_ON_ONCE(b > MAX_NR_BANKS))
+
+ if (b > MAX_NR_BANKS) {
+ pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
+ smp_processor_id(), MAX_NR_BANKS, b);
b = MAX_NR_BANKS;
+ }
- mca_cfg.banks = max(mca_cfg.banks, b);
+ this_cpu_write(mce_num_banks, b);
- if (!mce_banks) {
- int err = __mcheck_cpu_mce_banks_init();
- if (err)
- return err;
- }
+ __mcheck_cpu_mce_banks_init();
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1507,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void)
if (cap & MCG_SER_P)
mca_cfg.ser = 1;
-
- return 0;
}
static void __mcheck_cpu_init_generic(void)
@@ -1535,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
static void __mcheck_cpu_init_clear_banks(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1548,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void)
}
/*
+ * Do a final check to see if there are any unused/RAZ banks.
+ *
+ * This must be done after the banks have been initialized and any quirks have
+ * been applied.
+ *
+ * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
+ * Otherwise, a user who disables a bank will not be able to re-enable it
+ * without a system reboot.
+ */
+static void __mcheck_cpu_check_banks(void)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
+ int i;
+
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+ struct mce_bank *b = &mce_banks[i];
+
+ if (!b->init) /* already excluded; this function never turns a bank back on */
+ continue;
+
+ rdmsrl(msr_ops.ctl(i), msrval);
+ b->init = !!msrval; /* a CTL register that reads back as zero clears ->init for the bank */
+ }
+}
+
+/*
* During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
* EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
* Vol 3B Table 15-20). But this confuses both the code that determines
@@ -1578,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1587,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && cfg->banks > 4) {
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1606,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6 && cfg->banks > 0)
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].ctl = 0;
/*
@@ -1628,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
+ if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].init = 0;
/*
@@ -1814,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;
- if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
+ __mcheck_cpu_cap_init();
+
+ if (__mcheck_cpu_apply_quirks(c) < 0) {
mca_cfg.disabled = 1;
return;
}
@@ -1831,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_check_banks();
__mcheck_cpu_setup_timer();
}
@@ -1862,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
void mce_disable_bank(int bank)
{
- if (bank >= mca_cfg.banks) {
+ if (bank >= this_cpu_read(mce_num_banks)) {
pr_warn(FW_BUG
"Ignoring request to disable invalid MCA bank %d.\n",
bank);
@@ -1948,9 +1999,10 @@ int __init mcheck_init(void)
*/
static void mce_disable_error_reporting(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2050,26 +2102,47 @@ static struct bus_type mce_subsys = {
DEFINE_PER_CPU(struct device *, mce_device);
-static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
+static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
{
- return container_of(attr, struct mce_bank, attr);
+ return container_of(attr, struct mce_bank_dev, attr);
}
static ssize_t show_bank(struct device *s, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
+
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ return sprintf(buf, "%llx\n", b->ctl);
}
static ssize_t set_bank(struct device *s, struct device_attribute *attr,
const char *buf, size_t size)
{
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
u64 new;
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
- attr_to_bank(attr)->ctl = new;
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ b->ctl = new;
mce_restart();
return size;
@@ -2184,7 +2257,7 @@ static void mce_device_release(struct device *dev)
kfree(dev);
}
-/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+/* Per CPU device init. All of the CPUs still share the same bank device: */
static int mce_device_create(unsigned int cpu)
{
struct device *dev;
@@ -2216,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
- for (j = 0; j < mca_cfg.banks; j++) {
- err = device_create_file(dev, &mce_banks[j].attr);
+ for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
+ err = device_create_file(dev, &mce_bank_devs[j].attr);
if (err)
goto error2;
}
@@ -2227,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- device_remove_file(dev, &mce_banks[j].attr);
+ device_remove_file(dev, &mce_bank_devs[j].attr);
error:
while (--i >= 0)
device_remove_file(dev, mce_device_attrs[i]);
@@ -2248,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
- for (i = 0; i < mca_cfg.banks; i++)
- device_remove_file(dev, &mce_banks[i].attr);
+ for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
+ device_remove_file(dev, &mce_bank_devs[i].attr);
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2270,6 +2343,7 @@ static void mce_disable_cpu(void)
static void mce_reenable_cpu(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2277,7 +2351,7 @@ static void mce_reenable_cpu(void)
if (!cpuhp_tasks_frozen)
cmci_reenable();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2327,10 +2401,12 @@ static __init void mce_init_banks(void)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
- struct mce_bank *b = &mce_banks[i];
+ for (i = 0; i < MAX_NR_BANKS; i++) {
+ struct mce_bank_dev *b = &mce_bank_devs[i];
struct device_attribute *a = &b->attr;
+ b->bank = i;
+
sysfs_attr_init(&a->attr);
a->attr.name = b->attrname;
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
@@ -2440,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
"%llu\n");
-static int __init mcheck_debugfs_init(void)
+static void __init mcheck_debugfs_init(void)
{
- struct dentry *dmce, *ffake_panic;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- return -ENOMEM;
- ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
- NULL, &fake_panic_fops);
- if (!ffake_panic)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
+ &fake_panic_fops);
}
#else
-static int __init mcheck_debugfs_init(void) { return -EINVAL; }
+static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2463,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
- pr_info("Using %d MCE banks\n", mca_cfg.banks);
-
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 9690ec5c8051..7c8958dee103 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* /dev/mcelog driver
*
diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c
index 64d1d5a00f39..fbe8b61c3413 100644
--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* MCE event pool management in MCE context
*
* Copyright (C) 2015 Intel Corp.
* Author: Chen, Gong <gong.chen@linux.intel.com>
- *
- * This file is licensed under GPLv2.
*/
#include <linux/smp.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index a6026170af92..1f30117b24ba 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Machine check injection support.
* Copyright 2008 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* Authors:
* Andi Kleen
* Ying Huang
@@ -16,9 +12,6 @@
* built as module so that it can be loaded on production kernels for
* testing purposes.
*
- * This file may be distributed under the terms of the GNU General Public
- * License version 2.
- *
* Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
* Advanced Micro Devices Inc.
*/
@@ -652,7 +645,6 @@ static const struct file_operations readme_fops = {
static struct dfs_node {
char *name;
- struct dentry *d;
const struct file_operations *fops;
umode_t perm;
} dfs_fls[] = {
@@ -666,49 +658,23 @@ static struct dfs_node {
{ .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
-static int __init debugfs_init(void)
+static void __init debugfs_init(void)
{
unsigned int i;
dfs_inj = debugfs_create_dir("mce-inject", NULL);
- if (!dfs_inj)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
- dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
- dfs_fls[i].perm,
- dfs_inj,
- &i_mce,
- dfs_fls[i].fops);
-
- if (!dfs_fls[i].d)
- goto err_dfs_add;
- }
-
- return 0;
-err_dfs_add:
- while (i-- > 0)
- debugfs_remove(dfs_fls[i].d);
-
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
-
- return -ENODEV;
+ for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
+ debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
+ &i_mce, dfs_fls[i].fops);
}
static int __init inject_init(void)
{
- int err;
-
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- err = debugfs_init();
- if (err) {
- free_cpumask_var(mce_inject_cpumask);
- return err;
- }
+ debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
mce_register_injector_chain(&inject_nb);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a34b55baa7aa..43031db429d2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -22,17 +22,8 @@ enum severity_level {
extern struct blocking_notifier_head x86_mce_decoder_chain;
-#define ATTR_LEN 16
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
-/* One object for each MCE bank, shared by all CPUs */
-struct mce_bank {
- u64 ctl; /* subevents to enable */
- unsigned char init; /* initialise bank? */
- struct device_attribute attr; /* device attribute */
- char attrname[ATTR_LEN]; /* attribute name */
-};
-
struct mce_evt_llist {
struct llist_node llnode;
struct mce mce;
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
-extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
@@ -128,7 +118,6 @@ struct mca_config {
bios_cmci_threshold : 1,
__reserved : 59;
- u8 banks;
s8 bootlog;
int tolerant;
int monarch_timeout;
@@ -137,6 +126,7 @@ struct mca_config {
};
extern struct mca_config mca_cfg;
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
struct mce_vendor_flags {
/*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 65201e180fe0..210f1f5db5f7 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* MCE grading rules.
* Copyright 2008, 2009 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* Author: Andi Kleen
*/
#include <linux/kernel.h>
@@ -404,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {
static int __init severities_debugfs_init(void)
{
- struct dentry *dmce, *fsev;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- goto err_out;
-
- fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
- &severities_coverage_fops);
- if (!fsev)
- goto err_out;
+ debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+ &severities_coverage_fops);
return 0;
-
-err_out:
- return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 10a3b0599300..6e2becf547c5 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Thermal throttle event support code (such as syslog messaging and rate
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile
index ba12e8aa4a45..34098d48c48f 100644
--- a/arch/x86/kernel/cpu/microcode/Makefile
+++ b/arch/x86/kernel/cpu/microcode/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
microcode-y := core.o
obj-$(CONFIG_MICROCODE) += microcode.o
microcode-$(CONFIG_MICROCODE_INTEL) += intel.o
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index e1f3ba19ba54..a0e52bd00ecc 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD CPU Microcode Update Driver for Linux
*
@@ -17,9 +18,6 @@
*
* Author: Jacob Shin <jacob.shin@amd.com>
* Fixes: Borislav Petkov <bp@suse.de>
- *
- * Licensed under the terms of the GNU General Public
- * License version 2. See file COPYING for details.
*/
#define pr_fmt(fmt) "microcode: " fmt
@@ -61,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
- * format. See Documentation/x86/microcode.txt
+ * format. See Documentation/x86/microcode.rst
*/
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c321f4f513f9..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* CPU Microcode Update Driver for Linux
*
@@ -12,11 +13,6 @@
* (C) 2015 Borislav Petkov <bp@alien8.de>
*
* This driver allows to upgrade microcode on x86 processors.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "microcode: " fmt
@@ -793,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
};
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
{
- struct device *dev;
-
- dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
@@ -876,6 +875,8 @@ int __init microcode_init(void)
goto out_ucode_group;
register_syscore_ops(&mc_syscore_ops);
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a44bdbe7c55e..ce799cfe9434 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Intel CPU Microcode Update Driver for Linux
*
@@ -8,11 +9,6 @@
*
* Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
* H Peter Anvin" <hpa@zytor.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/*
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index d0dfb892c72f..aed45b8895d5 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -4,6 +4,8 @@
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
+set -e
+
IN=$1
OUT=$2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3fa238a137d2..062f77279ce3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* HyperV Detection code.
*
* Copyright (C) 2010, Novell, Inc.
* Author : K. Y. Srinivasan <ksrinivasan@novell.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
*/
#include <linux/types.h>
@@ -21,6 +17,7 @@
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -84,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
exiting_irq();
@@ -93,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
{
*vector = HYPERV_STIMER0_VECTOR;
- *irq = 0; /* Unused on x86/x64 */
+ *irq = -1; /* Unused on x86/x64 */
hv_stimer0_handler = handler;
return 0;
}
@@ -270,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
- lapic_timer_frequency = hv_lapic_frequency;
+ lapic_timer_period = hv_lapic_frequency;
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ lapic_timer_period);
}
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index 2ad9107ee980..cc4f9f1cb94c 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 86e277f8daf4..aa5c064a6a22 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
* because MTRRs can span up to 40 bits (36bits on most modern x86)
@@ -742,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
- wbinvd();
+
+ /*
+ * Cache flushing is the most time-consuming step when programming
+ * the MTRRs. Fortunately, as per the Intel Software Development
+ * Manual, we can skip it if the processor supports cache self-
+ * snooping.
+ */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -759,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
- wbinvd();
+
+ /* Again, only flush caches if we have to. */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
}
static void post_set(void) __releases(set_atomicity_lock)
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index cfa97ff67bda..5c900f9527ff 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* This file is part of the Linux kernel.
*
* Copyright (c) 2011, Intel Corporation
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index c3a9dc63edf2..03eb90d00af0 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Resource Director Technology(RDT)
* - Cache Allocation code.
@@ -9,15 +10,6 @@
* Tony Luck <tony.luck@intel.com>
* Vikas Shivappa <vikas.shivappa@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* More information about RDT be found in the Intel (R) x86 Architecture
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 89320c0396b1..efbd54cc4e69 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Resource Director Technology(RDT)
* - Cache Allocation code.
@@ -8,15 +9,6 @@
* Fenghua Yu <fenghua.yu@intel.com>
* Tony Luck <tony.luck@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* More information about RDT be found in the Intel (R) x86 Architecture
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 1573a0a6b525..397206f23d14 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Resource Director Technology(RDT)
* - Monitoring code
@@ -10,15 +11,6 @@
* This replaces the cqm.c based on perf but we reuse a lot of
* code and datastructures originally from Peter Zijlstra and Matt Fleming.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* More information about RDT be found in the Intel (R) x86 Architecture
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
@@ -368,6 +360,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
struct list_head *head;
struct rdtgroup *entry;
+ if (!is_mbm_local_enabled())
+ return;
+
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 604c0e3bcc83..d7623e1b927d 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
#else
register unsigned int line_size asm("esi");
register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
+ register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */
/*
@@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 333c177a2471..a46dee8e78db 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* User interface for Resource Alloction in Resource Director Technology(RDT)
*
@@ -5,15 +6,6 @@
*
* Author: Fenghua Yu <fenghua.yu@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* More information about RDT be found in the Intel (R) x86 Architecture
* Software Developer Manual.
*/
@@ -804,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
@@ -850,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
@@ -2108,8 +2104,7 @@ static int rdt_init_fs_context(struct fs_context *fc)
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
fc->fs_private = &ctx->kfc;
fc->ops = &rdt_fs_context_ops;
- if (fc->user_ns)
- put_user_ns(fc->user_ns);
+ put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(&init_user_ns);
fc->global = true;
return 0;
@@ -2492,28 +2487,21 @@ out_destroy:
* modification to the CBM if the default does not satisfy the
* requirements.
*/
-static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
+ unsigned long val = _val;
- if (*val == 0)
- return;
+ if (!val)
+ return 0;
- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ return (u32)val;
}
/*
@@ -2542,7 +2530,12 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
- break;
+ /*
+ * ctrl values for locksetup aren't relevant
+ * until the schemata is written, and the mode
+ * becomes RDT_MODE_PSEUDO_LOCKED.
+ */
+ continue;
/*
* If CDP is active include peer domain's
* usage to ensure there is no overlap
@@ -2566,7 +2559,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- cbm_ensure_valid(&d->new_ctrl, r);
+ d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 94aa1c72ca98..adf9b71386ef 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,10 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8f6c784141d1..ee48c3fc8a65 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -15,33 +15,66 @@
/* leaf 0xb SMT level */
#define SMT_LEVEL 0
-/* leaf 0xb sub-leaf types */
+/* extended topology sub-leaf types */
#define INVALID_TYPE 0
#define SMT_TYPE 1
#define CORE_TYPE 2
+#define DIE_TYPE 5
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
-{
#ifdef CONFIG_SMP
+unsigned int __max_die_per_package __read_mostly = 1;
+EXPORT_SYMBOL(__max_die_per_package);
+
+/*
+ * Check if given CPUID extended topology "leaf" is implemented
+ */
+static int check_extended_topology_leaf(int leaf)
+{
unsigned int eax, ebx, ecx, edx;
- if (c->cpuid_level < 0xb)
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return -1;
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ return 0;
+}
+/*
+ * Return best CPUID Extended Topology Leaf supported
+ */
+static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+{
+ if (c->cpuid_level >= 0x1f) {
+ if (check_extended_topology_leaf(0x1f) == 0)
+ return 0x1f;
+ }
- /*
- * check if the cpuid leaf 0xb is actually implemented.
- */
- if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+ if (c->cpuid_level >= 0xb) {
+ if (check_extended_topology_leaf(0xb) == 0)
+ return 0xb;
+ }
+
+ return -1;
+}
+#endif
+
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int eax, ebx, ecx, edx;
+ int leaf;
+
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
/*
* initial apic id, which also represents 32-bit extended x2apic id.
*/
@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
}
/*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * Check for extended topology enumeration cpuid leaf, and if it
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
- unsigned int ht_mask_width, core_plus_mask_width;
+ unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
+ unsigned int die_select_mask, die_level_siblings;
+ int leaf;
- if (detect_extended_topology_early(c) < 0)
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
/*
* Populate HT related information from sub-leaf level 0.
*/
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ c->initial_apicid = edx;
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
do {
- cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
/*
* Check for the Core type in the implemented sub leaves.
@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- break;
+ die_level_siblings = core_level_siblings;
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ }
+ if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
sub_index++;
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
-
- c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
- & core_select_mask;
- c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+ die_select_mask = (~(-1 << die_plus_mask_width)) >>
+ core_plus_mask_width;
+
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
+ ht_mask_width) & core_select_mask;
+ c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
+ core_plus_mask_width) & die_select_mask;
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+ die_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+ __max_die_per_package = (die_level_siblings / core_level_siblings);
#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
new file mode 100644
index 000000000000..6a204e7336c1
--- /dev/null
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/syscore_ops.h>
+#include <linux/suspend.h>
+#include <linux/cpu.h>
+
+#include <asm/msr.h>
+
+#define UMWAIT_C02_ENABLE 0
+
+#define UMWAIT_CTRL_VAL(max_time, c02_disable) \
+ (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
+ ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
+
+/*
+ * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
+ * umwait max time is 100000 in TSC-quanta and C0.2 is enabled
+ */
+static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
+
+/*
+ * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
+ * the sysfs write functions.
+ */
+static DEFINE_MUTEX(umwait_lock);
+
+/*
+ * Write the cached control value into IA32_UMWAIT_CONTROL with the high
+ * 32 bits set to zero. Callers must have interrupts disabled, which is
+ * asserted via lockdep. The argument is ignored; it only exists so the
+ * function can be handed to on_each_cpu().
+ */
+static void umwait_update_control_msr(void *unused)
+{
+	u32 ctrl;
+
+	lockdep_assert_irqs_disabled();
+
+	ctrl = READ_ONCE(umwait_control_cached);
+	wrmsr(MSR_IA32_UMWAIT_CONTROL, ctrl, 0);
+}
+
+/*
+ * The CPU hotplug callback sets the control MSR to the global control
+ * value.
+ *
+ * Disable interrupts so the read of umwait_control_cached and the WRMSR
+ * are protected against a concurrent sysfs write. Otherwise the sysfs
+ * write could update the cached value after it had been read on this CPU
+ * and issue the IPI before the old value had been written. The IPI would
+ * interrupt, write the new value and after return from IPI the previous
+ * value would be written by this CPU.
+ *
+ * With interrupts disabled the upcoming CPU either sees the new control
+ * value or the IPI is updating this CPU to the new control value after
+ * interrupts have been reenabled.
+ *
+ * The @cpu argument is not used and the callback always returns 0.
+ */
+static int umwait_cpu_online(unsigned int cpu)
+{
+ local_irq_disable();
+ umwait_update_control_msr(NULL);
+ local_irq_enable();
+ return 0;
+}
+
+/*
+ * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
+ * is the only active CPU at this time. The MSR is set up on the APs via the
+ * CPU hotplug callback.
+ *
+ * This function is invoked on resume from suspend and hibernation. On
+ * resume from suspend the restore should not be required, but we neither
+ * trust the firmware nor does it matter if the same value is written
+ * again.
+ *
+ * It is installed as the .resume hook of umwait_syscore_ops below and
+ * simply rewrites the cached control value.
+ */
+static void umwait_syscore_resume(void)
+{
+ umwait_update_control_msr(NULL);
+}
+
+static struct syscore_ops umwait_syscore_ops = {
+ .resume = umwait_syscore_resume, /* only a resume hook is provided */
+};
+
+/* sysfs interface */
+
+/*
+ * C0.2 is enabled exactly when the C0.2-disable bit (bit 0) of an
+ * IA32_UMWAIT_CONTROL value is clear.
+ */
+static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
+{
+	u32 disable_bit = ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
+
+	return disable_bit == 0;
+}
+
+/* Extract the maximum-time field from an IA32_UMWAIT_CONTROL value. */
+static inline u32 umwait_ctrl_max_time(u32 ctrl)
+{
+	u32 max_time = ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+
+	return max_time;
+}
+
+/*
+ * Compose a new control value from @maxtime (masked to the time field)
+ * and @c02_enable (the disable bit is set when this is false), store it
+ * in umwait_control_cached and then invoke umwait_update_control_msr()
+ * through on_each_cpu().
+ */
+static inline void umwait_update_control(u32 maxtime, bool c02_enable)
+{
+	u32 c02_bits = c02_enable ? 0 : MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
+	u32 new_ctrl = (maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | c02_bits;
+
+	WRITE_ONCE(umwait_control_cached, new_ctrl);
+
+	/* Propagate to all CPUs */
+	on_each_cpu(umwait_update_control_msr, NULL, 1);
+}
+
+/*
+ * sysfs read: print "1" when C0.2 is enabled in the cached control
+ * value and "0" otherwise.
+ */
+static ssize_t
+enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool c02_on = umwait_ctrl_c02_enabled(READ_ONCE(umwait_control_cached));
+
+	return sprintf(buf, "%d\n", c02_on);
+}
+
+/*
+ * sysfs write: parse a boolean from @buf and, if it differs from the
+ * C0.2 setting in the cached control value, push an updated control
+ * value (same max time, new C0.2 state) to all CPUs. The read-compare-
+ * update sequence runs under umwait_lock.
+ *
+ * Returns @count on success or the error from kstrtobool().
+ */
+static ssize_t enable_c02_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	bool want_c02;
+	u32 cur;
+	int err;
+
+	err = kstrtobool(buf, &want_c02);
+	if (err)
+		return err;
+
+	mutex_lock(&umwait_lock);
+
+	cur = READ_ONCE(umwait_control_cached);
+	if (umwait_ctrl_c02_enabled(cur) != want_c02)
+		umwait_update_control(cur, want_c02);
+
+	mutex_unlock(&umwait_lock);
+
+	return count;
+}
+static DEVICE_ATTR_RW(enable_c02);
+
+/* sysfs read: print the max-time field of the cached control value. */
+static ssize_t
+max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
+{
+	u32 max_time = umwait_ctrl_max_time(READ_ONCE(umwait_control_cached));
+
+	return sprintf(buf, "%u\n", max_time);
+}
+
+/*
+ * sysfs write: parse an unsigned 32-bit value from @buf (base is
+ * auto-detected), reject it with -EINVAL if it has bits outside the
+ * max-time field, and otherwise push it to all CPUs when it differs from
+ * the cached max time. The current C0.2 setting is preserved. The
+ * read-compare-update sequence runs under umwait_lock.
+ *
+ * Returns @count on success or a negative error code.
+ */
+static ssize_t max_time_store(struct device *kobj,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	u32 new_time;
+	u32 cur;
+	int err;
+
+	err = kstrtou32(buf, 0, &new_time);
+	if (err)
+		return err;
+
+	/* bits[1:0] must be zero */
+	if ((new_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK) != 0)
+		return -EINVAL;
+
+	mutex_lock(&umwait_lock);
+
+	cur = READ_ONCE(umwait_control_cached);
+	if (umwait_ctrl_max_time(cur) != new_time)
+		umwait_update_control(new_time, umwait_ctrl_c02_enabled(cur));
+
+	mutex_unlock(&umwait_lock);
+
+	return count;
+}
+static DEVICE_ATTR_RW(max_time);
+
+/*
+ * The two sysfs attributes (enable_c02, max_time) collected into the
+ * "umwait_control" attribute group that umwait_init() hands to
+ * sysfs_create_group().
+ */
+static struct attribute *umwait_attrs[] = {
+ &dev_attr_enable_c02.attr,
+ &dev_attr_max_time.attr,
+ NULL /* end-of-list marker */
+};
+
+static struct attribute_group umwait_attr_group = {
+ .attrs = umwait_attrs,
+ .name = "umwait_control",
+};
+
+/*
+ * Set up umwait control on CPUs with X86_FEATURE_WAITPKG:
+ *  - install the CPU hotplug callback that writes the cached control
+ *    value on every CPU coming online,
+ *  - register the syscore resume hook,
+ *  - create the "umwait_control" sysfs group under the cpu subsystem
+ *    root device.
+ *
+ * Returns -ENODEV without WAITPKG, the negative error from
+ * cpuhp_setup_state() if the hotplug callback cannot be installed, and
+ * otherwise the result of sysfs_create_group().
+ */
+static int __init umwait_init(void)
+{
+	struct device *dev;
+	int ret;
+
+	if (!boot_cpu_has(X86_FEATURE_WAITPKG))
+		return -ENODEV;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
+				umwait_cpu_online, NULL);
+	if (ret < 0) {
+		/*
+		 * The hotplug callback could not be installed, so the
+		 * cached control value is not being applied to the CPUs.
+		 * Do not register the resume hook or expose a sysfs
+		 * interface that would pretend otherwise.
+		 *
+		 * For CPUHP_AP_ONLINE_DYN a successful call returns the
+		 * (positive) dynamically allocated state number, hence the
+		 * check for a negative value only.
+		 */
+		return ret;
+	}
+
+	register_syscore_ops(&umwait_syscore_ops);
+
+	/*
+	 * Add umwait control interface. A failure here is returned to the
+	 * caller, but the hotplug and resume hooks registered above stay
+	 * in place, so the default values remain set up.
+	 */
+	dev = cpu_subsys.dev_root;
+	return sysfs_create_group(&dev->kobj, &umwait_attr_group);
+}
+device_initcall(umwait_init);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0eda91f8eeac..3c648476d4fb 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)
#ifdef CONFIG_X86_LOCAL_APIC
/* Skip lapic calibration since we know the bus frequency. */
- lapic_timer_frequency = ecx / HZ;
+ lapic_timer_period = ecx / HZ;
pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
ecx);
#endif
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
new file mode 100644
index 000000000000..8e6f2f4b4afe
--- /dev/null
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+
+#include <asm/cpufeature.h>
+
+#include "cpu.h"
+
+#define MSR_ZHAOXIN_FCR57 0x00001257
+
+#define ACE_PRESENT (1 << 6)
+#define ACE_ENABLED (1 << 7)
+#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */
+
+#define RNG_PRESENT (1 << 2)
+#define RNG_ENABLED (1 << 3)
+#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */
+
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
+/*
+ * Handle the 0xC000000x extended CPUID leaves: if the ACE or RNG unit is
+ * reported present but not enabled, set the corresponding enable bit in
+ * MSR_ZHAOXIN_FCR57, then store the extended feature flags in the
+ * capability array. Afterwards set REP_GOOD for family >= 6 and detect
+ * the cache sizes.
+ */
+static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
+{
+	u32 fcr_lo, fcr_hi;
+
+	/* Test for Extended Feature Flags presence */
+	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+		u32 ext_flags = cpuid_edx(0xC0000001);
+		bool ace_off = (ext_flags & (ACE_PRESENT | ACE_ENABLED)) ==
+			       ACE_PRESENT;
+		bool rng_off = (ext_flags & (RNG_PRESENT | RNG_ENABLED)) ==
+			       RNG_PRESENT;
+
+		/* Enable ACE unit, if present and disabled */
+		if (ace_off) {
+			rdmsr(MSR_ZHAOXIN_FCR57, fcr_lo, fcr_hi);
+			fcr_lo |= ACE_FCR;
+			wrmsr(MSR_ZHAOXIN_FCR57, fcr_lo, fcr_hi);
+			pr_info("CPU: Enabled ACE h/w crypto\n");
+		}
+
+		/* Enable RNG unit, if present and disabled */
+		if (rng_off) {
+			rdmsr(MSR_ZHAOXIN_FCR57, fcr_lo, fcr_hi);
+			fcr_lo |= RNG_ENABLE;
+			wrmsr(MSR_ZHAOXIN_FCR57, fcr_lo, fcr_hi);
+			pr_info("CPU: Enabled h/w RNG\n");
+		}
+
+		/*
+		 * Store Extended Feature Flags as word 5 of the CPU
+		 * capability bit array. The leaf is read again here rather
+		 * than reusing the value sampled above (presumably so that
+		 * units enabled just now are reflected).
+		 */
+		c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
+	}
+
+	if (c->x86 >= 0x6)
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+	cpu_detect_cache_sizes(c);
+}
+
+/*
+ * Early per-CPU setup: set the TSC-related capability bits (and
+ * SYSENTER32 on 64-bit builds) and, when CPUID leaf 1 reports HTT,
+ * derive x86_coreid_bits from the per-package APIC ID count.
+ */
+static void early_init_zhaoxin(struct cpuinfo_x86 *c)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (c->x86 >= 0x6)
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+#ifdef CONFIG_X86_64
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+#endif
+	if (c->x86_power & (1 << 8)) {
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
+
+	/* Nothing more to do without CPUID leaf 1 */
+	if (c->cpuid_level < 0x00000001)
+		return;
+
+	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+
+	/*
+	 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+	 * apicids which are reserved per package. Store the resulting
+	 * shift value for the package management code.
+	 */
+	if (edx & (1U << 28)) {
+		u32 apicids_per_pkg = (ebx >> 16) & 0xff;
+
+		c->x86_coreid_bits = get_count_order(apicids_per_pkg);
+	}
+}
+
+/*
+ * Translate bits from the VMX processor-based control MSRs into CPU
+ * capability flags. For each MSR the high and low 32-bit halves are
+ * OR-ed together before testing. The secondary controls MSR is only read
+ * when the primary controls advertise it.
+ */
+static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+	u32 lo, hi, ctls, ctls2;
+	bool tpr_shadow;
+
+	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, lo, hi);
+	ctls = hi | lo;
+	tpr_shadow = ctls & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW;
+
+	if (tpr_shadow)
+		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+	if (ctls & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+		set_cpu_cap(c, X86_FEATURE_VNMI);
+
+	/* No secondary controls: done */
+	if (!(ctls & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS))
+		return;
+
+	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, lo, hi);
+	ctls2 = hi | lo;
+
+	/* FlexPriority needs both virtual-APIC and TPR shadow */
+	if ((ctls2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && tpr_shadow)
+		set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+	if (ctls2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+		set_cpu_cap(c, X86_FEATURE_EPT);
+	if (ctls2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+		set_cpu_cap(c, X86_FEATURE_VPID);
+}
+
+/*
+ * Main per-CPU init: rerun the early setup, collect cache and core
+ * information, and set ARCH_PERFMON, the extended capabilities (family
+ * >= 6), LFENCE_RDTSC (64-bit builds) and the VMX-derived flags.
+ */
+static void init_zhaoxin(struct cpuinfo_x86 *c)
+{
+	early_init_zhaoxin(c);
+	init_intel_cacheinfo(c);
+	detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+	detect_ht(c);
+#endif
+
+	if (c->cpuid_level > 9) {
+		unsigned int perfmon = cpuid_eax(10);
+		unsigned int version = perfmon & 0xff;
+		unsigned int counters = (perfmon >> 8) & 0xff;
+
+		/*
+		 * Check for version and the number of counters
+		 * Version(eax[7:0]) can't be 0;
+		 * Counters(eax[15:8]) should be greater than 1;
+		 */
+		if (version != 0 && counters > 1)
+			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+	}
+
+	if (c->x86 >= 0x6)
+		init_zhaoxin_cap(c);
+#ifdef CONFIG_X86_64
+	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+#endif
+
+	if (cpu_has(c, X86_FEATURE_VMX))
+		zhaoxin_detect_vmx_virtcap(c);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * legacy_cache_size hook for 32-bit builds: returns @size unchanged;
+ * @c is not used.
+ */
+static unsigned int
+zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size)
+{
+ return size;
+}
+#endif
+
+/*
+ * Vendor descriptor tying the Zhaoxin init routines above to
+ * X86_VENDOR_ZHAOXIN; registered through cpu_dev_register() below.
+ * NOTE(review): c_ident is presumably matched against the CPUID vendor
+ * string - confirm the exact spacing against the real source, since this
+ * rendering collapses whitespace.
+ */
+static const struct cpu_dev zhaoxin_cpu_dev = {
+ .c_vendor = "zhaoxin",
+ .c_ident = { " Shanghai " },
+ .c_early_init = early_init_zhaoxin,
+ .c_init = init_zhaoxin,
+#ifdef CONFIG_X86_32
+ .legacy_cache_size = zhaoxin_size_cache,
+#endif
+ .c_x86_vendor = X86_VENDOR_ZHAOXIN,
+};
+
+cpu_dev_register(zhaoxin_cpu_dev);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1d300f96df4b..3492aa36bf09 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* ----------------------------------------------------------------------- *
*
* Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a96ca8584803..2bf70a2fed90 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Architecture specific (i386/x86_64) functions for kexec based crash dumps.
*
@@ -55,7 +56,6 @@ struct crash_memmap_data {
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -72,14 +72,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
-#ifdef CONFIG_X86_32
- struct pt_regs fixed_regs;
-
- if (!user_mode(regs)) {
- crash_fixup_ss_esp(&fixed_regs, regs);
- regs = &fixed_regs;
- }
-#endif
crash_save_cpu(regs, cpu);
/*
@@ -180,6 +172,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
+
+static unsigned long crash_zero_bytes;
+
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -380,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
memmap_entry_callback);
+ /* Add e820 reserved ranges */
+ cmd.type = E820_TYPE_RESERVED;
+ flags = IORESOURCE_MEM;
+ walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
/* Add crashk_low_res region */
if (crashk_low_res.end) {
ei.addr = crashk_low_res.start;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 76dd605ee2a3..7da2bcd2b8eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Low level x86 E820 memory map handling functions.
*
@@ -85,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
continue;
if (entry->addr >= end || entry->addr + entry->size <= start)
continue;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
@@ -1062,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE;
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+ case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
case E820_TYPE_RAM: /* Fall-through: */
case E820_TYPE_UNUSABLE: /* Fall-through: */
- case E820_TYPE_RESERVED: /* Fall-through: */
default: return IORES_DESC_NONE;
}
}
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e8c8c5d78dbd..e963344b0449 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* EISA specific code
- *
- * This file is licensed under the GPL V2
*/
#include <linux/ioport.h>
#include <linux/eisa.h>
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index aebd0d5bc086..12e7d4406c32 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* ----------------------------------------------------------------------- *
*
* Copyright 2014 Intel Corporation; author: H. Peter Anvin
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile
index 68279efb811a..78c5621457d4 100644
--- a/arch/x86/kernel/fpu/Makefile
+++ b/arch/x86/kernel/fpu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Build rules for the FPU support code:
#
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ce243f76bdb7..12c70840980e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 1994 Linus Torvalds
*
@@ -42,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-static void kernel_fpu_disable(void)
-{
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, false);
-}
-
static bool kernel_fpu_disabled(void)
{
return this_cpu_read(in_kernel_fpu);
@@ -93,42 +82,33 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);
-static void __kernel_fpu_begin(void)
+void kernel_fpu_begin(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ preempt_disable();
WARN_ON_FPU(!irq_fpu_usable());
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- kernel_fpu_disable();
+ this_cpu_write(in_kernel_fpu, true);
- if (current->mm) {
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- set_thread_flag(TIF_NEED_FPU_LOAD);
- /*
- * Ignore return value -- we don't care if reg state
- * is clobbered.
- */
- copy_fpregs_to_fpstate(fpu);
- }
+ if (!(current->flags & PF_KTHREAD) &&
+ !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
+ copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
}
-
-static void __kernel_fpu_end(void)
-{
- kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
- preempt_disable();
- __kernel_fpu_begin();
-}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
- __kernel_fpu_end();
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -154,7 +134,6 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_after_save(fpu);
fpregs_unlock();
}
-EXPORT_SYMBOL_GPL(fpu__save);
/*
* Legacy x87 fpstate state init:
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 20d8fa7124c7..6ce7e0a23268 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* x86 FPU boot time init code:
*/
@@ -203,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
if (!boot_cpu_has(X86_FEATURE_FPU)) {
- /*
- * Disable xsave as we do not support it if i387
- * emulation is enabled.
- */
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
fpu_kernel_xstate_size = sizeof(struct swregs_state);
} else {
if (boot_cpu_has(X86_FEATURE_FXSR))
@@ -251,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
char *argptr = arg;
int bit;
+#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+ pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
- if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
+ if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- }
+#endif
if (cmdline_find_option_bool(boot_command_line, "noxsave"))
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 5a8d118bc423..0071b794ed19 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -5,6 +5,7 @@
#include <linux/compat.h>
#include <linux/cpu.h>
+#include <linux/pagemap.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
@@ -61,6 +62,11 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
+ fpregs_lock();
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+ copy_fxregs_to_kernel(&tsk->thread.fpu);
+ fpregs_unlock();
+
convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env)) ||
@@ -189,15 +195,7 @@ retry:
fpregs_unlock();
if (ret) {
- int aligned_size;
- int nr_pages;
-
- aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size;
- nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE);
-
- ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages,
- NULL, FOLL_WRITE);
- if (ret == nr_pages)
+ if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
goto retry;
return -EFAULT;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9c459fd1d38e..e5cb67d67c03 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* xsave/xrstor support.
*
@@ -7,6 +8,8 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -67,15 +70,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
unsigned int fpu_user_xstate_size;
/*
- * Clear all of the X86_FEATURE_* bits that are unavailable
- * when the CPU has no XSAVE support.
- */
-void fpu__xstate_clear_all_cpu_caps(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-}
-
-/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
@@ -708,7 +702,7 @@ static void fpu__init_disable_system_xstate(void)
{
xfeatures_mask = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
/*
@@ -1239,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Print "AVX512_elapsed_ms:" followed by the time in milliseconds since
+ * the task's recorded AVX512 timestamp, or -1 if no timestamp has been
+ * recorded (timestamp is zero).
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+	unsigned long last_use = READ_ONCE(task->thread.fpu.avx512_timestamp);
+	/* Report -1 if no AVX512 usage */
+	long elapsed_ms = -1;
+
+	if (last_use) {
+		long ticks = (long)(jiffies - last_use);
+
+		/* Cap to LONG_MAX if time difference > LONG_MAX */
+		if (ticks < 0)
+			ticks = LONG_MAX;
+
+		elapsed_ms = jiffies_to_msecs(ticks);
+	}
+
+	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", elapsed_ms);
+	seq_putc(m, '\n');
+}
+
+/*
+ * Emit the architecture specific status lines for @task into @m.
+ * Currently this is only the AVX512 line, and only when
+ * X86_FEATURE_AVX512F is enabled (processor support and build option).
+ * @ns and @pid are not used. Always returns 0.
+ */
+int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+			 struct pid *pid, struct task_struct *task)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_AVX512F))
+		return 0;
+
+	avx512_status(m, task);
+	return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..024c3053dbba 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/memory.h>
#include <trace/syscall.h>
@@ -34,16 +35,25 @@
#ifdef CONFIG_DYNAMIC_FTRACE
int ftrace_arch_code_modify_prepare(void)
+ __acquires(&text_mutex)
{
+ /*
+ * Need to grab text_mutex to prevent a race from module loading
+ * and live kernel patching from changing the text permissions while
+ * ftrace has it set to "read/write".
+ */
+ mutex_lock(&text_mutex);
set_kernel_text_rw();
set_all_modules_text_rw();
return 0;
}
int ftrace_arch_code_modify_post_process(void)
+ __releases(&text_mutex)
{
set_all_modules_text_ro();
set_kernel_text_ro();
+ mutex_unlock(&text_mutex);
return 0;
}
@@ -300,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
ip = regs->ip - INT3_INSN_SIZE;
-#ifdef CONFIG_X86_64
if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1;
@@ -312,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
int3_emulate_call(regs, ftrace_update_func_call);
return 1;
}
-#else
- if (ftrace_location(ip) || is_ftrace_caller(ip)) {
- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
- return 1;
- }
-#endif
return 0;
}
@@ -370,7 +373,7 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
return add_break(rec->ip, old);
}
-static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
@@ -478,7 +481,7 @@ static int add_update_nop(struct dyn_ftrace *rec)
return add_update_code(ip, new);
}
-static int add_update(struct dyn_ftrace *rec, int enable)
+static int add_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
@@ -524,7 +527,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
return ftrace_write(ip, new, 1);
}
-static int finish_update(struct dyn_ftrace *rec, int enable)
+static int finish_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 2ba914a34b06..073aab525d80 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -9,6 +9,8 @@
#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/nospec-branch.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
# define function_hook __fentry__
EXPORT_SYMBOL(__fentry__)
@@ -89,26 +91,38 @@ END(ftrace_caller)
ENTRY(ftrace_regs_caller)
/*
- * i386 does not save SS and ESP when coming from kernel.
- * Instead, to get sp, &regs->sp is used (see ptrace.h).
- * Unfortunately, that means eflags must be at the same location
- * as the current return ip is. We move the return ip into the
- * regs->ip location, and move flags into the return ip location.
+ * We're here from an mcount/fentry CALL, and the stack frame looks like:
+ *
+ * <previous context>
+ * RET-IP
+ *
+ * The purpose of this function is to call out in an emulated INT3
+ * environment with a stack frame like:
+ *
+ * <previous context>
+ * gap / RET-IP
+ * gap
+ * gap
+ * gap
+ * pt_regs
+ *
+ * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/
- pushl $__KERNEL_CS
- pushl 4(%esp) /* Save the return ip */
- pushl $0 /* Load 0 into orig_ax */
+ subl $3*4, %esp # RET-IP + 3 gaps
+ pushl %ss # ss
+ pushl %esp # points at ss
+ addl $5*4, (%esp) # make it point at <previous context>
+ pushfl # flags
+ pushl $__KERNEL_CS # cs
+ pushl 7*4(%esp) # ip <- RET-IP
+ pushl $0 # orig_eax
+
pushl %gs
pushl %fs
pushl %es
pushl %ds
- pushl %eax
-
- /* Get flags and place them into the return ip slot */
- pushf
- popl %eax
- movl %eax, 8*4(%esp)
+ pushl %eax
pushl %ebp
pushl %edi
pushl %esi
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
pushl %ecx
pushl %ebx
- movl 12*4(%esp), %eax /* Load ip (1st parameter) */
- subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
- movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
- movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
- pushl %esp /* Save pt_regs as 4th parameter */
+ ENCODE_FRAME_POINTER
+
+ movl PT_EIP(%esp), %eax # 1st argument: IP
+ subl $MCOUNT_INSN_SIZE, %eax
+ movl 21*4(%esp), %edx # 2nd argument: parent ip
+ movl function_trace_op, %ecx # 3rd argument: ftrace_pos
+ pushl %esp # 4th argument: pt_regs
GLOBAL(ftrace_regs_call)
call ftrace_stub
- addl $4, %esp /* Skip pt_regs */
+ addl $4, %esp # skip 4th argument
- /* restore flags */
- push 14*4(%esp)
- popf
+ /* place IP below the new SP */
+ movl PT_OLDESP(%esp), %eax
+ movl PT_EIP(%esp), %ecx
+ movl %ecx, -4(%eax)
- /* Move return ip back to its original location */
- movl 12*4(%esp), %eax
- movl %eax, 14*4(%esp)
+ /* place EAX below that */
+ movl PT_EAX(%esp), %ecx
+ movl %ecx, -8(%eax)
popl %ebx
popl %ecx
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
popl %esi
popl %edi
popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- /* use lea to not affect flags */
- lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
+ lea -8(%eax), %esp
+ popl %eax
jmp .Lftrace_ret
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 10eb2760ef2c..809d54397dba 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,7 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.code64
.section .entry.text, "ax"
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
movq %rcx, RSP(%rsp)
+ ENCODE_FRAME_POINTER
+
/* regs go into 4th parameter */
leaq (%rsp), %rcx
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (la57) {
- p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+ p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+ physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
- i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
- p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
- p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+ i = physaddr >> P4D_SHIFT;
+ p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+ p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
} else {
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
}
- i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
- pud[i + 0] = (pudval_t)pmd + pgtable_flags;
- pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+ i = physaddr >> PUD_SHIFT;
+ pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+ pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
pmd_entry += physaddr;
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
- int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
- pmd[idx] = pmd_entry + i * PMD_SIZE;
+ int idx = i + (physaddr >> PMD_SHIFT);
+
+ pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
}
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..a6342c899be5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
-#define GET_CR2_INTO(reg) movq %cr2, reg
#define INTERRUPT_RETURN iretq
#endif
@@ -253,10 +251,10 @@ END(secondary_startup_64)
* start_secondary() via .Ljump_to_C_code.
*/
ENTRY(start_cpu0)
- movq initial_stack(%rip), %rsp
UNWIND_HINT_EMPTY
+ movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code
-ENDPROC(start_cpu0)
+END(start_cpu0)
#endif
/* Both SMP bootup and ACPI suspend change these variables */
@@ -323,7 +321,7 @@ early_idt_handler_common:
cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
+ GET_CR2_INTO(%rdi) /* can clobber %rax if pv */
call early_make_pgtable
andl %eax,%eax
jz 20f /* All good */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index fb32925a2e62..c43e96a938d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,31 +1,44 @@
-#include <linux/clocksource.h>
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/clockchips.h>
#include <linux/interrupt.h>
-#include <linux/irq.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/i8253.h>
-#include <linux/slab.h>
#include <linux/hpet.h>
-#include <linux/init.h>
#include <linux/cpu.h>
-#include <linux/pm.h>
-#include <linux/io.h>
+#include <linux/irq.h>
-#include <asm/cpufeature.h>
-#include <asm/irqdomain.h>
-#include <asm/fixmap.h>
#include <asm/hpet.h>
#include <asm/time.h>
-#define HPET_MASK CLOCKSOURCE_MASK(32)
+#undef pr_fmt
+#define pr_fmt(fmt) "hpet: " fmt
-#define HPET_DEV_USED_BIT 2
-#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
-#define HPET_DEV_VALID 0x8
-#define HPET_DEV_FSB_CAP 0x1000
-#define HPET_DEV_PERI_CAP 0x2000
+enum hpet_mode {
+ HPET_MODE_UNUSED,
+ HPET_MODE_LEGACY,
+ HPET_MODE_CLOCKEVT,
+ HPET_MODE_DEVICE,
+};
+
+struct hpet_channel {
+ struct clock_event_device evt;
+ unsigned int num;
+ unsigned int cpu;
+ unsigned int irq;
+ unsigned int in_use;
+ enum hpet_mode mode;
+ unsigned int boot_cfg;
+ char name[10];
+};
+
+struct hpet_base {
+ unsigned int nr_channels;
+ unsigned int nr_clockevents;
+ unsigned int boot_cfg;
+ struct hpet_channel *channels;
+};
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
#define HPET_MIN_CYCLES 128
#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
@@ -38,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */
bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned int hpet_num_timers;
+static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
+static struct irq_domain *hpet_domain;
#endif
+
static void __iomem *hpet_virt_address;
-struct hpet_dev {
- struct clock_event_device evt;
- unsigned int num;
- int cpu;
- unsigned int irq;
- unsigned int flags;
- char name[10];
-};
+static struct hpet_base hpet_base;
+
+static bool hpet_legacy_int_enabled;
+static unsigned long hpet_freq;
-static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
+
+static inline
+struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
{
- return container_of(evtdev, struct hpet_dev, evt);
+ return container_of(evt, struct hpet_channel, evt);
}
inline unsigned int hpet_readl(unsigned int a)
@@ -66,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
writel(d, hpet_virt_address + a);
}
-#ifdef CONFIG_X86_64
-#include <asm/pgtable.h>
-#endif
-
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
@@ -84,10 +96,6 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-bool boot_hpet_disable;
-bool hpet_force_user;
-static bool hpet_verbose;
-
static int __init hpet_setup(char *str)
{
while (str) {
@@ -119,13 +127,8 @@ static inline int is_hpet_capable(void)
return !boot_hpet_disable && hpet_address;
}
-/*
- * HPET timer interrupt enable / disable
- */
-static bool hpet_legacy_int_enabled;
-
/**
- * is_hpet_enabled - check whether the hpet timer interrupt is enabled
+ * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled
*/
int is_hpet_enabled(void)
{
@@ -135,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
static void _hpet_print_config(const char *function, int line)
{
- u32 i, timers, l, h;
- printk(KERN_INFO "hpet: %s(%d):\n", function, line);
- l = hpet_readl(HPET_ID);
- h = hpet_readl(HPET_PERIOD);
- timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
- l = hpet_readl(HPET_CFG);
- h = hpet_readl(HPET_STATUS);
- printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+ u32 i, id, period, cfg, status, channels, l, h;
+
+ pr_info("%s(%d):\n", function, line);
+
+ id = hpet_readl(HPET_ID);
+ period = hpet_readl(HPET_PERIOD);
+ pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period);
+
+ cfg = hpet_readl(HPET_CFG);
+ status = hpet_readl(HPET_STATUS);
+ pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status);
+
l = hpet_readl(HPET_COUNTER);
h = hpet_readl(HPET_COUNTER+4);
- printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+ pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- for (i = 0; i < timers; i++) {
+ for (i = 0; i < channels; i++) {
l = hpet_readl(HPET_Tn_CFG(i));
h = hpet_readl(HPET_Tn_CFG(i)+4);
- printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_CMP(i));
h = hpet_readl(HPET_Tn_CMP(i)+4);
- printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_ROUTE(i));
h = hpet_readl(HPET_Tn_ROUTE(i)+4);
- printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
}
}
@@ -171,31 +178,20 @@ do { \
} while (0)
/*
- * When the hpet driver (/dev/hpet) is enabled, we need to reserve
+ * When the HPET driver (/dev/hpet) is enabled, we need to reserve
* timer 0 and timer 1 in case of RTC emulation.
*/
#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd);
-
-static void hpet_reserve_platform_timers(unsigned int id)
+static void __init hpet_reserve_platform_timers(void)
{
- struct hpet __iomem *hpet = hpet_virt_address;
- struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
- unsigned int nrtimers, i;
struct hpet_data hd;
-
- nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+ unsigned int i;
memset(&hd, 0, sizeof(hd));
hd.hd_phys_address = hpet_address;
- hd.hd_address = hpet;
- hd.hd_nirqs = nrtimers;
- hpet_reserve_timer(&hd, 0);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
+ hd.hd_address = hpet_virt_address;
+ hd.hd_nirqs = hpet_base.nr_channels;
/*
* NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
@@ -205,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id)
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
- for (i = 2; i < nrtimers; timer++, i++) {
- hd.hd_irq[i] = (readl(&timer->hpet_config) &
- Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
- }
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ if (i >= 2)
+ hd.hd_irq[i] = hc->irq;
- hpet_reserve_msi_timers(&hd);
+ switch (hc->mode) {
+ case HPET_MODE_UNUSED:
+ case HPET_MODE_DEVICE:
+ hc->mode = HPET_MODE_DEVICE;
+ break;
+ case HPET_MODE_CLOCKEVT:
+ case HPET_MODE_LEGACY:
+ hpet_reserve_timer(&hd, hc->num);
+ break;
+ }
+ }
hpet_alloc(&hd);
+}
+static void __init hpet_select_device_channel(void)
+{
+ int i;
+
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ /* Associate the first unused channel to /dev/hpet */
+ if (hc->mode == HPET_MODE_UNUSED) {
+ hc->mode = HPET_MODE_DEVICE;
+ return;
+ }
+ }
}
+
#else
-static void hpet_reserve_platform_timers(unsigned int id) { }
+static inline void hpet_reserve_platform_timers(void) { }
+static inline void hpet_select_device_channel(void) {}
#endif
-/*
- * Common hpet info
- */
-static unsigned long hpet_freq;
-
-static struct clock_event_device hpet_clockevent;
-
+/* Common HPET functions */
static void hpet_stop_counter(void)
{
u32 cfg = hpet_readl(HPET_CFG);
+
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -242,6 +260,7 @@ static void hpet_reset_counter(void)
static void hpet_start_counter(void)
{
unsigned int cfg = hpet_readl(HPET_CFG);
+
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -273,24 +292,9 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = true;
}
-static void hpet_legacy_clockevent_register(void)
-{
- /* Start HPET legacy interrupts */
- hpet_enable_legacy_int();
-
- /*
- * Start hpet with the boot cpu mask and make it
- * global after the IO_APIC has been initialized.
- */
- hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
- clockevents_config_and_register(&hpet_clockevent, hpet_freq,
- HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
- global_clock_event = &hpet_clockevent;
- printk(KERN_DEBUG "hpet clockevent registered\n");
-}
-
-static int hpet_set_periodic(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg, cmp, now;
uint64_t delta;
@@ -299,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
- hpet_writel(cmp, HPET_Tn_CMP(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
+ hpet_writel(cmp, HPET_Tn_CMP(channel));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -312,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
+ hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel));
hpet_start_counter();
hpet_print_config();
return 0;
}
-static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_shutdown(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_resume(struct clock_event_device *evt)
+static int hpet_clkevt_legacy_resume(struct clock_event_device *evt)
{
hpet_enable_legacy_int();
hpet_print_config();
return 0;
}
-static int hpet_next_event(unsigned long delta,
- struct clock_event_device *evt, int timer)
+static int
+hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
u32 cnt;
s32 res;
cnt = hpet_readl(HPET_COUNTER);
cnt += (u32) delta;
- hpet_writel(cnt, HPET_Tn_CMP(timer));
+ hpet_writel(cnt, HPET_Tn_CMP(channel));
/*
* HPETs are a complete disaster. The compare register is
@@ -386,360 +393,250 @@ static int hpet_next_event(unsigned long delta,
return res < HPET_MIN_CYCLES ? -ETIME : 0;
}
-static int hpet_legacy_shutdown(struct clock_event_device *evt)
+static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
{
- return hpet_shutdown(evt, 0);
-}
+ struct clock_event_device *evt = &hc->evt;
-static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
-{
- return hpet_set_oneshot(evt, 0);
-}
+ evt->rating = rating;
+ evt->irq = hc->irq;
+ evt->name = hc->name;
+ evt->cpumask = cpumask_of(hc->cpu);
+ evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
+ evt->set_next_event = hpet_clkevt_set_next_event;
+ evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
-static int hpet_legacy_set_periodic(struct clock_event_device *evt)
-{
- return hpet_set_periodic(evt, 0);
+ evt->features = CLOCK_EVT_FEAT_ONESHOT;
+ if (hc->boot_cfg & HPET_TN_PERIODIC) {
+ evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+ evt->set_state_periodic = hpet_clkevt_set_state_periodic;
+ }
}
-static int hpet_legacy_resume(struct clock_event_device *evt)
+static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
{
- return hpet_resume(evt);
-}
+ /*
+ * Start HPET with the boot CPU's cpumask and make it global after
+ * the IO_APIC has been initialized.
+ */
+ hc->cpu = boot_cpu_data.cpu_index;
+ strncpy(hc->name, "hpet", sizeof(hc->name));
+ hpet_init_clockevent(hc, 50);
-static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- return hpet_next_event(delta, evt, 0);
-}
+ hc->evt.tick_resume = hpet_clkevt_legacy_resume;
-/*
- * The hpet clock event device
- */
-static struct clock_event_device hpet_clockevent = {
- .name = "hpet",
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .set_state_periodic = hpet_legacy_set_periodic,
- .set_state_oneshot = hpet_legacy_set_oneshot,
- .set_state_shutdown = hpet_legacy_shutdown,
- .tick_resume = hpet_legacy_resume,
- .set_next_event = hpet_legacy_next_event,
- .irq = 0,
- .rating = 50,
-};
+ /*
+ * Legacy horrors and sins from the past. HPET used periodic mode
+ * unconditionally forever on the legacy channel 0. Removing the
+ * below hack and using the conditional in hpet_init_clockevent()
+ * makes at least Qemu and one hardware machine fail to boot.
+ * There are two issues which cause the boot failure:
+ *
+ * #1 After the timer delivery test in IOAPIC and the IOAPIC setup
+ * the next interrupt is not delivered despite the HPET channel
+ * being programmed correctly. Reprogramming the HPET after
+ * switching to IOAPIC makes it work again. After fixing this,
+ * the next issue surfaces:
+ *
+ * #2 Due to the unconditional periodic mode availability the Local
+ * APIC timer calibration can hijack the global clockevents
+ * event handler without causing damage. Using oneshot at this
+ * stage makes it hang because the HPET does not get
+ * reprogrammed due to the handler hijacking. Duh, stupid me!
+ *
+ * Both issues require major surgery and especially the kick HPET
+ * again after enabling IOAPIC results in really nasty hackery.
+ * This 'assume periodic works' magic has survived since HPET
+ * support got added, so it's questionable whether this should be
+ * fixed. Both Qemu and the failing hardware machine support
+ * periodic mode despite the fact that both don't advertise it in
+ * the configuration register and both need that extra kick after
+ * switching to IOAPIC. Seems to be a feature...
+ */
+ hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC;
+ hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic;
+
+ /* Start HPET legacy interrupts */
+ hpet_enable_legacy_int();
+
+ clockevents_config_and_register(&hc->evt, hpet_freq,
+ HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
+ global_clock_event = &hc->evt;
+ pr_debug("Clockevent registered\n");
+}
/*
* HPET MSI Support
*/
#ifdef CONFIG_PCI_MSI
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-static struct hpet_dev *hpet_devs;
-static struct irq_domain *hpet_domain;
-
void hpet_msi_unmask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* unmask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
void hpet_msi_mask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* mask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
-}
-
-void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
-{
- hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
- hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
-void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
+void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
{
- msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
- msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
- msg->address_hi = 0;
+ hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
+ hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
}
-static int hpet_msi_shutdown(struct clock_event_device *evt)
+static int hpet_clkevt_msi_resume(struct clock_event_device *evt)
{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_shutdown(evt, hdev->num);
-}
-
-static int hpet_msi_set_oneshot(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_oneshot(evt, hdev->num);
-}
-
-static int hpet_msi_set_periodic(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_periodic(evt, hdev->num);
-}
-
-static int hpet_msi_resume(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- struct irq_data *data = irq_get_irq_data(hdev->irq);
+ struct hpet_channel *hc = clockevent_to_channel(evt);
+ struct irq_data *data = irq_get_irq_data(hc->irq);
struct msi_msg msg;
/* Restore the MSI msg and unmask the interrupt */
irq_chip_compose_msi_msg(data, &msg);
- hpet_msi_write(hdev, &msg);
+ hpet_msi_write(hc, &msg);
hpet_msi_unmask(data);
return 0;
}
-static int hpet_msi_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- return hpet_next_event(delta, evt, hdev->num);
-}
-
-static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data)
{
- struct hpet_dev *dev = (struct hpet_dev *)data;
- struct clock_event_device *hevt = &dev->evt;
+ struct hpet_channel *hc = data;
+ struct clock_event_device *evt = &hc->evt;
- if (!hevt->event_handler) {
- printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
- dev->num);
+ if (!evt->event_handler) {
+ pr_info("Spurious interrupt HPET channel %d\n", hc->num);
return IRQ_HANDLED;
}
- hevt->event_handler(hevt);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
-static int hpet_setup_irq(struct hpet_dev *dev)
+static int hpet_setup_msi_irq(struct hpet_channel *hc)
{
-
- if (request_irq(dev->irq, hpet_interrupt_handler,
+ if (request_irq(hc->irq, hpet_msi_interrupt_handler,
IRQF_TIMER | IRQF_NOBALANCING,
- dev->name, dev))
+ hc->name, hc))
return -1;
- disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
- enable_irq(dev->irq);
+ disable_irq(hc->irq);
+ irq_set_affinity(hc->irq, cpumask_of(hc->cpu));
+ enable_irq(hc->irq);
- printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
- dev->name, dev->irq);
+ pr_debug("%s irq %u for MSI\n", hc->name, hc->irq);
return 0;
}
-/* This should be called in specific @cpu */
-static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+/* Invoked from the hotplug callback on @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
{
- struct clock_event_device *evt = &hdev->evt;
-
- WARN_ON(cpu != smp_processor_id());
- if (!(hdev->flags & HPET_DEV_VALID))
- return;
-
- hdev->cpu = cpu;
- per_cpu(cpu_hpet_dev, cpu) = hdev;
- evt->name = hdev->name;
- hpet_setup_irq(hdev);
- evt->irq = hdev->irq;
+ struct clock_event_device *evt = &hc->evt;
- evt->rating = 110;
- evt->features = CLOCK_EVT_FEAT_ONESHOT;
- if (hdev->flags & HPET_DEV_PERI_CAP) {
- evt->features |= CLOCK_EVT_FEAT_PERIODIC;
- evt->set_state_periodic = hpet_msi_set_periodic;
- }
+ hc->cpu = cpu;
+ per_cpu(cpu_hpet_channel, cpu) = hc;
+ hpet_setup_msi_irq(hc);
- evt->set_state_shutdown = hpet_msi_shutdown;
- evt->set_state_oneshot = hpet_msi_set_oneshot;
- evt->tick_resume = hpet_msi_resume;
- evt->set_next_event = hpet_msi_next_event;
- evt->cpumask = cpumask_of(hdev->cpu);
+ hpet_init_clockevent(hc, 110);
+ evt->tick_resume = hpet_clkevt_msi_resume;
clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
0x7FFFFFFF);
}
-#ifdef CONFIG_HPET
-/* Reserve at least one timer for userspace (/dev/hpet) */
-#define RESERVE_TIMERS 1
-#else
-#define RESERVE_TIMERS 0
-#endif
-
-static void hpet_msi_capability_lookup(unsigned int start_timer)
+static struct hpet_channel *hpet_get_unused_clockevent(void)
{
- unsigned int id;
- unsigned int num_timers;
- unsigned int num_timers_used = 0;
- int i, irq;
-
- if (hpet_msi_disable)
- return;
-
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return;
- id = hpet_readl(HPET_ID);
-
- num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
- num_timers++; /* Value read out starts from 0 */
- hpet_print_config();
-
- hpet_domain = hpet_create_irq_domain(hpet_blockid);
- if (!hpet_domain)
- return;
-
- hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
- if (!hpet_devs)
- return;
-
- hpet_num_timers = num_timers;
-
- for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
- struct hpet_dev *hdev = &hpet_devs[num_timers_used];
- unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
-
- /* Only consider HPET timer with MSI support */
- if (!(cfg & HPET_TN_FSB_CAP))
- continue;
+ int i;
- hdev->flags = 0;
- if (cfg & HPET_TN_PERIODIC_CAP)
- hdev->flags |= HPET_DEV_PERI_CAP;
- sprintf(hdev->name, "hpet%d", i);
- hdev->num = i;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
- irq = hpet_assign_irq(hpet_domain, hdev, hdev->num);
- if (irq <= 0)
+ if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use)
continue;
-
- hdev->irq = irq;
- hdev->flags |= HPET_DEV_FSB_CAP;
- hdev->flags |= HPET_DEV_VALID;
- num_timers_used++;
- if (num_timers_used == num_possible_cpus())
- break;
+ hc->in_use = 1;
+ return hc;
}
-
- printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
- num_timers, num_timers_used);
+ return NULL;
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
+static int hpet_cpuhp_online(unsigned int cpu)
{
- int i;
-
- if (!hpet_devs)
- return;
+ struct hpet_channel *hc = hpet_get_unused_clockevent();
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ if (hc)
+ init_one_hpet_msi_clockevent(hc, cpu);
+ return 0;
+}
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
+ struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu);
- hd->hd_irq[hdev->num] = hdev->irq;
- hpet_reserve_timer(hd, hdev->num);
- }
+ if (!hc)
+ return 0;
+ free_irq(hc->irq, hc);
+ hc->in_use = 0;
+ per_cpu(cpu_hpet_channel, cpu) = NULL;
+ return 0;
}
-#endif
-static struct hpet_dev *hpet_get_unused_timer(void)
+static void __init hpet_select_clockevents(void)
{
- int i;
+ unsigned int i;
- if (!hpet_devs)
- return NULL;
+ hpet_base.nr_clockevents = 0;
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ /* No point if MSI is disabled or CPU has an Always Running APIC Timer */
+ if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
+ return;
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
- if (test_and_set_bit(HPET_DEV_USED_BIT,
- (unsigned long *)&hdev->flags))
- continue;
- return hdev;
- }
- return NULL;
-}
+ hpet_print_config();
-struct hpet_work_struct {
- struct delayed_work work;
- struct completion complete;
-};
+ hpet_domain = hpet_create_irq_domain(hpet_blockid);
+ if (!hpet_domain)
+ return;
-static void hpet_work(struct work_struct *w)
-{
- struct hpet_dev *hdev;
- int cpu = smp_processor_id();
- struct hpet_work_struct *hpet_work;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+ int irq;
- hpet_work = container_of(w, struct hpet_work_struct, work.work);
+ if (hc->mode != HPET_MODE_UNUSED)
+ continue;
- hdev = hpet_get_unused_timer();
- if (hdev)
- init_one_hpet_msi_clockevent(hdev, cpu);
+ /* Only consider HPET channel with MSI support */
+ if (!(hc->boot_cfg & HPET_TN_FSB_CAP))
+ continue;
- complete(&hpet_work->complete);
-}
+ sprintf(hc->name, "hpet%d", i);
-static int hpet_cpuhp_online(unsigned int cpu)
-{
- struct hpet_work_struct work;
-
- INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
- init_completion(&work.complete);
- /* FIXME: add schedule_work_on() */
- schedule_delayed_work_on(cpu, &work.work, 0);
- wait_for_completion(&work.complete);
- destroy_delayed_work_on_stack(&work.work);
- return 0;
-}
+ irq = hpet_assign_irq(hpet_domain, hc, hc->num);
+ if (irq <= 0)
+ continue;
-static int hpet_cpuhp_dead(unsigned int cpu)
-{
- struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+ hc->irq = irq;
+ hc->mode = HPET_MODE_CLOCKEVT;
- if (!hdev)
- return 0;
- free_irq(hdev->irq, hdev);
- hdev->flags &= ~HPET_DEV_USED;
- per_cpu(cpu_hpet_dev, cpu) = NULL;
- return 0;
-}
-#else
+ if (++hpet_base.nr_clockevents == num_possible_cpus())
+ break;
+ }
-static void hpet_msi_capability_lookup(unsigned int start_timer)
-{
- return;
+ pr_info("%d channels of %d reserved for per-cpu timers\n",
+ hpet_base.nr_channels, hpet_base.nr_clockevents);
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
- return;
-}
-#endif
+#else
+
+static inline void hpet_select_clockevents(void) { }
#define hpet_cpuhp_online NULL
#define hpet_cpuhp_dead NULL
@@ -753,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
/*
* Reading the HPET counter is a very slow operation. If a large number of
* CPUs are trying to access the HPET counter simultaneously, it can cause
- * massive delay and slow down system performance dramatically. This may
+ * massive delays and slow down system performance dramatically. This may
* happen when HPET is the default clock source instead of TSC. For a
* really large system with hundreds of CPUs, the slowdown may be so
- * severe that it may actually crash the system because of a NMI watchdog
+ * severe that it can actually crash the system because of an NMI watchdog
* soft lockup, for example.
*
* If multiple CPUs are trying to access the HPET counter at the same time,
@@ -765,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
*
* This special feature is only enabled on x86-64 systems. It is unlikely
* that 32-bit x86 systems will have enough CPUs to require this feature
- * with its associated locking overhead. And we also need 64-bit atomic
- * read.
+ * with its associated locking overhead. We also need a 64-bit atomic read.
*
- * The lock and the hpet value are stored together and can be read in a
+ * The lock and the HPET value are stored together and can be read in a
* single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
* is 32 bits in size.
*/
@@ -857,15 +753,40 @@ static struct clocksource clocksource_hpet = {
.resume = hpet_resume_counter,
};
-static int hpet_clocksource_register(void)
+/*
+ * AMD SB700 based systems with spread spectrum enabled use a SMM based
+ * HPET emulation to provide proper frequency setting.
+ *
+ * On such systems the SMM code is initialized with the first HPET register
+ * access and takes some time to complete. During this time the config
+ * register reads 0xffffffff. We check for max 1000 loops whether the
+ * config register reads a non-0xffffffff value to make sure that the
+ * HPET is up and running before we proceed any further.
+ *
+ * A counting loop is safe, as the HPET access takes thousands of CPU cycles.
+ *
+ * On non-SB700 based machines this check is only done once and has no
+ * side effects.
+ */
+static bool __init hpet_cfg_working(void)
{
- u64 start, now;
- u64 t1;
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (hpet_readl(HPET_CFG) != 0xFFFFFFFF)
+ return true;
+ }
+
+ pr_warn("Config register invalid. Disabling HPET\n");
+ return false;
+}
+
+static bool __init hpet_counting(void)
+{
+ u64 start, now, t1;
- /* Start the counter */
hpet_restart_counter();
- /* Verify whether hpet counter works */
t1 = hpet_readl(HPET_COUNTER);
start = rdtsc();
@@ -876,30 +797,24 @@ static int hpet_clocksource_register(void)
* 1 GHz == 200us
*/
do {
- rep_nop();
+ if (t1 != hpet_readl(HPET_COUNTER))
+ return true;
now = rdtsc();
} while ((now - start) < 200000UL);
- if (t1 == hpet_readl(HPET_COUNTER)) {
- printk(KERN_WARNING
- "HPET counter not counting. HPET disabled\n");
- return -ENODEV;
- }
-
- clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
- return 0;
+ pr_warn("Counter not counting. HPET disabled\n");
+ return false;
}
-static u32 *hpet_boot_cfg;
-
/**
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
- u32 hpet_period, cfg, id;
+ u32 hpet_period, cfg, id, irq;
+ unsigned int i, channels;
+ struct hpet_channel *hc;
u64 freq;
- unsigned int i, last;
if (!is_hpet_capable())
return 0;
@@ -908,40 +823,22 @@ int __init hpet_enable(void)
if (!hpet_virt_address)
return 0;
+ /* Validate that the config register is working */
+ if (!hpet_cfg_working())
+ goto out_nohpet;
+
+ /* Validate that the counter is counting */
+ if (!hpet_counting())
+ goto out_nohpet;
+
/*
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
-
- /*
- * AMD SB700 based systems with spread spectrum enabled use a
- * SMM based HPET emulation to provide proper frequency
- * setting. The SMM code is initialized with the first HPET
- * register access and takes some time to complete. During
- * this time the config register reads 0xffffffff. We check
- * for max. 1000 loops whether the config register reads a non
- * 0xffffffff value to make sure that HPET is up and running
- * before we go further. A counting loop is safe, as the HPET
- * access takes thousands of CPU cycles. On non SB700 based
- * machines this check is only done once and has no side
- * effects.
- */
- for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
- if (i == 1000) {
- printk(KERN_WARNING
- "HPET config register value = 0xFFFFFFFF. "
- "Disabling HPET\n");
- goto out_nohpet;
- }
- }
-
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
- /*
- * The period is a femto seconds value. Convert it to a
- * frequency.
- */
+ /* The period is a femtoseconds value. Convert it to a frequency. */
freq = FSEC_PER_SEC;
do_div(freq, hpet_period);
hpet_freq = freq;
@@ -953,72 +850,90 @@ int __init hpet_enable(void)
id = hpet_readl(HPET_ID);
hpet_print_config();
- last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ /* This is the HPET channel number which is zero based */
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
-#ifdef CONFIG_HPET_EMULATE_RTC
/*
* The legacy routing mode needs at least two channels, tick timer
* and the rtc emulation channel.
*/
- if (!last)
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2)
goto out_nohpet;
-#endif
+ hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL);
+ if (!hc) {
+ pr_warn("Disabling HPET.\n");
+ goto out_nohpet;
+ }
+ hpet_base.channels = hc;
+ hpet_base.nr_channels = channels;
+
+ /* Read, store and sanitize the global configuration */
cfg = hpet_readl(HPET_CFG);
- hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg),
- GFP_KERNEL);
- if (hpet_boot_cfg)
- *hpet_boot_cfg = cfg;
- else
- pr_warn("HPET initial state will not be saved\n");
+ hpet_base.boot_cfg = cfg;
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
- pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
+ pr_warn("Global config: Unknown bits %#x\n", cfg);
+
+ /* Read, store and sanitize the per channel configuration */
+ for (i = 0; i < channels; i++, hc++) {
+ hc->num = i;
- for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i));
- if (hpet_boot_cfg)
- hpet_boot_cfg[i + 1] = cfg;
+ hc->boot_cfg = cfg;
+ irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
+ hc->irq = irq;
+
cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(i));
+
cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg)
- pr_warn("Unrecognized bits %#x set in cfg#%u\n",
- cfg, i);
+ pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg);
}
hpet_print_config();
- if (hpet_clocksource_register())
- goto out_nohpet;
+ clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
- hpet_legacy_clockevent_register();
+ hpet_legacy_clockevent_register(&hpet_base.channels[0]);
+ hpet_base.channels[0].mode = HPET_MODE_LEGACY;
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
+ hpet_base.channels[1].mode = HPET_MODE_LEGACY;
return 1;
}
return 0;
out_nohpet:
+ kfree(hpet_base.channels);
+ hpet_base.channels = NULL;
+ hpet_base.nr_channels = 0;
hpet_clear_mapping();
hpet_address = 0;
return 0;
}
/*
- * Needs to be late, as the reserve_timer code calls kalloc !
+ * The late initialization runs after the PCI quirks have been invoked
+ * which might have detected a system on which the HPET can be enforced.
+ *
+ * Also, the MSI machinery is not working yet when the HPET is initialized
+ * early.
*
- * Not a problem on i386 as hpet_enable is called from late_time_init,
- * but on x86_64 it is necessary !
+ * If the HPET is enabled, then:
+ *
+ * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y
+ * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents
+ * 3) Setup /dev/hpet if CONFIG_HPET=y
+ * 4) Register hotplug callbacks when clockevents are available
*/
static __init int hpet_late_init(void)
{
int ret;
- if (boot_hpet_disable)
- return -ENODEV;
-
if (!hpet_address) {
if (!force_hpet_address)
return -ENODEV;
@@ -1030,21 +945,14 @@ static __init int hpet_late_init(void)
if (!hpet_virt_address)
return -ENODEV;
- if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP)
- hpet_msi_capability_lookup(2);
- else
- hpet_msi_capability_lookup(0);
-
- hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ hpet_select_device_channel();
+ hpet_select_clockevents();
+ hpet_reserve_platform_timers();
hpet_print_config();
- if (hpet_msi_disable)
+ if (!hpet_base.nr_clockevents)
return 0;
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return 0;
-
- /* This notifier should be called after workqueue is ready */
ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
hpet_cpuhp_online, NULL);
if (ret)
@@ -1063,47 +971,47 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
- if (is_hpet_capable() && hpet_virt_address) {
- unsigned int cfg = hpet_readl(HPET_CFG), id, last;
-
- if (hpet_boot_cfg)
- cfg = *hpet_boot_cfg;
- else if (hpet_legacy_int_enabled) {
- cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = false;
- }
- cfg &= ~HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
+ unsigned int i;
+ u32 cfg;
- if (!hpet_boot_cfg)
- return;
+ if (!is_hpet_capable() || !hpet_virt_address)
+ return;
- id = hpet_readl(HPET_ID);
- last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ /* Restore boot configuration with the enable bit cleared */
+ cfg = hpet_base.boot_cfg;
+ cfg &= ~HPET_CFG_ENABLE;
+ hpet_writel(cfg, HPET_CFG);
- for (id = 0; id <= last; ++id)
- hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id));
+ /* Restore the channel boot configuration */
+ for (i = 0; i < hpet_base.nr_channels; i++)
+ hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i));
- if (*hpet_boot_cfg & HPET_CFG_ENABLE)
- hpet_writel(*hpet_boot_cfg, HPET_CFG);
- }
+ /* If the HPET was enabled at boot time, reenable it */
+ if (hpet_base.boot_cfg & HPET_CFG_ENABLE)
+ hpet_writel(hpet_base.boot_cfg, HPET_CFG);
}
#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+/*
+ * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
* is enabled, we support RTC interrupt functionality in software.
+ *
* RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
+ *
+ * 1) Update Interrupt - generate an interrupt, every second, when the
+ * RTC clock is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2)
+ *
+ * (1) and (2) above are implemented using polling at a frequency of 64 Hz:
+ * DEFAULT_RTC_INT_FREQ.
+ *
+ * The exact frequency is a tradeoff between accuracy and interrupt overhead.
+ *
+ * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency,
+ * if it's higher.
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
@@ -1124,7 +1032,7 @@ static unsigned long hpet_pie_limit;
static rtc_irq_handler irq_handler;
/*
- * Check that the hpet counter c1 is ahead of the c2
+ * Check that the HPET counter c1 is ahead of c2
*/
static inline int hpet_cnt_ahead(u32 c1, u32 c2)
{
@@ -1162,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler)
EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
/*
- * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
- * is not supported by all HPET implementations for timer 1.
+ * Channel 1 for RTC emulation. We use one shot mode, as periodic mode
+ * is not supported by all HPET implementations for channel 1.
*
* hpet_rtc_timer_init() is called when the rtc is initialized.
*/
@@ -1176,10 +1084,11 @@ int hpet_rtc_timer_init(void)
return 0;
if (!hpet_default_delta) {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
uint64_t clc;
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
- clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
+ clc >>= evt->shift + DEFAULT_RTC_SHIFT;
hpet_default_delta = clc;
}
@@ -1208,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
u32 cfg = hpet_readl(HPET_T1_CFG);
+
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
@@ -1249,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
}
EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
- unsigned char sec)
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
{
if (!is_hpet_enabled())
return 0;
@@ -1270,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq)
if (!is_hpet_enabled())
return 0;
- if (freq <= DEFAULT_RTC_INT_FREQ)
+ if (freq <= DEFAULT_RTC_INT_FREQ) {
hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
- else {
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
+ } else {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
+
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
do_div(clc, freq);
- clc >>= hpet_clockevent.shift;
+ clc >>= evt->shift;
hpet_pie_delta = clc;
hpet_pie_limit = 0;
}
+
return 1;
}
EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
@@ -1316,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void)
if (hpet_rtc_flags & RTC_PIE)
hpet_pie_count += lost_ints;
if (printk_ratelimit())
- printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
- lost_ints);
+ pr_warn("Lost %d RTC interrupts\n", lost_ints);
}
}
@@ -1339,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_prev_update_sec = curr_time.tm_sec;
}
- if (hpet_rtc_flags & RTC_PIE &&
- ++hpet_pie_count >= hpet_pie_limit) {
+ if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) {
rtc_int_flag |= RTC_PF;
hpet_pie_count = 0;
}
@@ -1349,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
(curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
(curr_time.tm_min == hpet_alarm_time.tm_min) &&
(curr_time.tm_hour == hpet_alarm_time.tm_hour))
- rtc_int_flag |= RTC_AF;
+ rtc_int_flag |= RTC_AF;
if (rtc_int_flag) {
rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d73083021002..4d8d53ed02c9 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) 2007 Alan Stern
* Copyright (C) 2009 IBM Corporation
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index 0a3e70fd00d6..2cd124ad9380 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* 8237A DMA controller suspend functions.
*
* Written by Pierre Ossman, 2005.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
*/
#include <linux/dmi.h>
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0d307a657abb..2b7999a1a50a 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
#include <linux/timex.h>
#include <linux/i8253.h>
+#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/smp.h>
@@ -18,10 +19,32 @@
*/
struct clock_event_device *global_clock_event;
-void __init setup_pit_timer(void)
+/*
+ * Modern chipsets can disable the PIT clock which makes it unusable. It
+ * would be possible to enable the clock but the registers are chipset
+ * specific and not discoverable. Avoid the whack a mole game.
+ *
+ * These platforms have discoverable TSC/CPU frequencies, but skipping the
+ * PIT also requires knowing the local APIC timer frequency, as that is
+ * normally calibrated against the PIT interrupt.
+ */
+static bool __init use_pit(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
+ return true;
+
+ /* This also returns true when APIC is disabled */
+ return apic_needs_pit();
+}
+
+bool __init pit_timer_init(void)
{
+ if (!use_pit())
+ return false;
+
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
+ return true;
}
#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 6d8917875f44..87ef69a72c52 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Interrupt descriptor table related code
- *
- * This file is licensed under the GPL V2
*/
#include <linux/interrupt.h>
@@ -320,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void)
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
set_bit(i, system_vectors);
- set_intr_gate(i, spurious_interrupt);
+ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
+ set_intr_gate(i, entry);
}
#endif
}
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 85de790583f9..4c407833faca 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -11,13 +11,19 @@ extern struct boot_params boot_params;
static enum efi_secureboot_mode get_sb_mode(void)
{
efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
+ efi_char16_t efi_SetupMode_name[] = L"SetupMode";
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_status_t status;
unsigned long size;
- u8 secboot;
+ u8 secboot, setupmode;
size = sizeof(secboot);
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_info("ima: secureboot mode unknown, no efi\n");
+ return efi_secureboot_mode_unknown;
+ }
+
/* Get variable contents into buffer */
status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid,
NULL, &size, &secboot);
@@ -31,7 +37,14 @@ static enum efi_secureboot_mode get_sb_mode(void)
return efi_secureboot_mode_unknown;
}
- if (secboot == 0) {
+ size = sizeof(setupmode);
+ status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+ NULL, &size, &setupmode);
+
+ if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
+ setupmode = 0;
+
+ if (secboot == 0 || setupmode == 1) {
pr_info("ima: secureboot mode disabled\n");
return efi_secureboot_mode_disabled;
}
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 805b7a341aca..fdb6506ceaaa 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -13,7 +13,22 @@
#include <linux/dmi.h>
#include <linux/io.h>
-int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+#define IO_DELAY_TYPE_0X80 0
+#define IO_DELAY_TYPE_0XED 1
+#define IO_DELAY_TYPE_UDELAY 2
+#define IO_DELAY_TYPE_NONE 3
+
+#if defined(CONFIG_IO_DELAY_0X80)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0X80
+#elif defined(CONFIG_IO_DELAY_0XED)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0XED
+#elif defined(CONFIG_IO_DELAY_UDELAY)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_UDELAY
+#elif defined(CONFIG_IO_DELAY_NONE)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_NONE
+#endif
+
+int io_delay_type __read_mostly = DEFAULT_IO_DELAY_TYPE;
static int __initdata io_delay_override;
@@ -24,13 +39,13 @@ void native_io_delay(void)
{
switch (io_delay_type) {
default:
- case CONFIG_IO_DELAY_TYPE_0X80:
+ case IO_DELAY_TYPE_0X80:
asm volatile ("outb %al, $0x80");
break;
- case CONFIG_IO_DELAY_TYPE_0XED:
+ case IO_DELAY_TYPE_0XED:
asm volatile ("outb %al, $0xed");
break;
- case CONFIG_IO_DELAY_TYPE_UDELAY:
+ case IO_DELAY_TYPE_UDELAY:
/*
* 2 usecs is an upper-bound for the outb delay but
* note that udelay doesn't have the bus-level
@@ -39,7 +54,8 @@ void native_io_delay(void)
* are shorter until calibrated):
*/
udelay(2);
- case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ case IO_DELAY_TYPE_NONE:
break;
}
}
@@ -47,9 +63,9 @@ EXPORT_SYMBOL(native_io_delay);
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
- if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ if (io_delay_type == IO_DELAY_TYPE_0X80) {
pr_notice("%s: using 0xed I/O delay port\n", id->ident);
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
}
return 0;
@@ -115,13 +131,13 @@ static int __init io_delay_param(char *s)
return -EINVAL;
if (!strcmp(s, "0x80"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ io_delay_type = IO_DELAY_TYPE_0X80;
else if (!strcmp(s, "0xed"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
else if (!strcmp(s, "udelay"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ io_delay_type = IO_DELAY_TYPE_UDELAY;
else if (!strcmp(s, "none"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+ io_delay_type = IO_DELAY_TYPE_NONE;
else
return -EINVAL;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 59b5f2ea7c2f..4215653f8a8e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common interrupt code for 32 and 64 bit
*/
@@ -134,7 +135,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
@@ -246,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
- if (desc != VECTOR_RETRIGGERED) {
+ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index d177940aa090..1cb3ca9bba49 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
*
* (C) Copyright 2016 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
* the maximum turbo frequencies of some cores in a CPU package may be
* higher than for the other cores in the same package. In that case,
@@ -69,8 +65,6 @@ static int sched_itmt_update_handler(struct ctl_table *table, int write,
return ret;
}
-static unsigned int zero;
-static unsigned int one = 1;
static struct ctl_table itmt_kern_table[] = {
{
.procname = "sched_itmt_enabled",
@@ -78,8 +72,8 @@ static struct ctl_table itmt_kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_itmt_update_handler,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{}
};
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 1b2ee55a2dfb..3ad34f01de2a 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
static void __init jailhouse_timer_init(void)
{
- lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
+ lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
}
static unsigned long jailhouse_get_tsc(void)
@@ -203,7 +203,7 @@ bool jailhouse_paravirt(void)
return jailhouse_cpuid_base() != 0;
}
-static bool jailhouse_x2apic_available(void)
+static bool __init jailhouse_x2apic_available(void)
{
/*
* The x2APIC is only available if the root cell enabled it. Jailhouse
@@ -217,4 +217,5 @@ const struct hypervisor_x86 x86_hyper_jailhouse __refconst = {
.detect = jailhouse_detect,
.init.init_platform = jailhouse_init_platform,
.init.x2apic_available = jailhouse_x2apic_available,
+ .ignore_nopv = true,
};
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e631c358f7f4..044053235302 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
-static void __ref __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+static void __jump_label_set_jump_code(struct jump_entry *entry,
+ enum jump_label_type type,
+ union jump_code_union *code,
+ int init)
{
- union jump_code_union jmp;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
- const void *expect, *code;
+ const void *expect;
int line;
- jmp.jump = 0xe9;
- jmp.offset = jump_entry_target(entry) -
- (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ code->jump = 0xe9;
+ code->offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
- if (type == JUMP_LABEL_JMP) {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = ideal_nop; line = __LINE__;
- }
-
- code = &jmp.code;
+ if (init) {
+ expect = default_nop; line = __LINE__;
+ } else if (type == JUMP_LABEL_JMP) {
+ expect = ideal_nop; line = __LINE__;
} else {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = &jmp.code; line = __LINE__;
- }
-
- code = ideal_nop;
+ expect = code->code; line = __LINE__;
}
if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
bug_at((void *)jump_entry_code(entry), line);
+ if (type == JUMP_LABEL_NOP)
+ memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+}
+
+static void __ref __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
+{
+ union jump_code_union code;
+
+ __jump_label_set_jump_code(entry, type, &code, init);
+
/*
* As long as only a single processor is running and the code is still
* not marked as RO, text_poke_early() can be used; Checking that
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
* always nop being the 'currently valid' instruction
*/
if (init || system_state == SYSTEM_BOOTING) {
- text_poke_early((void *)jump_entry_code(entry), code,
+ text_poke_early((void *)jump_entry_code(entry), &code,
JUMP_LABEL_NOP_SIZE);
return;
}
- text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
(void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
}
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
mutex_unlock(&text_mutex);
}
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct text_poke_loc *tp;
+ void *entry_code;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * Fallback to the non-batching mode.
+ */
+ arch_jump_label_transform(entry, type);
+ return true;
+ }
+
+ /*
+ * No more space in the vector, tell upper layer to apply
+ * the queue before continuing.
+ */
+ if (tp_vec_nr == TP_VEC_MAX)
+ return false;
+
+ tp = &tp_vec[tp_vec_nr];
+
+ entry_code = (void *)jump_entry_code(entry);
+
+ /*
+ * The INT3 handler will do a bsearch in the queue, so we need entries
+ * to be sorted. We can survive an unsorted list by rejecting the entry,
+ * forcing the generic jump_label code to apply the queue. Warning once,
+ * to raise the attention to the case of an unsorted entry that is
+ * better not happen, because, in the worst case we will perform in the
+ * same way as we do without batching - with some more overhead.
+ */
+ if (tp_vec_nr > 0) {
+ int prev = tp_vec_nr - 1;
+ struct text_poke_loc *prev_tp = &tp_vec[prev];
+
+ if (WARN_ON_ONCE(prev_tp->addr > entry_code))
+ return false;
+ }
+
+ __jump_label_set_jump_code(entry, type,
+ (union jump_code_union *) &tp->opcode, 0);
+
+ tp->addr = entry_code;
+ tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
+ tp->len = JUMP_LABEL_NOP_SIZE;
+
+ tp_vec_nr++;
+
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ if (!tp_vec_nr)
+ return;
+
+ mutex_lock(&text_mutex);
+ text_poke_bp_batch(tp_vec, tp_vec_nr);
+ mutex_unlock(&text_mutex);
+
+ tp_vec_nr = 0;
+}
+
static enum {
JL_STATE_START,
JL_STATE_NO_UPDATE,
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index fd6f8fbbe6f2..edaa30b20841 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Architecture specific debugfs files
*
* Copyright (C) 2007, Intel Corp.
* Huang Ying <ying.huang@intel.com>
- *
- * This file is released under the GPLv2.
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
@@ -68,33 +67,18 @@ static const struct file_operations fops_setup_data = {
.llseek = default_llseek,
};
-static int __init
+static void __init
create_setup_data_node(struct dentry *parent, int no,
struct setup_data_node *node)
{
- struct dentry *d, *type, *data;
+ struct dentry *d;
char buf[16];
sprintf(buf, "%d", no);
d = debugfs_create_dir(buf, parent);
- if (!d)
- return -ENOMEM;
-
- type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
- if (!type)
- goto err_dir;
-
- data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
- if (!data)
- goto err_type;
- return 0;
-
-err_type:
- debugfs_remove(type);
-err_dir:
- debugfs_remove(d);
- return -ENOMEM;
+ debugfs_create_x32("type", S_IRUGO, d, &node->type);
+ debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
}
static int __init create_setup_data_nodes(struct dentry *parent)
@@ -107,8 +91,6 @@ static int __init create_setup_data_nodes(struct dentry *parent)
int no = 0;
d = debugfs_create_dir("setup_data", parent);
- if (!d)
- return -ENOMEM;
pa_data = boot_params.hdr.setup_data;
@@ -129,19 +111,17 @@ static int __init create_setup_data_nodes(struct dentry *parent)
node->paddr = pa_data;
node->type = data->type;
node->len = data->len;
- error = create_setup_data_node(d, no, node);
+ create_setup_data_node(d, no, node);
pa_data = data->next;
memunmap(data);
- if (error)
- goto err_dir;
no++;
}
return 0;
err_dir:
- debugfs_remove(d);
+ debugfs_remove_recursive(d);
return error;
}
@@ -152,35 +132,18 @@ static struct debugfs_blob_wrapper boot_params_blob = {
static int __init boot_params_kdebugfs_init(void)
{
- struct dentry *dbp, *version, *data;
- int error = -ENOMEM;
+ struct dentry *dbp;
+ int error;
dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
- if (!dbp)
- return -ENOMEM;
-
- version = debugfs_create_x16("version", S_IRUGO, dbp,
- &boot_params.hdr.version);
- if (!version)
- goto err_dir;
- data = debugfs_create_blob("data", S_IRUGO, dbp,
- &boot_params_blob);
- if (!data)
- goto err_version;
+ debugfs_create_x16("version", S_IRUGO, dbp, &boot_params.hdr.version);
+ debugfs_create_blob("data", S_IRUGO, dbp, &boot_params_blob);
error = create_setup_data_nodes(dbp);
if (error)
- goto err_data;
+ debugfs_remove_recursive(dbp);
- return 0;
-
-err_data:
- debugfs_remove(data);
-err_version:
- debugfs_remove(version);
-err_dir:
- debugfs_remove(dbp);
return error;
}
#endif /* CONFIG_DEBUG_BOOT_PARAMS */
@@ -190,8 +153,6 @@ static int __init arch_kdebugfs_init(void)
int error = 0;
arch_debugfs_dir = debugfs_create_dir("x86", NULL);
- if (!arch_debugfs_dir)
- return -ENOMEM;
#ifdef CONFIG_DEBUG_BOOT_PARAMS
error = boot_params_kdebugfs_init();
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 22f60dd26460..5ebcd02cbca7 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kexec bzImage loader
*
* Copyright (C) 2014 Red Hat Inc.
* Authors:
* Vivek Goyal <vgoyal@redhat.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#define pr_fmt(fmt) "kexec-bzImage64: " fmt
@@ -321,6 +319,11 @@ static int bzImage64_probe(const char *buf, unsigned long len)
return ret;
}
+ if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) {
+ pr_err("bzImage cannot handle 5-level paging mode.\n");
+ return ret;
+ }
+
/* I've got a bzImage */
pr_debug("It's a relocatable bzImage64\n");
ret = 0;
@@ -416,7 +419,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
efi_map_offset = params_cmdline_sz;
efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
- /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+ /* Copy setup header onto bootparams. Documentation/x86/boot.rst */
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
/* Is there a limit on setup header size? */
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 13b13311b792..23297ea64f5f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -1,14 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
*/
/*
@@ -127,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
- case GDB_SS:
- if (!user_mode(regs))
- *(unsigned long *)mem = __KERNEL_DS;
- break;
- case GDB_SP:
- if (!user_mode(regs))
- *(unsigned long *)mem = kernel_stack_pointer(regs);
- break;
case GDB_GS:
case GDB_FS:
*(unsigned long *)mem = 0xFFFF;
@@ -767,7 +750,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
BREAK_INSTR_SIZE);
bpt->type = BP_POKE_BREAKPOINT;
- return err;
+ return 0;
}
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
index 0d33169cc1a2..8a753432b2d4 100644
--- a/arch/x86/kernel/kprobes/Makefile
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for kernel probes
#
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2b949f4fd4d8..7d3a2e2daf01 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -5,15 +5,10 @@
/* Kprobes and Optprobes common header */
#include <asm/asm.h>
-
-#ifdef CONFIG_FRAME_POINTER
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
- " mov %" _ASM_SP ", %" _ASM_BP "\n"
-#else
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
-#endif
+#include <asm/frame.h>
#ifdef CONFIG_X86_64
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
" subq $24, %rsp\n" \
@@ -27,11 +22,13 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- SAVE_RBP_STRING \
+ " pushq %rbp\n" \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
- " pushq %r15\n"
+ " pushq %r15\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popq %r15\n" \
" popq %r14\n" \
@@ -51,19 +48,22 @@
/* Skip orig_ax, ip, cs */ \
" addq $24, %rsp\n"
#else
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
+ " subl $4*4, %esp\n" \
" pushl %fs\n" \
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- SAVE_RBP_STRING \
+ " pushl %ebp\n" \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
" pushl %ecx\n" \
- " pushl %ebx\n"
+ " pushl %ebx\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popl %ebx\n" \
" popl %ecx\n" \
@@ -72,8 +72,8 @@
" popl %edi\n" \
" popl %ebp\n" \
" popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
+ /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
+ " addl $7*4, %esp\n"
#endif
/* Ensure if the instruction can be boostable */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 9e4fa2484d10..0e0b08008b5a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
@@ -69,7 +56,7 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
+#define stack_addr(regs) ((unsigned long *)regs->sp)
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -731,29 +718,27 @@ asm(
".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
-#ifdef CONFIG_X86_64
/* We don't bother saving the ss register */
+#ifdef CONFIG_X86_64
" pushq %rsp\n"
" pushfq\n"
SAVE_REGS_STRING
" movq %rsp, %rdi\n"
" call trampoline_handler\n"
/* Replace saved sp with true return address. */
- " movq %rax, 152(%rsp)\n"
+ " movq %rax, 19*8(%rsp)\n"
RESTORE_REGS_STRING
" popfq\n"
#else
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %eax\n"
" call trampoline_handler\n"
- /* Move flags to cs */
- " movl 56(%esp), %edx\n"
- " movl %edx, 52(%esp)\n"
- /* Replace saved flags with true return address. */
- " movl %eax, 56(%esp)\n"
+ /* Replace saved sp with true return address. */
+ " movl %eax, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " popf\n"
+ " popfl\n"
#endif
" ret\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
@@ -794,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
- /* On x86-64, we use pt_regs->sp for return address holder. */
- frame_pointer = &regs->sp;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
- /* On x86-32, we use pt_regs->flags for return address holder. */
- frame_pointer = &regs->flags;
#endif
+ /* We use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
@@ -826,7 +808,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
continue;
/*
* Return probes must be pushed on this hash list correct
- * order (same as return order) so that it can be poped
+ * order (same as return order) so that it can be popped
* correctly. However, if we find it is pushed it incorrect
* order, this means we find a function which should not be
* probed, because the wrong order entry is pushed on the
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index ef819e19650b..681a4b36e9bb 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Dynamic Ftrace based Kprobes Optimization
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) Hitachi Ltd., 2012
*/
#include <linux/kprobes.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f14262952015..9d4aedece363 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes Jump Optimization (Optprobes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
* Copyright (C) Hitachi Ltd., 2012
*/
@@ -115,14 +102,15 @@ asm (
"optprobe_template_call:\n"
ASM_NOP5
/* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
+ " movq 18*8(%rsp), %rdx\n"
+ " movq %rdx, 19*8(%rsp)\n"
RESTORE_REGS_STRING
/* Skip flags entry */
" addq $8, %rsp\n"
" popfq\n"
#else /* CONFIG_X86_32 */
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -131,9 +119,13 @@ asm (
".global optprobe_template_call\n"
"optprobe_template_call:\n"
ASM_NOP5
+ /* Move flags into esp */
+ " movl 14*4(%esp), %edx\n"
+ " movl %edx, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
+ /* Skip flags entry */
+ " addl $4, %esp\n"
+ " popfl\n"
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
@@ -165,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
} else {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
@@ -431,7 +422,7 @@ err:
void arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
s32 rel = (s32)((long)op->optinsn.insn -
@@ -443,10 +434,10 @@ void arch_optimize_kprobes(struct list_head *oplist)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
+ insn_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
list_del_init(&op->list);
@@ -456,12 +447,12 @@ void arch_optimize_kprobes(struct list_head *oplist)
/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ insn_buff[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
}
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 163ae706a0d4..7969da939213 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Architecture specific sysfs attributes in /sys/kernel
*
@@ -5,8 +6,6 @@
* Huang Ying <ying.huang@intel.com>
* Copyright (C) 2013, 2013 Red Hat, Inc.
* Dave Young <dyoung@redhat.com>
- *
- * This file is released under the GPLv2
*/
#include <linux/kobject.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3f0cc828cc36..b7f34fe2171e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* KVM paravirt_ops implementation
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright IBM Corporation, 2007
* Authors: Anthony Liguori <aliguori@us.ibm.com>
@@ -255,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
enum ctx_state prev_state;
switch (kvm_read_and_reset_pf_reason()) {
default:
- do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code, address);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+ kvm_async_pf_task_wait((u32)address, !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
- kvm_async_pf_task_wake((u32)read_cr2());
+ kvm_async_pf_task_wake((u32)address);
rcu_irq_exit();
break;
}
@@ -540,6 +527,21 @@ static void kvm_setup_pv_ipi(void)
pr_info("KVM setup pv IPIs\n");
}
+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
+{
+ int cpu;
+
+ native_send_call_func_ipi(mask);
+
+ /* Make sure other vCPUs get a chance to run if they need to. */
+ for_each_cpu(cpu, mask) {
+ if (vcpu_is_preempted(cpu)) {
+ kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+ break;
+ }
+ }
+}
+
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@@ -651,6 +653,12 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+ if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+ pr_info("KVM setup pv sched yield\n");
+ }
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
@@ -830,6 +838,7 @@ asm(
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
"ret;"
+".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");
#endif
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c
index e9d252d873aa..6a68e41206e7 100644
--- a/arch/x86/kernel/livepatch.c
+++ b/arch/x86/kernel/livepatch.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* livepatch.c - x86-specific Kernel Live Patching Core
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/module.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 5409c2800ab5..77854b192fef 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -1,9 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* handle transition of Linux booting another kernel
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/mm.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ceba408ea982..5dcd438ad8f2 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -1,9 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* handle transition of Linux booting another kernel
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#define pr_fmt(fmt) "kexec: " fmt
@@ -18,6 +16,7 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <linux/vmalloc.h>
+#include <linux/efi.h>
#include <asm/init.h>
#include <asm/pgtable.h>
@@ -29,6 +28,55 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
+#ifdef CONFIG_ACPI
+/*
+ * Used while adding mapping for ACPI tables.
+ * Can be reused when other iomem regions need be mapped
+ */
+struct init_pgtable_data {
+ struct x86_mapping_info *info;
+ pgd_t *level4p;
+};
+
+static int mem_region_callback(struct resource *res, void *arg)
+{
+ struct init_pgtable_data *data = arg;
+ unsigned long mstart, mend;
+
+ mstart = res->start;
+ mend = mstart + resource_size(res) - 1;
+
+ return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
+}
+
+static int
+map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ struct init_pgtable_data data;
+ unsigned long flags;
+ int ret;
+
+ data.info = info;
+ data.level4p = level4p;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ /* ACPI tables could be located in ACPI Non-volatile Storage region */
+ ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
+ &data, mem_region_callback);
+ if (ret && ret != -EINVAL)
+ return ret;
+
+ return 0;
+}
+#else
+static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
+#endif
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -36,6 +84,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
#endif
+static int
+map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
+{
+#ifdef CONFIG_EFI
+ unsigned long mstart, mend;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ mstart = (boot_params.efi_info.efi_systab |
+ ((u64)boot_params.efi_info.efi_systab_hi<<32));
+
+ if (efi_enabled(EFI_64BIT))
+ mend = mstart + sizeof(efi_system_table_64_t);
+ else
+ mend = mstart + sizeof(efi_system_table_32_t);
+
+ if (!mstart)
+ return 0;
+
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+#endif
+ return 0;
+}
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
@@ -50,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image)
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
+ pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
+ unsigned long vaddr, paddr;
+ int result = -ENOMEM;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long vaddr, paddr;
- int result = -ENOMEM;
vaddr = (unsigned long)relocate_kernel;
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
@@ -92,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
- set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
+
+ if (sev_active())
+ prot = PAGE_KERNEL_EXEC;
+
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
return 0;
err:
return result;
@@ -129,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
level4p = (pgd_t *)__va(start_pgtable);
clear_page(level4p);
+ if (sev_active()) {
+ info.page_flag |= _PAGE_ENC;
+ info.kernpg_flag |= _PAGE_ENC;
+ }
+
if (direct_gbpages)
info.direct_gbpages = true;
@@ -159,6 +242,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
return result;
}
+ /*
+ * Prepare EFI systab and ACPI tables for kexec kernel since they are
+ * not covered by pfn_mapped.
+ */
+ result = map_efi_systab(&info, level4p);
+ if (result)
+ return result;
+
+ result = map_acpi_tables(&info, level4p);
+ if (result)
+ return result;
+
return init_transition_pgtable(image, level4p);
}
@@ -559,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void)
kexec_mark_crashkres(false);
}
+/*
+ * During a traditional boot under SME, SME will encrypt the kernel,
+ * so the SME kexec kernel also needs to be un-encrypted in order to
+ * replicate a normal SME boot.
+ *
+ * During a traditional boot under SEV, the kernel has already been
+ * loaded encrypted, so the SEV kexec kernel needs to be encrypted in
+ * order to replicate a normal SEV boot.
+ */
int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
{
+ if (sev_active())
+ return 0;
+
/*
* If SME is active we need to be sure that kexec pages are
* not encrypted because when we boot to the new kernel the
@@ -571,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
{
+ if (sev_active())
+ return;
+
/*
* If SME is active we need to reset the pages back to being
* an encrypted mapping before freeing them.
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index cfa3106faee4..d5c72cb877b3 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for x86.
Copyright (C) 2001 Rusty Russell.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1bfe5c6e6cfe..afac7ccce72f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
* local APIC has default address
*/
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- return;
+ goto out;
}
pr_info("Default MP configuration #%d\n", mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
- if (check_physptr(mpf, early)) {
- early_memunmap(mpf, sizeof(*mpf));
- return;
- }
+ if (check_physptr(mpf, early))
+ goto out;
} else
BUG();
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
/*
* Only use the first configuration found.
*/
-
+out:
early_memunmap(mpf, sizeof(*mpf));
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4588414e2561..3db2252b958d 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* ----------------------------------------------------------------------- *
*
* Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
* Copyright 2009 Intel Corporation; author: H. Peter Anvin
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
* ----------------------------------------------------------------------- */
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 05b09896cfaf..4df7705022b9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7bbaa6baf37f..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Paravirtualization interfaces
Copyright (C) 2006 Rusty Russell IBM Corporation
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/
@@ -70,24 +58,24 @@ struct branch {
u32 delta;
} __attribute__((packed));
-static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+static unsigned paravirt_patch_call(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
- unsigned long delta = (unsigned long)target - (addr+5);
-
- if (len < 5) {
-#ifdef CONFIG_RETPOLINE
- WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
- return len; /* call too long for patch site */
+ const int call_len = 5;
+ struct branch *b = insn_buff;
+ unsigned long delta = (unsigned long)target - (addr+call_len);
+
+ if (len < call_len) {
+ pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+ /* Kernel might not be viable if patching fails, bail out: */
+ BUG_ON(1);
}
b->opcode = 0xe8; /* call */
b->delta = delta;
- BUILD_BUG_ON(sizeof(*b) != 5);
+ BUILD_BUG_ON(sizeof(*b) != call_len);
- return 5;
+ return call_len;
}
#ifdef CONFIG_PARAVIRT_XXL
@@ -97,10 +85,10 @@ u64 notrace _paravirt_ident_64(u64 x)
return x;
}
-static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
+ struct branch *b = insn_buff;
unsigned long delta = (unsigned long)target - (addr+5);
if (len < 5) {
@@ -125,7 +113,7 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
}
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insn_buff,
unsigned long addr, unsigned len)
{
/*
@@ -137,36 +125,36 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
+ ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == _paravirt_nop)
ret = 0;
#ifdef CONFIG_PARAVIRT_XXL
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_64)
- ret = paravirt_patch_ident_64(insnbuf, len);
+ ret = paravirt_patch_ident_64(insn_buff, len);
else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
else
/* Otherwise call the function. */
- ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
return ret;
}
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
const char *start, const char *end)
{
unsigned insn_len = end - start;
- if (insn_len > len || start == NULL)
- insn_len = len;
- else
- memcpy(insnbuf, start, insn_len);
+ /* Alternative instruction is too large for the patch site and we cannot continue: */
+ BUG_ON(insn_len > len || start == NULL);
+
+ memcpy(insn_buff, start, insn_len);
return insn_len;
}
@@ -382,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
- .mmu.read_cr2 = native_read_cr2,
+ .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
.mmu.write_cr2 = native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
new file mode 100644
index 000000000000..3eff63c090d2
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+
+#define PSTART(d, m) \
+ patch_data_##d.m
+
+#define PEND(d, m) \
+ (PSTART(d, m) + sizeof(patch_data_##d.m))
+
+#define PATCH(d, m, insn_buff, len) \
+ paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
+
+#define PATCH_CASE(ops, m, data, insn_buff, len) \
+ case PARAVIRT_PATCH(ops.m): \
+ return PATCH(data, ops##_##m, insn_buff, len)
+
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+ const unsigned char irq_irq_disable[1];
+ const unsigned char irq_irq_enable[1];
+ const unsigned char irq_save_fl[2];
+ const unsigned char mmu_read_cr2[3];
+ const unsigned char mmu_read_cr3[3];
+ const unsigned char mmu_write_cr3[3];
+ const unsigned char irq_restore_fl[2];
+# ifdef CONFIG_X86_64
+ const unsigned char cpu_wbinvd[2];
+ const unsigned char cpu_usergs_sysret64[6];
+ const unsigned char cpu_swapgs[3];
+ const unsigned char mov64[3];
+# else
+ const unsigned char cpu_iret[1];
+# endif
+};
+
+static const struct patch_xxl patch_data_xxl = {
+ .irq_irq_disable = { 0xfa }, // cli
+ .irq_irq_enable = { 0xfb }, // sti
+ .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
+ .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
+ .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
+# ifdef CONFIG_X86_64
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
+ .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
+ .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
+ .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
+ 0x48, 0x0f, 0x07 }, // swapgs; sysretq
+ .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
+ .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
+# else
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
+ .irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf
+ .cpu_iret = { 0xcf }, // iret
+# endif
+};
+
+unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
+{
+#ifdef CONFIG_X86_64
+ return PATCH(xxl, mov64, insn_buff, len);
+#endif
+ return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+struct patch_lock {
+ unsigned char queued_spin_unlock[3];
+ unsigned char vcpu_is_preempted[2];
+};
+
+static const struct patch_lock patch_data_lock = {
+ .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
+
+# ifdef CONFIG_X86_64
+ .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
+# else
+ .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
+# endif
+};
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
+ unsigned int len)
+{
+ switch (type) {
+
+#ifdef CONFIG_PARAVIRT_XXL
+ PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
+
+ PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
+ PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
+ PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
+
+# ifdef CONFIG_X86_64
+ PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
+ PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
+ PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
+# else
+ PATCH_CASE(cpu, iret, xxl, insn_buff, len);
+# endif
+#endif
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+ case PARAVIRT_PATCH(lock.queued_spin_unlock):
+ if (pv_is_native_spin_unlock())
+ return PATCH(lock, queued_spin_unlock, insn_buff, len);
+ break;
+
+ case PARAVIRT_PATCH(lock.vcpu_is_preempted):
+ if (pv_is_native_vcpu_is_preempted())
+ return PATCH(lock, vcpu_is_preempted, insn_buff, len);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return paravirt_patch_default(type, insn_buff, addr, len);
+}
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
deleted file mode 100644
index de138d3912e4..000000000000
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- /* arg in %edx:%eax, return in %edx:%eax */
- return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(cpu, iret);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
deleted file mode 100644
index 9d9e04b31077..000000000000
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- return paravirt_patch_insns(insnbuf, len,
- start__mov64, end__mov64);
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(cpu, usergs_sysret64);
- PATCH_SITE(cpu, swapgs);
- PATCH_SITE(cpu, wbinvd);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index c70720f61a34..9d4343aa481b 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from arch/powerpc/kernel/iommu.c
*
@@ -7,19 +8,6 @@
* Author: Jon Mason <jdmason@kudzu.us>
* Author: Muli Ben-Yehuda <muli@il.ibm.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define pr_fmt(fmt) "Calgary: " fmt
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dcd272dbd0a9..f62b498b18fb 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void)
}
/*
- * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel
* parameter documentation.
*/
static __init int iommu_setup(char *p)
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 07c30ee17425..bb7e1132290b 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return regs_get_register(regs, pt_regs_offset[idx]);
}
+#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \
+ ~((1ULL << PERF_REG_X86_MAX) - 1))
+
#ifdef CONFIG_X86_32
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
(1ULL << PERF_REG_X86_R9) | \
@@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
int perf_reg_validate(u64 mask)
{
- if (!mask || (mask & REG_NOSUPPORT))
+ if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;
return 0;
@@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
int perf_reg_validate(u64 mask)
{
- if (!mask || (mask & REG_NOSUPPORT))
+ if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2399e910d109..b8ceec4974fe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned long sp;
- unsigned short ss, gs;
+ unsigned short gs;
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss;
+ if (user_mode(regs))
gs = get_user_gs(regs);
- } else {
- sp = kernel_stack_pointer(regs);
- savesegment(ss, ss);
+ else
savesegment(gs, gs);
- }
show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
- regs->si, regs->di, regs->bp, sp);
+ regs->si, regs->di, regs->bp, regs->sp);
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
if (mode != SHOW_REGS_ALL)
return;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f8e1af380cdf..af64519b2695 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 1995 Linus Torvalds
*
@@ -142,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
void release_thread(struct task_struct *dead_task)
{
- if (dead_task->mm) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- if (dead_task->mm->context.ldt) {
- pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt->entries,
- dead_task->mm->context.ldt->nr_entries);
- BUG();
- }
-#endif
- }
+ WARN_ON(dead_task->mm);
}
enum which_selector {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 4b8ee05dd6ad..0fdbe89d0754 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* By Ross Biro 1/23/92 */
/*
* Pentium III FXSR, SSE support
@@ -24,6 +25,7 @@
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -153,35 +155,6 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-/*
- * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps. The previous stack will be directly underneath the saved
- * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
- *
- * Now, if the stack is empty, '&regs->sp' is out of range. In this
- * case we try to take the previous stack. To always return a non-null
- * stack pointer we fall back to regs as stack if no previous stack
- * exists.
- *
- * This is valid only for kernel mode traps.
- */
-unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
- unsigned long sp = (unsigned long)&regs->sp;
- u32 *prev_esp;
-
- if (context == (sp & ~(THREAD_SIZE - 1)))
- return sp;
-
- prev_esp = (u32 *)(context);
- if (*prev_esp)
- return (unsigned long)*prev_esp;
-
- return (unsigned long)regs;
-}
-EXPORT_SYMBOL_GPL(kernel_stack_pointer);
-
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -644,7 +617,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
unsigned long val = 0;
if (n < HBP_NUM) {
- struct perf_event *bp = thread->ptrace_bps[n];
+ int index = array_index_nospec(n, HBP_NUM);
+ struct perf_event *bp = thread->ptrace_bps[index];
if (bp)
val = bp->hw.info.address;
@@ -746,9 +720,6 @@ static int ioperm_get(struct task_struct *target,
void ptrace_disable(struct task_struct *child)
{
user_disable_single_step(child);
-#ifdef TIF_SYSCALL_EMU
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-#endif
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1360,18 +1331,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
/* Send us the fake SIGTRAP */
force_sig_fault(SIGTRAP, si_code,
- user_mode(regs) ? (void __user *)regs->ip : NULL, tsk);
+ user_mode(regs) ? (void __user *)regs->ip : NULL);
}
void user_single_step_report(struct pt_regs *regs)
{
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ send_sigtrap(regs, 0, TRAP_BRKPT);
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 9b158b4716d2..10125358b9c4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* paravirtual clock -- common code used by kvm/xen
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 77630d57e7bf..ee26df08002e 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -1,9 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* relocate_kernel.S - put the kernel image in place to boot
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 11eda21eb697..c51ccff5cd01 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -1,9 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* relocate_kernel.S - put the kernel image in place to boot
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 905dae880563..bbe35bf879f5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 1995 Linus Torvalds
*
@@ -452,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#define CRASH_ALIGN SZ_16M
/*
- * Keep the crash kernel below this limit. On 32 bits earlier kernels
- * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * Keep the crash kernel below this limit.
+ *
+ * On 32 bits earlier kernels would limit the kernel to the low 512 MiB
+ * due to mapping restrictions.
+ *
+ * On 64-bit, the kdump kernel needs to be restricted to below 64TB, which
+ * is the upper limit of system RAM in 4-level paging mode. Since the kdump
+ * jump could be from 5-level to 4-level paging, the jump will fail if the
+ * kernel is put above 64TB, and there's no way to detect the paging mode
+ * of the kernel which will be loaded for dumping during the 1st kernel
+ * bootup.
*/
#ifdef CONFIG_X86_32
# define CRASH_ADDR_LOW_MAX SZ_512M
# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
# define CRASH_ADDR_LOW_MAX SZ_4G
-# define CRASH_ADDR_HIGH_MAX MAXMEM
+# define CRASH_ADDR_HIGH_MAX SZ_64T
#endif
static int __init reserve_crashkernel_low(void)
@@ -826,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
void __init setup_arch(char **cmdline_p)
{
+ /*
+ * Reserve the memory occupied by the kernel between _text and
+ * __end_of_kernel_reserve symbols. Any kernel sections after the
+ * __end_of_kernel_reserve symbol must be explicitly reserved with a
+ * separate memblock_reserve() or they will be discarded.
+ */
memblock_reserve(__pa_symbol(_text),
- (unsigned long)__bss_stop - (unsigned long)_text);
+ (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
/*
* Make sure page 0 is always reserved because on systems with
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 364813cea647..8eb7193e158d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
+ if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
@@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
pr_cont("\n");
}
- force_sig(SIGSEGV, me);
+ force_sig(SIGSEGV);
}
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 04adc8d60aed..96421f97e75c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Intel SMP support routines.
*
@@ -6,9 +7,6 @@
* (c) 2002,2003 Andi Kleen, SuSE Labs.
*
* i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
*/
#include <linux/init.h>
@@ -146,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
}
cpumask_copy(allbutself, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), allbutself);
+ __cpumask_clear_cpu(smp_processor_id(), allbutself);
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 73e69aaaa117..fdbd47ceb84d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* x86 SMP booting functions
*
@@ -12,9 +13,6 @@
* Pentium Pro and Pentium-II/Xeon MP machines.
* Original development of Linux SMP code supported by Caldera.
*
- * This code is released under the GNU General Public License version 2 or
- * later.
- *
* Fixes
* Felix Koop : NR_CPUS used properly
* Jose Renau : Handle single CPU case.
@@ -91,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+/* representing HT, core, and die siblings of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
+EXPORT_PER_CPU_SYMBOL(cpu_die_map);
+
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
@@ -101,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
+static unsigned int logical_die __read_mostly;
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
@@ -212,17 +215,11 @@ static void notrace start_secondary(void *unused)
* before cpu_init(), SMP booting is too fragile that we want to
* limit the things done here to the most necessary things.
*/
- if (boot_cpu_has(X86_FEATURE_PCID))
- __write_cr4(__read_cr4() | X86_CR4_PCIDE);
+ cr4_init();
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
- /*
- * Initialize the CR4 shadow before doing anything that could
- * try to read it.
- */
- cr4_init_shadow();
__flush_tlb_all();
#endif
load_current_idt();
@@ -302,6 +299,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+/**
+ * topology_phys_to_logical_die - Map a physical die id to logical
+ *
+ * Returns logical die id or -1 if not found
+ */
+int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+{
+ int cpu;
+ int proc_id = cpu_data(cur_cpu).phys_proc_id;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->cpu_die_id == die_id &&
+ c->phys_proc_id == proc_id)
+ return c->logical_die_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
@@ -326,6 +343,29 @@ found:
cpu_data(cpu).logical_proc_id = new;
return 0;
}
+/**
+ * topology_update_die_map - Update the physical to logical die map
+ * @die: The die id as retrieved via CPUID
+ * @cpu: The cpu for which this is updated
+ */
+int topology_update_die_map(unsigned int die, unsigned int cpu)
+{
+ int new;
+
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_die(die, cpu);
+ if (new >= 0)
+ goto found;
+
+ new = logical_die++;
+ if (new != die) {
+ pr_info("CPU %u Converting physical %u to logical die %u\n",
+ cpu, die, new);
+ }
+found:
+ cpu_data(cpu).logical_die_id = new;
+ return 0;
+}
void __init smp_store_boot_cpu_info(void)
{
@@ -335,6 +375,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
topology_update_package_map(c->phys_proc_id, id);
+ topology_update_die_map(c->cpu_die_id, id);
c->initialized = true;
}
@@ -389,6 +430,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
if (c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
@@ -400,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
}
} else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
c->cpu_core_id == o->cpu_core_id) {
return topology_sane(c, o, "smt");
}
@@ -462,6 +505,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if ((c->phys_proc_id == o->phys_proc_id) &&
+ (c->cpu_die_id == o->cpu_die_id))
+ return true;
+ return false;
+}
+
+
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
@@ -524,6 +576,7 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
+ cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -572,6 +625,9 @@ void set_cpu_sibling_map(int cpu)
}
if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
+
+ if ((i == cpu) || (has_mp && match_die(c, o)))
+ link_mask(topology_die_cpumask, cpu, i);
}
threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1176,6 +1232,7 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
+ cpumask_set_cpu(0, topology_die_cpumask(0));
}
/*
@@ -1271,6 +1328,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
@@ -1310,8 +1368,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
- native_pv_lock_init();
-
uv_system_init();
set_mtrr_aps_delayed_init();
@@ -1341,6 +1397,7 @@ void __init native_smp_prepare_boot_cpu(void)
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
cpu_set_state_online(me);
+ native_pv_lock_init();
}
void __init calculate_max_logical_packages(void)
@@ -1491,6 +1548,8 @@ static void remove_siblinginfo(int cpu)
cpu_data(sibling).booted_cores--;
}
+ for_each_cpu(sibling, topology_die_cpumask(cpu))
+ cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
for_each_cpu(sibling, topology_sibling_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
@@ -1498,6 +1557,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
+ cpumask_clear(topology_die_cpumask(cpu));
c->cpu_core_id = 0;
c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2abf27d7df6b..4f36d3241faf 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -129,11 +129,9 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
break;
if ((unsigned long)fp < regs->sp)
break;
- if (frame.ret_addr) {
- if (!consume_entry(cookie, frame.ret_addr, false))
- return;
- }
- if (fp == frame.next_fp)
+ if (!frame.ret_addr)
+ break;
+ if (!consume_entry(cookie, frame.ret_addr, false))
break;
fp = frame.next_fp;
}
diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c
index 160386e9fc17..014ebd8ca869 100644
--- a/arch/x86/kernel/sysfb.c
+++ b/arch/x86/kernel/sysfb.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic System Framebuffers on x86
* Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
/*
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index fa51723571c8..8eb67a670b10 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic System Framebuffers on x86
* Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
*
* EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
/*
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 85195d447a92..01f0e2263b86 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic System Framebuffers on x86
* Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
*/
/*
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 6e5ef8fb8a02..a49fe1dcb47e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* tboot.c: main implementation of helper functions used by kernel for
* runtime support of Intel(R) Trusted Execution Technology
*
* Copyright (c) 2006-2009, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#include <linux/intel-iommu.h>
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 285aaa62d153..6384be751eff 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file manages the translation entries for the IBM Calgary IOMMU.
*
@@ -7,20 +8,6 @@
*
* Author: Jon Mason <jdmason@us.ibm.com>
* Author: Muli Ben-Yehuda <muli@il.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/types.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0e14f6c0d35e..7ce29cee9f9e 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
- unsigned long *sp =
- (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *sp = (unsigned long *)regs->sp;
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
@@ -82,8 +81,11 @@ static void __init setup_default_timer_irq(void)
/* Default timer init function */
void __init hpet_time_init(void)
{
- if (!hpet_enable())
- setup_pit_timer();
+ if (!hpet_enable()) {
+ if (!pit_timer_init())
+ return;
+ }
+
setup_default_timer_irq();
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index a5b802a12212..71d3fef1edc9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -5,6 +5,7 @@
#include <linux/user.h>
#include <linux/regset.h>
#include <linux/syscalls.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/desc.h>
@@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *u_info)
{
struct user_desc info;
+ int index;
if (idx == -1 && get_user(idx, &u_info->entry_number))
return -EFAULT;
@@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx,
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- fill_user_desc(&info, idx,
- &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
+ index = idx - GDT_ENTRY_TLS_MIN;
+ index = array_index_nospec(index,
+ GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1);
+
+ fill_user_desc(&info, idx, &p->thread.tls_array[index]);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
diff --git a/arch/x86/kernel/tls.h b/arch/x86/kernel/tls.h
index 2f083a2fe216..3a76e1d3535e 100644
--- a/arch/x86/kernel/tls.h
+++ b/arch/x86/kernel/tls.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Internal declarations for x86 TLS implementation functions.
*
* Copyright (C) 2007 Red Hat, Inc. All rights reserved.
*
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- *
* Red Hat Author: Roland McGrath.
*/
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8b6d03e55d2f..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -254,9 +254,9 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
show_signal(tsk, signr, "trap ", str, regs, error_code);
if (!sicode)
- force_sig(signr, tsk);
+ force_sig(signr);
else
- force_sig_fault(signr, sicode, addr, tsk);
+ force_sig_fault(signr, sicode, addr);
}
NOKPROBE_SYMBOL(do_trap);
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
#ifdef CONFIG_X86_64
/* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
- unsigned long cr2;
-#endif
#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
* stack even if the actual trigger for the double fault was
* something else.
*/
- cr2 = read_cr2();
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
#endif
@@ -566,7 +562,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
- force_sig(SIGSEGV, tsk);
+ force_sig(SIGSEGV);
}
NOKPROBE_SYMBOL(do_general_protection);
@@ -805,7 +801,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
}
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
- send_sigtrap(tsk, regs, error_code, si_code);
+ send_sigtrap(regs, error_code, si_code);
cond_local_irq_disable(regs);
debug_stack_usage_dec();
@@ -856,7 +852,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
force_sig_fault(SIGFPE, si_code,
- (void __user *)uprobe_get_trap_addr(regs), task);
+ (void __user *)uprobe_get_trap_addr(regs));
}
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 356dfc555a27..57d87f79558f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
@@ -58,7 +59,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -75,7 +76,7 @@ void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void __always_inline cyc2ns_read_end(void)
+__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -631,31 +632,38 @@ unsigned long native_calibrate_tsc(void)
crystal_khz = ecx_hz / 1000;
- if (crystal_khz == 0) {
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
- crystal_khz = 24000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_X:
- crystal_khz = 25000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- crystal_khz = 19200; /* 19.2 MHz */
- break;
- }
- }
+ /*
+ * Denverton SoCs don't report crystal clock, and also don't support
+ * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
+ * clock.
+ */
+ if (crystal_khz == 0 &&
+ boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+ crystal_khz = 25000;
- if (crystal_khz == 0)
- return 0;
/*
- * TSC frequency determined by CPUID is a "hardware reported"
+ * TSC frequency reported directly by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
* is considered a known frequency.
*/
- setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ if (crystal_khz != 0)
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+ * clock, but we can easily calculate it to a high degree of accuracy
+ * by considering the crystal ratio and the CPU speed.
+ */
+ if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+ unsigned int eax_base_mhz, ebx, ecx, edx;
+
+ cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+ crystal_khz = eax_base_mhz * 1000 *
+ eax_denominator / ebx_numerator;
+ }
+
+ if (crystal_khz == 0)
+ return 0;
/*
* For Atom SoCs TSC is the only reliable clocksource.
@@ -664,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * The local APIC appears to be fed by the core crystal clock
+ * (which sounds entirely sensible). We can set the global
+ * lapic_timer_period here to avoid having to calibrate the APIC
+ * timer later.
+ */
+ lapic_timer_period = crystal_khz * 1000 / HZ;
+#endif
+
return crystal_khz * ebx_numerator / eax_denominator;
}
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 3d0e9aeea7c8..067858fe4db8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
/*
* MSR-based CPU/TSC frequency discovery for certain CPUs.
*
- * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
+ * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
* Return processor base frequency in KHz, or 0 on failure.
*/
unsigned long cpu_khz_from_msr(void)
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
res = freq * ratio;
#ifdef CONFIG_X86_LOCAL_APIC
- lapic_timer_frequency = (freq * 1000) / HZ;
+ lapic_timer_period = (freq * 1000) / HZ;
#endif
/*
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f8f3cfda01ae..5b345add550f 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 9b9fd4826e7a..a224b5ab103f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
@@ -69,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
}
}
-static size_t regs_size(struct pt_regs *regs)
-{
- /* x86_32 regs from kernel mode are two words shorter: */
- if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
- return sizeof(*regs) - 2*sizeof(long);
-
- return sizeof(*regs);
-}
-
static bool in_entry_code(unsigned long ip)
{
char *addr = (char *)ip;
@@ -197,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
}
#endif
-#ifdef CONFIG_X86_32
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
-#else
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
-#endif
-
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
{
@@ -213,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
size_t len;
if (state->regs)
- prev_frame_end = (void *)state->regs + regs_size(state->regs);
+ prev_frame_end = (void *)state->regs + sizeof(*state->regs);
else
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
@@ -221,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = KERNEL_REGS_SIZE;
+ len = sizeof(*regs);
state->got_irq = true;
} else {
frame = next_bp;
@@ -245,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
- /*
- * On 32-bit with user mode regs, make sure the last two regs are safe
- * to access:
- */
- if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
- !on_stack(info, frame, len + 2*sizeof(long)))
- return false;
-
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -411,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* Pretend that the frame is complete and that BP points to it, but save
* the real BP so that we can use it when looking for the next frame.
*/
- if (regs && regs->ip == 0 &&
- (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+ if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
state->next_bp = bp;
- bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+ bp = ((unsigned long *)regs->sp) - 1;
}
/* Initialize stack info and make sure the frame data is accessible: */
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 4f0e17b90463..c49f10ffd8cd 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/sched.h>
#include <linux/ftrace.h>
#include <asm/ptrace.h>
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 89be1be1790c..332ae6530fa8 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/ptrace.h>
@@ -81,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
* But they are copies of the ftrace entries that are static and
* defined in ftrace_*.S, which do have orc entries.
*
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
* ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
* the stack will be identical.
*/
static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -127,6 +128,16 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+ .end = 0,
+};
+
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -391,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
* calls and calls to noreturn functions.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
- if (!orc)
- goto err;
+ if (!orc) {
+ /*
+ * As a fallback, try to assume this code uses a frame pointer.
+ * This is useful for generated code, like BPF, which ORC
+ * doesn't know about. This is just a guess, so the rest of
+ * the unwind is no longer considered reliable.
+ */
+ orc = &orc_fp_entry;
+ state->error = true;
+ }
/* End-of-stack check for kernel threads: */
if (orc->sp_reg == ORC_REG_UNDEFINED) {
@@ -579,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
goto done;
state->ip = regs->ip;
- state->sp = kernel_stack_pointer(regs);
+ state->sp = regs->sp;
state->bp = regs->bp;
state->regs = regs;
state->full_regs = true;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ccf03416e434..d8359ebeea70 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* User-space Probes (UProbes) for x86
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2008-2011
* Authors:
* Srikar Dronamraju
@@ -1087,7 +1074,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
current->pid, regs->sp, regs->ip);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
return -1;
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 3d3c2f71f617..a024c4f7ba56 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* verify_cpu.S - Code for cpu long mode and SSE verification. This
@@ -9,9 +10,6 @@
* Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
* Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
*
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- *
* This is a common code for verification whether CPU supports
* long mode and SSE or not. It is not called directly instead this
* file is included at various places and compiled in that context.
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6a38717d179c..a76c12b38e92 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
return 1; /* we let this handle by the calling routine */
current->thread.trap_nr = trapno;
current->thread.error_code = error_code;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
return 0;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0850b5149345..e2feacf921a0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,10 +141,10 @@ SECTIONS
*(.text.__x86.indirect_thunk)
__indirect_thunk_end = .;
#endif
- } :text = 0x9090
- /* End of text section */
- _etext = .;
+ /* End of text section */
+ _etext = .;
+ } :text = 0x9090
NOTES :text :note
@@ -368,6 +368,14 @@ SECTIONS
__bss_stop = .;
}
+ /*
+ * The memory occupied from _text to here, __end_of_kernel_reserve, is
+ * automatically reserved in setup_arch(). Anything after here must be
+ * explicitly reserved using memblock_reserve() or it will be discarded
+ * and treated as available memory.
+ */
+ __end_of_kernel_reserve = .;
+
. = ALIGN(PAGE_SIZE);
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
@@ -379,10 +387,34 @@ SECTIONS
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
_end = .;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /*
+ * Early scratch/workarea section: Lives outside of the kernel proper
+ * (_text - _end).
+ *
+ * Resides after _end because even though the .brk section is after
+ * __end_of_kernel_reserve, the .brk section is later reserved as a
+ * part of the kernel. Since it is located after __end_of_kernel_reserve
+ * it will be discarded and become part of the available memory. As
+ * such, it can only be used by very early boot code and must not be
+ * needed afterwards.
+ *
+ * Currently used by SME for performing in-place encryption of the
+ * kernel during boot. Resides on a 2MB boundary to simplify the
+ * pagetable setup used for SME in-place encryption.
+ */
+ . = ALIGN(HPAGE_SIZE);
+ .init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
+ __init_scratch_begin = .;
+ *(.init.scratch)
+ . = ALIGN(HPAGE_SIZE);
+ __init_scratch_end = .;
+ }
+#endif
+
STABS_DEBUG
DWARF_DEBUG
- /* Sections to be discarded */
DISCARDS
/DISCARD/ : {
*(.eh_frame)
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 891a75dbc131..796cfaa46bfa 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* vSMPowered(tm) systems specific initialization
* Copyright (C) 2005 ScaleMP Inc.
*
- * Use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
- *
* Ravikiran Thirumalai <kiran@scalemp.com>,
* Shai Fultheim <shai@scalemp.com>
* Paravirt ops integration: Glauber de Oliveira Costa <gcosta@redhat.com>,
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 50a2b492fdd6..1bef687faf22 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -29,8 +29,8 @@ void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
-static bool __init bool_x86_init_noop(void) { return false; }
-static void x86_op_int_noop(int cpu) { }
+bool __init bool_x86_init_noop(void) { return false; }
+void x86_op_int_noop(int cpu) { }
/*
* The platform setup functions are preset with the default functions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fc042419e670..840e12583b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -41,6 +41,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_NO_POLL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 80a642a0143d..22c2720cd948 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
* cpuid support routines
@@ -6,10 +7,6 @@
*
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
* Copyright IBM Corporation, 2008
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/kvm_host.h>
@@ -137,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (best) {
+ if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT)
+ best->ecx |= F(MWAIT);
+ else
+ best->ecx &= ~F(MWAIT);
+ }
+ }
+
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -279,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum)
*word &= boot_cpu_data.x86_capability[wordnum];
}
-static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
entry->function = function;
entry->index = index;
+ entry->flags = 0;
+
cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
- entry->flags = 0;
+
+ switch (function) {
+ case 2:
+ entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ break;
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0x14:
+ case 0x8000001d:
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ break;
+ }
}
-static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
- u32 func, u32 index, int *nent, int maxnent)
+static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
+ u32 func, int *nent, int maxnent)
{
+ entry->function = func;
+ entry->index = 0;
+ entry->flags = 0;
+
switch (func) {
case 0:
entry->eax = 7;
@@ -303,21 +329,93 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
break;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- if (index == 0)
- entry->ecx = F(RDPID);
+ entry->eax = 0;
+ entry->ecx = F(RDPID);
++*nent;
default:
break;
}
- entry->function = func;
- entry->index = index;
-
return 0;
}
-static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index, int *nent, int maxnent)
+static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
+{
+ unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+ unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
+ unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
+ unsigned f_la57;
+
+ /* cpuid 7.0.ebx */
+ const u32 kvm_cpuid_7_0_ebx_x86_features =
+ F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
+
+ /* cpuid 7.0.ecx*/
+ const u32 kvm_cpuid_7_0_ecx_x86_features =
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+ F(MD_CLEAR);
+
+ /* cpuid 7.1.eax */
+ const u32 kvm_cpuid_7_1_eax_x86_features =
+ F(AVX512_BF16);
+
+ switch (index) {
+ case 0:
+ entry->eax = min(entry->eax, 1u);
+ entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
+ /* TSC_ADJUST is emulated */
+ entry->ebx |= F(TSC_ADJUST);
+
+ entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+ f_la57 = entry->ecx & F(LA57);
+ cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ /* Set LA57 based on hardware capability. */
+ entry->ecx |= f_la57;
+ entry->ecx |= f_umip;
+ /* PKU is not yet implemented for shadow paging. */
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+ entry->ecx &= ~F(PKU);
+
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
+ break;
+ case 1:
+ entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
+ }
+}
+
+static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
+ int *nent, int maxnent)
{
int r;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -330,12 +428,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_lm = 0;
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
- unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
- unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
- unsigned f_la57 = 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -380,7 +474,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
- F(AMD_SSB_NO) | F(AMD_STIBP);
+ F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -388,31 +482,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
F(PMM) | F(PMM_EN);
- /* cpuid 7.0.ebx */
- const u32 kvm_cpuid_7_0_ebx_x86_features =
- F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
-
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
- /* cpuid 7.0.ecx*/
- const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
- F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
-
- /* cpuid 7.0.edx*/
- const u32 kvm_cpuid_7_0_edx_x86_features =
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR);
-
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -421,12 +494,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(entry, function, index);
+ do_host_cpuid(entry, function, 0);
++*nent;
switch (function) {
case 0:
- entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
+ /* Limited to the highest leaf implemented in KVM. */
+ entry->eax = min(entry->eax, 0x1fU);
break;
case 1:
entry->edx &= kvm_cpuid_1_edx_x86_features;
@@ -444,23 +518,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 2: {
int t, times = entry->eax & 0xff;
- entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times; ++t) {
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[t], function, 0);
- entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ do_host_cpuid(&entry[t], function, 0);
++*nent;
}
break;
}
- /* function 4 has additional index. */
- case 4: {
+ /* functions 4 and 0x8000001d have additional index. */
+ case 4:
+ case 0x8000001d: {
int i, cache_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -469,9 +541,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
cache_type = entry[i - 1].eax & 0x1f;
if (!cache_type)
break;
- do_cpuid_1_ent(&entry[i], function, i);
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@@ -482,36 +552,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx = 0;
entry->edx = 0;
break;
+ /* function 7 has additional index. */
case 7: {
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- /* Mask ebx against host capability word 9 */
- if (index == 0) {
- entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
- cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
- // TSC_ADJUST is emulated
- entry->ebx |= F(TSC_ADJUST);
- entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
- f_la57 = entry->ecx & F(LA57);
- cpuid_mask(&entry->ecx, CPUID_7_ECX);
- /* Set LA57 based on hardware capability. */
- entry->ecx |= f_la57;
- entry->ecx |= f_umip;
- /* PKU is not yet implemented for shadow paging. */
- if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
- entry->ecx &= ~F(PKU);
- entry->edx &= kvm_cpuid_7_0_edx_x86_features;
- cpuid_mask(&entry->edx, CPUID_7_EDX);
- /*
- * We emulate ARCH_CAPABILITIES in software even
- * if the host doesn't support it.
- */
- entry->edx |= F(ARCH_CAPABILITIES);
- } else {
- entry->ebx = 0;
- entry->ecx = 0;
- entry->edx = 0;
+ int i;
+
+ for (i = 0; ; ) {
+ do_cpuid_7_mask(&entry[i], i);
+ if (i == entry->eax)
+ break;
+ if (*nent >= maxnent)
+ goto out;
+
+ ++i;
+ do_host_cpuid(&entry[i], function, i);
+ ++*nent;
}
- entry->eax = 0;
break;
}
case 9:
@@ -545,11 +600,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = edx.full;
break;
}
- /* function 0xb has additional index. */
+ /*
+ * Per Intel's SDM, the 0x1f is a superset of 0xb,
+ * thus they can be handled by common code.
+ */
+ case 0x1f:
case 0xb: {
int i, level_type;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@@ -558,9 +616,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
- do_cpuid_1_ent(&entry[i], function, i);
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@@ -573,7 +629,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx = xstate_required_size(supported, false);
entry->ecx = entry->ebx;
entry->edx &= supported >> 32;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
if (!supported)
break;
@@ -582,7 +637,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[i], function, idx);
+ do_host_cpuid(&entry[i], function, idx);
if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
@@ -599,8 +654,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
entry[i].ecx = 0;
entry[i].edx = 0;
- entry[i].flags |=
- KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
++i;
}
@@ -613,12 +666,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!f_intel_pt)
break;
- entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (t = 1; t <= times; ++t) {
if (*nent >= maxnent)
goto out;
- do_cpuid_1_ent(&entry[t], function, t);
- entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ do_host_cpuid(&entry[t], function, t);
++*nent;
}
break;
@@ -642,7 +693,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
- (1 << KVM_FEATURE_PV_SEND_IPI);
+ (1 << KVM_FEATURE_PV_SEND_IPI) |
+ (1 << KVM_FEATURE_POLL_CONTROL) |
+ (1 << KVM_FEATURE_PV_SCHED_YIELD);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -701,8 +754,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
- break;
- case 0x8000001d:
+ case 0x8000001e:
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
@@ -733,21 +785,19 @@ out:
return r;
}
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
- u32 idx, int *nent, int maxnent, unsigned int type)
+static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
+ int *nent, int maxnent, unsigned int type)
{
if (type == KVM_GET_EMULATED_CPUID)
- return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+ return __do_cpuid_func_emulated(entry, func, nent, maxnent);
- return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+ return __do_cpuid_func(entry, func, nent, maxnent);
}
#undef F
struct kvm_cpuid_param {
u32 func;
- u32 idx;
- bool has_leaf_count;
bool (*qualifier)(const struct kvm_cpuid_param *param);
};
@@ -791,11 +841,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
int limit, nent = 0, r = -E2BIG, i;
u32 func;
static const struct kvm_cpuid_param param[] = {
- { .func = 0, .has_leaf_count = true },
- { .func = 0x80000000, .has_leaf_count = true },
- { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
+ { .func = 0 },
+ { .func = 0x80000000 },
+ { .func = 0xC0000000, .qualifier = is_centaur_cpu },
{ .func = KVM_CPUID_SIGNATURE },
- { .func = KVM_CPUID_FEATURES },
};
if (cpuid->nent < 1)
@@ -819,19 +868,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (ent->qualifier && !ent->qualifier(ent))
continue;
- r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
- &nent, cpuid->nent, type);
+ r = do_cpuid_func(&cpuid_entries[nent], ent->func,
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
- if (!ent->has_leaf_count)
- continue;
-
limit = cpuid_entries[nent - 1].eax;
for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
- r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
- &nent, cpuid->nent, type);
+ r = do_cpuid_func(&cpuid_entries[nent], func,
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5b6d1f..d78a61408243 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
- [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
- [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
[CPUID_6_EAX] = { 6, 0, CPUID_EAX},
[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index c19c7ede9bd6..329361b69d5e 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -1,20 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
* Copyright 2016 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/kvm_host.h>
#include <linux/debugfs.h>
+#include "lapic.h"
bool kvm_arch_has_vcpu_debugfs(void)
{
return true;
}
+static int vcpu_get_timer_advance_ns(void *data, u64 *val)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+ *val = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
+
static int vcpu_get_tsc_offset(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -51,6 +58,14 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
if (!ret)
return -ENOMEM;
+ if (lapic_in_kernel(vcpu)) {
+ ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
+ vcpu->debugfs_dentry,
+ vcpu, &vcpu_timer_advance_ns_fops);
+ if (!ret)
+ return -ENOMEM;
+ }
+
if (kvm_has_tsc_control) {
ret = debugfs_create_file("tsc-scaling-ratio", 0444,
vcpu->debugfs_dentry,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d0d5dd44b4f4..718f7d9afedc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
* emulate.c
*
@@ -14,9 +15,6 @@
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
* From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
*/
@@ -314,29 +312,42 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
-#define FOP_FUNC(name) \
+#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
name ":\n\t"
-#define FOP_RET "ret \n\t"
+#define FOP_FUNC(name) \
+ __FOP_FUNC(#name)
+
+#define __FOP_RET(name) \
+ "ret \n\t" \
+ ".size " name ", .-" name "\n\t"
+
+#define FOP_RET(name) \
+ __FOP_RET(#name)
#define FOP_START(op) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- FOP_FUNC("em_" #op)
+ ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ "em_" #op ":\n\t"
#define FOP_END \
".popsection")
+#define __FOPNOP(name) \
+ __FOP_FUNC(name) \
+ __FOP_RET(name)
+
#define FOPNOP() \
- FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
- FOP_RET
+ __FOPNOP(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op, dst) \
- FOP_FUNC(#op "_" #dst) \
- "10: " #op " %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst) \
+ "10: " #op " %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst)
#define FOP1EEX(op, dst) \
FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
@@ -368,8 +379,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP2E(op, dst, src) \
- FOP_FUNC(#op "_" #dst "_" #src) \
- #op " %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src) \
+ #op " %" #src ", %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst "_" #src)
#define FASTOP2(op) \
FOP_START(op) \
@@ -407,8 +419,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP3E(op, dst, src, src2) \
- FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
- #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
+ #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
+ __FOP_RET(#op "_" #dst "_" #src "_" #src2)
/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
@@ -425,7 +438,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
".type " #op ", @function \n\t" \
#op ": \n\t" \
#op " %al \n\t" \
- FOP_RET
+ __FOP_RET(#op)
asm(".pushsection .fixup, \"ax\"\n"
".global kvm_fastop_exception \n"
@@ -451,7 +464,10 @@ FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;
-FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_START(salc)
+FOP_FUNC(salc)
+"pushf; sbb %al, %al; popf \n\t"
+FOP_RET(salc)
FOP_END;
/*
@@ -4260,7 +4276,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
ulong dr6;
ctxt->ops->get_dr(ctxt, 6, &dr6);
- dr6 &= ~15;
+ dr6 &= ~DR_TRAP_BITS;
dr6 |= DR6_BD | DR6_RTM;
ctxt->ops->set_dr(ctxt, 6, dr6);
return emulate_db(ctxt);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8ca4b39918e0..c10a8b10b203 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM Microsoft Hyper-V emulation
*
@@ -15,10 +16,6 @@
* Amit Shah <amit.shah@qumranet.com>
* Ben-Ami Yassour <benami@il.ibm.com>
* Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include "x86.h"
@@ -1597,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
uint16_t code, rep_idx, rep_cnt;
- bool fast, longmode, rep;
+ bool fast, rep;
/*
* hypercall generates UD from non zero cpl and real mode
@@ -1608,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
return 1;
}
- longmode = is_64_bit_mode(vcpu);
-
- if (!longmode) {
+#ifdef CONFIG_X86_64
+ if (is_64_bit_mode(vcpu)) {
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
+ } else
+#endif
+ {
param = ((u64)kvm_rdx_read(vcpu) << 32) |
(kvm_rax_read(vcpu) & 0xffffffff);
ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
@@ -1618,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
(kvm_rsi_read(vcpu) & 0xffffffff);
}
-#ifdef CONFIG_X86_64
- else {
- param = kvm_rcx_read(vcpu);
- ingpa = kvm_rdx_read(vcpu);
- outgpa = kvm_r8_read(vcpu);
- }
-#endif
code = param & 0xffff;
fast = !!(param & HV_HYPERCALL_FAST_BIT);
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index fd7cf13a2144..757cb578101c 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* KVM Microsoft Hyper-V emulation
*
@@ -15,10 +16,6 @@
* Amit Shah <amit.shah@qumranet.com>
* Ben-Ami Yassour <benami@il.ibm.com>
* Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#ifndef __ARCH_X86_KVM_HYPERV_H__
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1add1bc881e2..d859ae8890d0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -45,11 +45,6 @@
#include "lapic.h"
#include "irq.h"
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
@@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
default:
index = (ioapic->ioregsel - 0x10) >> 1;
- ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
e = &ioapic->redirtbl[index];
@@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
entry->fields.remote_irr))
return -1;
- ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
- "vector=%x trig_mode=%x\n",
- entry->fields.dest_id, entry->fields.dest_mode,
- entry->fields.delivery_mode, entry->fields.vector,
- entry->fields.trig_mode);
-
irqe.dest_id = entry->fields.dest_id;
irqe.vector = entry->fields.vector;
irqe.dest_mode = entry->fields.dest_mode;
@@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
@@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
- (void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
switch (len) {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index faa264822cee..e330e7d125f7 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -1,23 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* irq.c: API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
*/
#include <linux/export.h>
@@ -172,3 +160,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
}
+
+bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
+{
+ bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE;
+
+ return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index d5005cc26521..7c6233d37c64 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
*/
#ifndef __IRQ_H
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 3cc3b2d130a0..8ecd48d31800 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* irq_comm.c: Common API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
@@ -86,7 +75,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
- } else if (kvm_lapic_enabled(vcpu)) {
+ } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
if (!kvm_vector_hashing_enabled()) {
if (!lowest)
lowest = vcpu;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4924f83ed4f3..0aa158657f20 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Local APIC virtualization
@@ -13,9 +14,6 @@
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
*
* Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
*/
#include <linux/kvm_host.h>
@@ -54,9 +52,6 @@
#define PRIu64 "u"
#define PRIo64 "o"
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
@@ -71,6 +66,7 @@
#define X2APIC_BROADCAST 0xFFFFFFFFul
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -87,11 +83,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
apic_test_vector(vector, apic->regs + APIC_IRR);
}
-static inline void apic_clear_vector(int vec, void *bitmap)
-{
- clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
-}
-
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -127,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -445,12 +447,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(vcpu->arch.apicv_active)) {
/* need to update RVI */
- apic_clear_vector(vec, apic->regs + APIC_IRR);
+ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_x86_ops->hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
- apic_clear_vector(vec, apic->regs + APIC_IRR);
+ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
if (apic_search_irr(apic) != -1)
apic->irr_pending = true;
}
@@ -633,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
if (pv_eoi_get_user(vcpu, &val) < 0)
- apic_debug("Can't read EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return val & 0x1;
}
@@ -641,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- apic_debug("Can't set EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -651,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -685,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
else
ppr = isrv & 0xf0;
- apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
- apic, ppr, isr, isrv);
-
*new_ppr = ppr;
if (old_ppr != ppr)
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
@@ -764,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
return ((logical_id >> 4) == (mda >> 4))
&& (logical_id & mda & 0xf) != 0;
default:
- apic_debug("Bad DFR vcpu %d: %08x\n",
- apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
return false;
}
}
@@ -804,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
struct kvm_lapic *target = vcpu->arch.apic;
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
- apic_debug("target %p, source %p, dest 0x%x, "
- "dest_mode 0x%x, short_hand 0x%x\n",
- target, source, dest, dest_mode, short_hand);
-
ASSERT(target);
switch (short_hand) {
case APIC_DEST_NOSHORT:
@@ -822,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
case APIC_DEST_ALLBUT:
return target != source;
default:
- apic_debug("kvm: apic: Bad dest shorthand value %x\n",
- short_hand);
return false;
}
}
@@ -1055,9 +1046,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
- kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
+ kvm_lapic_set_vector(vector,
+ apic->regs + APIC_TMR);
else
- apic_clear_vector(vector, apic->regs + APIC_TMR);
+ kvm_lapic_clear_vector(vector,
+ apic->regs + APIC_TMR);
}
if (vcpu->arch.apicv_active)
@@ -1099,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
- } else {
- apic_debug("Ignoring de-assert INIT to vcpu %d\n",
- vcpu->vcpu_id);
}
break;
case APIC_DM_STARTUP:
- apic_debug("SIPI to vcpu %d vector 0x%02x\n",
- vcpu->vcpu_id, vector);
result = 1;
apic->sipi_vector = vector;
/* make sure sipi_vector is visible for the receiver */
@@ -1225,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
trace_kvm_apic_ipi(icr_low, irq.dest_id);
- apic_debug("icr_high 0x%x, icr_low 0x%x, "
- "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
- "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
- "msi_redir_hint 0x%x\n",
- icr_high, icr_low, irq.shorthand, irq.dest_id,
- irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
- irq.vector, irq.msi_redir_hint);
-
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
@@ -1286,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
switch (offset) {
case APIC_ARBPRI:
- apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
case APIC_TMCCT: /* Timer CCR */
@@ -1315,25 +1294,46 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
return container_of(dev, struct kvm_lapic, dev);
}
+#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
+#define APIC_REGS_MASK(first, count) \
+ (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
+
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
{
unsigned char alignment = offset & 0xf;
u32 result;
/* this bitmask has a bit cleared for each reserved register */
- static const u64 rmask = 0x43ff01ffffffe70cULL;
-
- if ((alignment + len) > 4) {
- apic_debug("KVM_APIC_READ: alignment error %x %d\n",
- offset, len);
- return 1;
- }
+ u64 valid_reg_mask =
+ APIC_REG_MASK(APIC_ID) |
+ APIC_REG_MASK(APIC_LVR) |
+ APIC_REG_MASK(APIC_TASKPRI) |
+ APIC_REG_MASK(APIC_PROCPRI) |
+ APIC_REG_MASK(APIC_LDR) |
+ APIC_REG_MASK(APIC_DFR) |
+ APIC_REG_MASK(APIC_SPIV) |
+ APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
+ APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
+ APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
+ APIC_REG_MASK(APIC_ESR) |
+ APIC_REG_MASK(APIC_ICR) |
+ APIC_REG_MASK(APIC_ICR2) |
+ APIC_REG_MASK(APIC_LVTT) |
+ APIC_REG_MASK(APIC_LVTTHMR) |
+ APIC_REG_MASK(APIC_LVTPC) |
+ APIC_REG_MASK(APIC_LVT0) |
+ APIC_REG_MASK(APIC_LVT1) |
+ APIC_REG_MASK(APIC_LVTERR) |
+ APIC_REG_MASK(APIC_TMICT) |
+ APIC_REG_MASK(APIC_TMCCT) |
+ APIC_REG_MASK(APIC_TDCR);
+
+ /* ARBPRI is not valid on x2APIC */
+ if (!apic_x2apic_mode(apic))
+ valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
- if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
- apic_debug("KVM_APIC_READ: read reserved register %x\n",
- offset);
+ if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
- }
result = __apic_read(apic, offset & ~0xf);
@@ -1391,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic)
tmp1 = tdcr & 0xf;
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
apic->divide_count = 0x1 << (tmp2 & 0x7);
-
- apic_debug("timer divide count is 0x%x\n",
- apic->divide_count);
}
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
@@ -1435,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
- struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
- struct kvm_timer *ktimer = &apic->lapic_timer;
-
- if (atomic_read(&apic->lapic_timer.pending))
- return;
-
- atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
-
- /*
- * For x86, the atomic_inc() is serialized, thus
- * using swait_active() is safe.
- */
- if (swait_active(q))
- swake_up_one(q);
-
- if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
- ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
/*
* On APICv, this test will cause a busy wait
* during a higher-priority task.
@@ -1501,50 +1475,106 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
}
}
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
+ s64 advance_expire_delta)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
- u64 guest_tsc, tsc_deadline, ns;
+ u64 ns;
+
+ /* too early */
+ if (advance_expire_delta < 0) {
+ ns = -advance_expire_delta * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ timer_advance_ns -= min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ } else {
+ /* too late */
+ ns = advance_expire_delta * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ timer_advance_ns += min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ }
- if (apic->lapic_timer.expired_tscdeadline == 0)
- return;
+ if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ if (unlikely(timer_advance_ns > 5000)) {
+ timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
+ apic->lapic_timer.timer_advance_adjust_done = false;
+ }
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+}
+
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u64 guest_tsc, tsc_deadline;
- if (!lapic_timer_int_injected(vcpu))
+ if (apic->lapic_timer.expired_tscdeadline == 0)
return;
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+ apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
- if (!apic->lapic_timer.timer_advance_adjust_done) {
- /* too early */
- if (guest_tsc < tsc_deadline) {
- ns = (tsc_deadline - guest_tsc) * 1000000ULL;
- do_div(ns, vcpu->arch.virtual_tsc_khz);
- timer_advance_ns -= min((u32)ns,
- timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
- } else {
- /* too late */
- ns = (guest_tsc - tsc_deadline) * 1000000ULL;
- do_div(ns, vcpu->arch.virtual_tsc_khz);
- timer_advance_ns += min((u32)ns,
- timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
- }
- if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
- apic->lapic_timer.timer_advance_adjust_done = true;
- if (unlikely(timer_advance_ns > 5000)) {
- timer_advance_ns = 0;
- apic->lapic_timer.timer_advance_adjust_done = true;
- }
- apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+ if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+ adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ if (lapic_timer_int_injected(vcpu))
+ __kvm_wait_lapic_expire(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ ktimer->tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ ktimer->tscdeadline = 0;
+ ktimer->target_expiration = 0;
}
}
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ struct swait_queue_head *q = &vcpu->wq;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+ ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+ if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+ if (apic->lapic_timer.timer_advance_ns)
+ __kvm_wait_lapic_expire(vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
+ atomic_inc(&apic->lapic_timer.pending);
+ kvm_set_pending_timer(vcpu);
+
+ /*
+ * For x86, the atomic_inc() is serialized, thus
+ * using swait_active() is safe.
+ */
+ if (swait_active(q))
+ swake_up_one(q);
+}
+
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1571,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
- hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
} else
apic_timer_expired(apic);
@@ -1618,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic)
limit_periodic_timer_frequency(apic);
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
-
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
@@ -1673,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1830,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
if (lvt0_in_nmi_mode) {
- apic_debug("Receive NMI setting on APIC_LVT0 "
- "for cpu %d\n", apic->vcpu->vcpu_id);
atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
} else
atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1945,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
- if (val & 4)
- apic_debug("KVM_WRITE:TDCR %x\n", val);
kvm_lapic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
@@ -1958,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
case APIC_ESR:
- if (apic_x2apic_mode(apic) && val != 0) {
- apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+ if (apic_x2apic_mode(apic) && val != 0)
ret = 1;
- }
break;
case APIC_SELF_IPI:
@@ -1974,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
ret = 1;
break;
}
- if (ret)
- apic_debug("Local APIC Write to read-only register %x\n", reg);
+
return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2003,19 +2016,11 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
* 32/64/128 bits registers must be accessed thru 32 bits.
* Refer SDM 8.4.1
*/
- if (len != 4 || (offset & 0xf)) {
- /* Don't shout loud, $infamous_os would cause only noise. */
- apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+ if (len != 4 || (offset & 0xf))
return 0;
- }
val = *(u32*)data;
- /* too common printing */
- if (offset != APIC_EOI)
- apic_debug("%s: offset 0x%x with length 0x%x, and value is "
- "0x%x\n", __func__, offset, len, val);
-
kvm_lapic_reg_write(apic, offset & 0xff0, val);
return 0;
@@ -2148,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((value & MSR_IA32_APICBASE_ENABLE) &&
apic->base_address != APIC_DEFAULT_PHYS_BASE)
pr_warn_once("APIC base relocation is unsupported by KVM");
-
- /* with FSB delivery interrupt, we can restart APIC functionality */
- apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
- "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
}
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2163,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!apic)
return;
- apic_debug("%s\n", __func__);
-
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2217,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
-
- apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
- "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
- vcpu, kvm_lapic_get_reg(apic, APIC_ID),
- vcpu->arch.apic_base, apic->base_address);
}
/*
@@ -2293,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
struct kvm_lapic *apic;
ASSERT(vcpu != NULL);
- apic_debug("apic_init %d\n", vcpu->vcpu_id);
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
@@ -2310,10 +2302,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
- apic->lapic_timer.timer_advance_ns = 1000;
+ apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
apic->lapic_timer.timer_advance_adjust_done = false;
} else {
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
@@ -2323,7 +2315,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
- * thinking that APIC satet has changed.
+ * thinking that APIC state has changed.
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
@@ -2332,6 +2324,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
return 0;
nomem_free_apic:
kfree(apic);
+ vcpu->arch.apic = NULL;
nomem:
return -ENOMEM;
}
@@ -2341,7 +2334,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!apic_enabled(apic))
+ if (!kvm_apic_hw_enabled(apic))
return -1;
__apic_update_ppr(apic, &ppr);
@@ -2366,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
if (atomic_read(&apic->lapic_timer.pending) > 0) {
- kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
- apic->lapic_timer.tscdeadline = 0;
- if (apic_lvtt_oneshot(apic)) {
- apic->lapic_timer.tscdeadline = 0;
- apic->lapic_timer.target_expiration = 0;
- }
+ kvm_apic_inject_pending_timer_irqs(apic);
atomic_set(&apic->lapic_timer.pending, 0);
}
}
@@ -2494,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
struct hrtimer *timer;
- if (!lapic_in_kernel(vcpu))
+ if (!lapic_in_kernel(vcpu) ||
+ kvm_can_post_timer_interrupt(vcpu))
return;
timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/*
@@ -2647,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
- if (reg == APIC_DFR || reg == APIC_ICR2) {
- apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
- reg);
+ if (reg == APIC_DFR || reg == APIC_ICR2)
return 1;
- }
if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
@@ -2749,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
- apic_debug("vcpu %d received sipi with vector # %x\n",
- vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d6d049ba3045..50053d2b8b7b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -32,6 +32,7 @@ struct kvm_timer {
u64 tscdeadline;
u64 expired_tscdeadline;
u32 timer_advance_ns;
+ s64 advance_expire_delta;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
bool timer_advance_adjust_done;
@@ -129,6 +130,11 @@ void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
+static inline void kvm_lapic_clear_vector(int vec, void *bitmap)
+{
+ clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline void kvm_lapic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
@@ -230,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1e9ba81accba..8f72526e2f68 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
@@ -12,10 +13,6 @@
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include "irq.h"
@@ -143,9 +140,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
-
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@@ -262,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
*/
static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+static u8 __read_mostly shadow_phys_bits;
static void mmu_spte_set(u64 *sptep, u64 spte);
+static bool is_executable_pte(u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
+#define CREATE_TRACE_POINTS
+#include "mmutrace.h"
+
static inline bool kvm_available_flush_tlb_with_range(void)
{
@@ -471,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+static u8 kvm_get_shadow_phys_bits(void)
+{
+ /*
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
+ * in CPU detection code, but MKTME treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore for MKTME
+ * we should still return physical address bits reported by CPUID.
+ */
+ if (!boot_cpu_has(X86_FEATURE_TME) ||
+ WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
+ return boot_cpu_data.x86_phys_bits;
+
+ return cpuid_eax(0x80000008) & 0xff;
+}
+
static void kvm_mmu_reset_all_pte_masks(void)
{
u8 low_phys_bits;
@@ -484,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void)
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
+ shadow_phys_bits = kvm_get_shadow_phys_bits();
+
/*
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
@@ -653,7 +673,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/*
* The idea using the light way get the spte on x86_32 guest is from
- * gup_get_pte(arch/x86/mm/gup.c).
+ * gup_get_pte (mm/gup.c).
*
* An spte tlb flush may be pending, because kvm_set_pte_rmapp
* coalesces them and we are running out of the MMU lock. Therefore
@@ -1076,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
- if (sp->role.direct)
- BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
- else
+ if (!sp->role.direct) {
sp->gfns[index] = gfn;
+ return;
+ }
+
+ if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
+ pr_err_ratelimited("gfn mismatch under direct page %llx "
+ "(expected %llx, got %llx)\n",
+ sp->gfn,
+ kvm_mmu_page_get_gfn(sp, index), gfn);
}
/*
@@ -3058,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
ret = RET_PF_EMULATE;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
- pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
- is_large_pte(*sptep)? "2MB" : "4kB",
- *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
- *sptep, sptep);
+ trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
++vcpu->kvm->stat.lpages;
@@ -3073,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
- kvm_release_pfn_clean(pfn);
-
return ret;
}
@@ -3109,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (ret <= 0)
return -1;
- for (i = 0; i < ret; i++, gfn++, start++)
+ for (i = 0; i < ret; i++, gfn++, start++) {
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
+ put_page(pages[i]);
+ }
return 0;
}
@@ -3159,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+ int map_writable, int level, kvm_pfn_t pfn,
+ bool prefault)
{
- struct kvm_shadow_walk_iterator iterator;
+ struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
- int emulate = 0;
- gfn_t pseudo_gfn;
+ int ret;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ gfn_t base_gfn = gfn;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- return 0;
+ return RET_PF_RETRY;
- for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
- if (iterator.level == level) {
- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
- write, level, gfn, pfn, prefault,
- map_writable);
- direct_pte_prefetch(vcpu, iterator.sptep);
- ++vcpu->stat.pf_fixed;
+ trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ for_each_shadow_entry(vcpu, gpa, it) {
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == level)
break;
- }
-
- drop_large_spte(vcpu, iterator.sptep);
- if (!is_shadow_present_pte(*iterator.sptep)) {
- u64 base_addr = iterator.addr;
- base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
- pseudo_gfn = base_addr >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
- iterator.level - 1, 1, ACC_ALL);
+ drop_large_spte(vcpu, it.sptep);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+ it.level - 1, true, ACC_ALL);
- link_shadow_page(vcpu, iterator.sptep, sp);
+ link_shadow_page(vcpu, it.sptep, sp);
}
}
- return emulate;
+
+ ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
+ write, level, base_gfn, pfn, prefault,
+ map_writable);
+ direct_pte_prefetch(vcpu, it.sptep);
+ ++vcpu->stat.pf_fixed;
+ return ret;
}
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@@ -3219,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
- gfn_t *gfnp, kvm_pfn_t *pfnp,
+ gfn_t gfn, kvm_pfn_t *pfnp,
int *levelp)
{
kvm_pfn_t pfn = *pfnp;
- gfn_t gfn = *gfnp;
int level = *levelp;
/*
@@ -3250,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
mask = KVM_PAGES_PER_HPAGE(level) - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
- gfn &= ~mask;
- *gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
@@ -3508,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -4018,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
-bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu) ||
- vcpu->arch.exception.pending))
- return false;
-
- if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
- return false;
-
- return kvm_x86_ops->interrupt_allowed(vcpu);
-}
-
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
{
@@ -4150,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -4497,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
*/
shadow_zero_check = &context->shadow_zero_check;
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
context->shadow_root_level, uses_nx,
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
is_pse(vcpu), true);
@@ -4534,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
if (boot_cpu_is_amd())
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
context->shadow_root_level, false,
boot_cpu_has(X86_FEATURE_GBPAGES),
true, true);
else
__reset_rsvds_bits_mask_ept(shadow_zero_check,
- boot_cpu_data.x86_phys_bits,
+ shadow_phys_bits,
false);
if (!shadow_me_mask)
@@ -4561,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context, bool execonly)
{
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
- boot_cpu_data.x86_phys_bits, execonly);
+ shadow_phys_bits, execonly);
}
#define BYTE_MASK(access) \
@@ -4596,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
*/
/* Faults from writes to non-writable pages */
- u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
/* Faults from user mode accesses to supervisor pages */
- u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
/* Faults from fetches of non-executable pages*/
- u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
/* Faults from kernel mode fetches of user pages */
u8 smepf = 0;
/* Faults from kernel mode accesses of user pages */
@@ -5602,14 +5603,18 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
struct page *page;
int i;
- if (tdp_enabled)
- return 0;
-
/*
- * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
- * Therefore we need to allocate shadow page tables in the first
- * 4GB of memory, which happens to fit the DMA32 zone.
+ * When using PAE paging, the four PDPTEs are treated as 'root' pages,
+ * while the PDP table is a per-vCPU construct that's allocated at MMU
+ * creation. When emulating 32-bit mode, cr3 is only 32 bits even on
+ * x86_64. Therefore we need to allocate the PDP table in the first
+ * 4GB of memory, which happens to fit the DMA32 zone. Except for
+ * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
+ * skip allocating the PDP table.
*/
+ if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+ return 0;
+
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
if (!page)
return -ENOMEM;
@@ -5934,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -5976,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
break;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return freed;
}
@@ -5998,6 +6003,34 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
+static void kvm_set_mmio_spte_mask(void)
+{
+ u64 mask;
+
+ /*
+ * Set the reserved bits and the present bit of a paging-structure
+ * entry to generate a page fault with PFER.RSV = 1.
+ */
+
+ /*
+ * Mask the uppermost physical address bit, which would be reserved as
+ * long as the supported physical address width is less than 52.
+ */
+ mask = 1ull << 51;
+
+ /* Set the present bit. */
+ mask |= 1ull;
+
+ /*
+ * With 52 physical address bits, bit 51 is not reserved; clear the
+ * present bit to disable the mmio page fault.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+ mask &= ~1ull;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask);
+}
+
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
@@ -6014,6 +6047,8 @@ int kvm_mmu_module_init(void)
kvm_mmu_reset_all_pte_masks();
+ kvm_set_mmio_spte_mask();
+
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index abac7e208853..ca39f62aabc6 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* mmu_audit.c:
*
@@ -11,10 +12,6 @@
* Avi Kivity <avi@qumranet.com>
* Marcelo Tosatti <mtosatti@redhat.com>
* Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/ratelimit.h>
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index dd30dccd2ad5..d8001b4bca05 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -301,6 +301,65 @@ TRACE_EVENT(
__entry->kvm_gen == __entry->spte_gen
)
);
+
+TRACE_EVENT(
+ kvm_mmu_set_spte,
+ TP_PROTO(int level, gfn_t gfn, u64 *sptep),
+ TP_ARGS(level, gfn, sptep),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, spte)
+ __field(u64, sptep)
+ __field(u8, level)
+ /* These depend on page entry type, so compute them now. */
+ __field(bool, r)
+ __field(bool, x)
+ __field(signed char, u) /* tri-state: -1 = no user mask, else 0/1; must be signed for the == -1 test */
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = gfn;
+ __entry->spte = *sptep;
+ __entry->sptep = virt_to_phys(sptep);
+ __entry->level = level;
+ __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
+ __entry->x = is_executable_pte(__entry->spte);
+ __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+ ),
+
+ TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
+ __entry->gfn, __entry->spte,
+ __entry->r ? "r" : "-",
+ __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
+ __entry->x ? "x" : "-",
+ __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+ __entry->level, __entry->sptep
+ )
+);
+
+TRACE_EVENT(
+ kvm_mmu_spte_requested,
+ TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
+ TP_ARGS(addr, level, pfn),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, pfn)
+ __field(u8, level)
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = addr >> PAGE_SHIFT;
+ __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+ __entry->level = level;
+ ),
+
+ TP_printk("gfn %llx pfn %llx level %d",
+ __entry->gfn, __entry->pfn, __entry->level
+ )
+);
+
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 9f72cc427158..25ce3edd1872 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* vMTRR implementation
*
@@ -11,9 +12,6 @@
* Marcelo Tosatti <mtosatti@redhat.com>
* Paolo Bonzini <pbonzini@redhat.com>
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
*/
#include <linux/kvm_host.h>
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index fd04d462fdae..3521e2d176f2 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Support KVM gust page tracking
*
@@ -8,9 +9,6 @@
*
* Author:
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
*/
#include <linux/kvm_host.h>
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 367a47df4ba0..7d5cdb3af594 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Kernel-based Virtual Machine driver for Linux
*
@@ -12,10 +13,6 @@
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
/*
@@ -543,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
+ kvm_release_pfn_clean(pfn);
return true;
}
@@ -622,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
+ gfn_t base_gfn;
direct_access = gw->pte_access;
@@ -666,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- for (;
- shadow_walk_okay(&it) && it.level > hlevel;
- shadow_walk_next(&it)) {
- gfn_t direct_gfn;
+ base_gfn = gw->gfn;
+
+ trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+ for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
+ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == hlevel)
+ break;
+
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
- if (is_shadow_present_pte(*it.sptep))
- continue;
-
- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-
- sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
- true, direct_access);
- link_shadow_page(vcpu, it.sptep, sp);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+ it.level - 1, true, direct_access);
+ link_shadow_page(vcpu, it.sptep, sp);
+ }
}
- clear_sp_write_flooding_count(it.sptep);
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
- it.level, gw->gfn, pfn, prefault, map_writable);
+ it.level, base_gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
-
+ ++vcpu->stat.pf_fixed;
return ret;
out_gpte_changed:
- kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
}
@@ -842,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -850,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (!force_pt_level)
- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+ transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
- ++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index e39741997893..46875bbd0419 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine -- Performance Monitoring Unit support
*
@@ -7,10 +8,6 @@
* Avi Kivity <avi@redhat.com>
* Gleb Natapov <gleb@redhat.com>
* Wei Huang <wei@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/types.h>
@@ -22,6 +19,9 @@
#include "lapic.h"
#include "pmu.h"
+/* This is enough to filter the vast majority of currently defined events. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+
/* NOTE:
* - Each perf counter is defined as "struct kvm_pmc";
* - There are two types of perf counters: general purpose (gp) and fixed.
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
- printk_once("kvm_pmu: event creation failed %ld\n",
- PTR_ERR(event));
+ pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+ PTR_ERR(event), pmc->idx);
return;
}
@@ -144,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
u8 event_select, unit_mask;
+ struct kvm *kvm = pmc->vcpu->kvm;
+ struct kvm_pmu_event_filter *filter;
+ int i;
+ bool allow_event = true;
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
printk_once("kvm pmu: pin control bit is ignored\n");
@@ -155,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
return;
+ filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+ if (filter) {
+ for (i = 0; i < filter->nevents; i++)
+ if (filter->events[i] ==
+ (eventsel & AMD64_RAW_EVENT_MASK_NB))
+ break;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ i == filter->nevents)
+ allow_event = false;
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ i < filter->nevents)
+ allow_event = false;
+ }
+ if (!allow_event)
+ return;
+
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
@@ -186,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
unsigned en_field = ctrl & 0x3;
bool pmi = ctrl & 0x8;
+ struct kvm_pmu_event_filter *filter;
+ struct kvm *kvm = pmc->vcpu->kvm;
pmc_stop_counter(pmc);
if (!en_field || !pmc_is_enabled(pmc))
return;
+ filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+ if (filter) {
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ }
+
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
@@ -264,10 +296,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
ctr_val = rdtsc();
break;
case VMWARE_BACKDOOR_PMC_REAL_TIME:
- ctr_val = ktime_get_boot_ns();
+ ctr_val = ktime_get_boottime_ns();
break;
case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
- ctr_val = ktime_get_boot_ns() +
+ ctr_val = ktime_get_boottime_ns() +
vcpu->kvm->arch.kvmclock_offset;
break;
default:
@@ -283,7 +315,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
bool fast_mode = idx & (1u << 31);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- u64 ctr_val;
+ u64 mask = fast_mode ? ~0u : ~0ull;
if (!pmu->version)
return 1;
@@ -291,15 +323,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
- pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
+ pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
if (!pmc)
return 1;
- ctr_val = pmc_read_counter(pmc);
- if (fast_mode)
- ctr_val = (u32)ctr_val;
-
- *data = ctr_val;
+ *data = pmc_read_counter(pmc) & mask;
return 0;
}
@@ -355,3 +383,46 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
}
+
+int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_pmu_event_filter tmp, *filter;
+ size_t size;
+ int r;
+
+ if (copy_from_user(&tmp, argp, sizeof(tmp)))
+ return -EFAULT;
+
+ if (tmp.action != KVM_PMU_EVENT_ALLOW &&
+ tmp.action != KVM_PMU_EVENT_DENY)
+ return -EINVAL;
+
+ if (tmp.flags != 0)
+ return -EINVAL;
+
+ if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
+ return -E2BIG;
+
+ size = struct_size(filter, events, tmp.nevents);
+ filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!filter)
+ return -ENOMEM;
+
+ r = -EFAULT;
+ if (copy_from_user(filter, argp, size))
+ goto cleanup;
+
+ /* Ensure nevents can't be changed between the user copies. */
+ *filter = tmp;
+
+ mutex_lock(&kvm->lock);
+ rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
+ mutex_is_locked(&kvm->lock));
+ mutex_unlock(&kvm->lock);
+
+ synchronize_srcu_expedited(&kvm->srcu);
+ r = 0;
+cleanup:
+ kfree(filter);
+ return r;
+}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e1a854..58265f761c3b 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -25,7 +25,8 @@ struct kvm_pmu_ops {
unsigned (*find_fixed_event)(int idx);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
- struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx);
+ struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx,
+ u64 *mask);
int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
@@ -117,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 1495a735b38e..c8388389a3b0 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM PMU support for AMD
*
@@ -6,9 +7,6 @@
* Author:
* Wei Huang <wei@redhat.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
* Implementation is based on pmu_intel.c file
*/
#include <linux/types.h>
@@ -186,7 +184,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
}
/* idx is the ECX register of RDPMC instruction */
-static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx)
+static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *counters;
@@ -269,10 +267,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xffffffff00200000ull;
+ pmu->version = 1;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
- pmu->version = 0;
pmu->global_status = 0;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a849dcb7fbc5..19f69df96758 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
@@ -9,10 +10,6 @@
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#define pr_fmt(fmt) "SVM: " fmt
@@ -367,6 +364,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif
+/* enable/disable Next RIP Save */
+static int nrips = true;
+module_param(nrips, int, 0444);
+
/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);
@@ -379,6 +380,9 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static bool __read_mostly dump_invalid_vmcb = 0;
+module_param(dump_invalid_vmcb, bool, 0644);
+
static u8 rsm_ins_bytes[] = "\x0f\xaa";
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
@@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm->vmcb->control.next_rip != 0) {
+ if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
@@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
kvm_deliver_exception_payload(&svm->vcpu);
- if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
+ if (nr == BP_VECTOR && !nrips) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
/*
@@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
+ if (nrips) {
+ if (!boot_cpu_has(X86_FEATURE_NRIPS))
+ nrips = false;
+ }
+
if (avic) {
if (!npt_enabled ||
!boot_cpu_has(X86_FEATURE_AVIC) ||
@@ -2024,7 +2033,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+ /*
+ * Since the host physical APIC id is 8 bits,
+ * we can support host APIC ID up to 255.
+ */
+ if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache));
@@ -3286,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
- rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@@ -3576,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
vmcb_gpa = svm->vmcb->save.rax;
- rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@@ -3931,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
- if (!static_cpu_has(X86_FEATURE_NRIPS))
+ if (!nrips)
return emulate_on_interception(svm);
err = kvm_rdpmc(&svm->vcpu);
@@ -4824,6 +4837,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
+ if (!dump_invalid_vmcb) {
+ pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+ return;
+ }
+
pr_err("VMCB Control Area:\n");
pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
@@ -4982,7 +5000,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
= svm->vmcb->control.exit_code;
- pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
dump_vmcb(vcpu);
return 0;
}
@@ -5152,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
- if (avic_vcpu_is_running(vcpu))
- wrmsrl(SVM_AVIC_DOORBELL,
- kvm_cpu_get_apicid(vcpu->cpu));
- else
+ if (avic_vcpu_is_running(vcpu)) {
+ int cpuid = vcpu->cpu;
+
+ if (cpuid != get_cpu())
+ wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
+ put_cpu();
+ } else
kvm_vcpu_wake_up(vcpu);
}
@@ -5632,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
kvm_load_guest_xcr0(vcpu);
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
+ kvm_wait_lapic_expire(vcpu);
+
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5853,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xd9;
}
-static void svm_check_processor_compat(void *rtn)
+static int __init svm_check_processor_compat(void)
{
- *(int *)rtn = 0;
+ return 0;
}
static bool svm_cpu_has_accelerated_tpr(void)
@@ -5867,6 +5891,7 @@ static bool svm_has_emulated_msr(int index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
default:
break;
@@ -6154,15 +6179,9 @@ out:
return ret;
}
-static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
- local_irq_enable();
- /*
- * We must have an instruction with interrupts enabled, so
- * the timer interrupt isn't delayed by the interrupt shadow.
- */
- asm("nop");
- local_irq_disable();
+
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -7109,13 +7128,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- bool is_user, smap;
-
- is_user = svm_get_cpl(vcpu) == 3;
- smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+ unsigned long cr4 = kvm_read_cr4(vcpu);
+ bool smep = cr4 & X86_CR4_SMEP;
+ bool smap = cr4 & X86_CR4_SMAP;
+ bool is_user = svm_get_cpl(vcpu) == 3;
/*
- * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+ * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
+ *
+ * Errata:
+ * When the CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail
+ * to read bytes of instruction which caused #NPF. In this case,
+ * GuestInstrBytes field of the VMCB on a VMEXIT will incorrectly
+ * return 0 instead of the correct guest instruction bytes.
+ *
+ * This happens because CPU microcode reading instruction bytes
+ * uses a special opcode which attempts to read data using CPL=0
+ * privileges. The microcode reads CS:RIP and if it hits a SMAP
+ * fault, it gives up and returns no instruction bytes.
+ *
+ * Detection:
+ * We reach here in case the CPU supports DecodeAssist, raised #NPF and
+ * returned 0 in the GuestInstrBytes field of the VMCB.
+ * First, errata can only be triggered in case vCPU CR4.SMAP=1.
+ * Second, if vCPU CR4.SMEP=1, errata could only be triggered
+ * in case vCPU CPL==3 (Because otherwise guest would have triggered
+ * a SMEP fault instead of #NPF).
+ * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
+ * As most guests enable SMAP if they have also enabled SMEP, use above
+ * logic in order to attempt to minimize false-positive detection of the
+ * errata while still preserving semantic correctness in all cases.
+ *
+ * Workaround:
+ * To determine what instruction the guest was executing, the hypervisor
+ * will have to decode the instruction at the instruction pointer.
*
* In non SEV guest, hypervisor will be able to read the guest
* memory to decode the instruction pointer when insn_len is zero
@@ -7126,11 +7173,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
* instruction pointer so we will not able to workaround it. Lets
* print the error and request to kill the guest.
*/
- if (is_user && smap) {
+ if (smap && (!smep || is_user)) {
if (!sev_guest(vcpu->kvm))
return true;
- pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+ pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
@@ -7248,7 +7295,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
- .handle_external_intr = svm_handle_external_intr,
+ .handle_exit_irqoff = svm_handle_exit_irqoff,
.request_immediate_exit = __kvm_request_immediate_exit,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4d47a2631d1f..b5c831e79094 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state,
__entry->vcpu_id = vcpu_id;
__entry->hv_timer_in_use = hv_timer_in_use;
),
- TP_printk("vcpu_id %x hv_timer %x\n",
+ TP_printk("vcpu_id %x hv_timer %x",
__entry->vcpu_id,
__entry->hv_timer_in_use)
);
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 5466c6d85cf3..72359709cdc1 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
+#include "../hyperv.h"
#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"
@@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
}
#endif
+bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
+{
+ struct hv_vp_assist_page assist_page;
+
+ *evmcs_gpa = -1ull;
+
+ if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+ return false;
+
+ if (unlikely(!assist_page.enlighten_vmentry))
+ return false;
+
+ *evmcs_gpa = assist_page.current_nested_vmcs;
+
+ return true;
+}
+
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index e0fcef85b332..39a24eec8884 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f1a69117ac0f..0f1378789bd0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -41,15 +41,19 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
-static u16 shadow_read_only_fields[] = {
-#define SHADOW_FIELD_RO(x) x,
+struct shadow_vmcs_field {
+ u16 encoding;
+ u16 offset;
+};
+static struct shadow_vmcs_field shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static u16 shadow_read_write_fields[] = {
-#define SHADOW_FIELD_RW(x) x,
+static struct shadow_vmcs_field shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
@@ -63,34 +67,40 @@ static void init_vmcs_shadow_fields(void)
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
for (i = j = 0; i < max_shadow_read_only_fields; i++) {
- u16 field = shadow_read_only_fields[i];
+ struct shadow_vmcs_field entry = shadow_read_only_fields[i];
+ u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_only_fields ||
- shadow_read_only_fields[i + 1] != field + 1))
+ shadow_read_only_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_only_field %x\n",
field + 1);
clear_bit(field, vmx_vmread_bitmap);
-#ifdef CONFIG_X86_64
if (field & 1)
+#ifdef CONFIG_X86_64
continue;
+#else
+ entry.offset += sizeof(u32);
#endif
- if (j < i)
- shadow_read_only_fields[j] = field;
- j++;
+ shadow_read_only_fields[j++] = entry;
}
max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
- u16 field = shadow_read_write_fields[i];
+ struct shadow_vmcs_field entry = shadow_read_write_fields[i];
+ u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_write_fields ||
- shadow_read_write_fields[i + 1] != field + 1))
+ shadow_read_write_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_write_field %x\n",
field + 1);
+ WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
+ field <= GUEST_TR_AR_BYTES,
+ "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
+
/*
* PML and the preemption timer can be emulated, but the
* processor cannot vmwrite to fields that don't exist
@@ -115,13 +125,13 @@ static void init_vmcs_shadow_fields(void)
clear_bit(field, vmx_vmwrite_bitmap);
clear_bit(field, vmx_vmread_bitmap);
-#ifdef CONFIG_X86_64
if (field & 1)
+#ifdef CONFIG_X86_64
continue;
+#else
+ entry.offset += sizeof(u32);
#endif
- if (j < i)
- shadow_read_write_fields[j] = field;
- j++;
+ shadow_read_write_fields[j++] = entry;
}
max_shadow_read_write_fields = j;
}
@@ -182,8 +192,9 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
}
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
@@ -238,22 +249,41 @@ static void free_nested(struct kvm_vcpu *vcpu)
free_loaded_vmcs(&vmx->nested.vmcs02);
}
+static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
+ struct loaded_vmcs *prev)
+{
+ struct vmcs_host_state *dest, *src;
+
+ if (unlikely(!vmx->guest_state_loaded))
+ return;
+
+ src = &prev->host_state;
+ dest = &vmx->loaded_vmcs->host_state;
+
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+ dest->ldt_sel = src->ldt_sel;
+#ifdef CONFIG_X86_64
+ dest->ds_sel = src->ds_sel;
+ dest->es_sel = src->es_sel;
+#endif
+}
+
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct loaded_vmcs *prev;
int cpu;
if (vmx->loaded_vmcs == vmcs)
return;
cpu = get_cpu();
- vmx_vcpu_put(vcpu);
+ prev = vmx->loaded_vmcs;
vmx->loaded_vmcs = vmcs;
- vmx_vcpu_load(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_sync_vmcs_host_state(vmx, prev);
put_cpu();
- vm_entry_controls_reset_shadow(vmx);
- vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
}
@@ -930,8 +960,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
* If PAE paging and EPT are both on, CR3 is not used by the CPU and
* must not be dereferenced.
*/
- if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
- !nested_ept) {
+ if (is_pae_paging(vcpu) && !nested_ept) {
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
return -EINVAL;
@@ -1105,14 +1134,6 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
vmx->nested.msrs.misc_low = data;
vmx->nested.msrs.misc_high = data >> 32;
- /*
- * If L1 has read-only VM-exit information fields, use the
- * less permissive vmx_vmwrite_bitmap to specify write
- * permissions for the shadow VMCS.
- */
- if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
-
return 0;
}
@@ -1214,6 +1235,11 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_VMX_VMCS_ENUM:
vmx->nested.msrs.vmcs_enum = data;
return 0;
+ case MSR_IA32_VMX_VMFUNC:
+ if (data & ~vmx->nested.msrs.vmfunc_controls)
+ return -EINVAL;
+ vmx->nested.msrs.vmfunc_controls = data;
+ return 0;
default:
/*
* The rest of the VMX capability MSRs do not support restore.
@@ -1301,41 +1327,32 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
}
/*
- * Copy the writable VMCS shadow fields back to the VMCS12, in case
- * they have been modified by the L1 guest. Note that the "read-only"
- * VM-exit information fields are actually writable if the vCPU is
- * configured to support "VMWRITE to any supported field in the VMCS."
+ * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
+ * been modified by the L1 guest. Note, "writable" in this context means
+ * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
+ * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
+ * VM-exit information fields (which are actually writable if the vCPU is
+ * configured to support "VMWRITE to any supported field in the VMCS").
*/
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
- const u16 *fields[] = {
- shadow_read_write_fields,
- shadow_read_only_fields
- };
- const int max_fields[] = {
- max_shadow_read_write_fields,
- max_shadow_read_only_fields
- };
- int i, q;
- unsigned long field;
- u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
+ struct shadow_vmcs_field field;
+ unsigned long val;
+ int i;
+
+ if (WARN_ON(!shadow_vmcs))
+ return;
preempt_disable();
vmcs_load(shadow_vmcs);
- for (q = 0; q < ARRAY_SIZE(fields); q++) {
- for (i = 0; i < max_fields[q]; i++) {
- field = fields[q][i];
- field_value = __vmcs_readl(field);
- vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
- }
- /*
- * Skip the VM-exit information fields if they are read-only.
- */
- if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
- break;
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ field = shadow_read_write_fields[i];
+ val = __vmcs_readl(field.encoding);
+ vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
}
vmcs_clear(shadow_vmcs);
@@ -1346,7 +1363,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const u16 *fields[] = {
+ const struct shadow_vmcs_field *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -1354,18 +1371,23 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
max_shadow_read_write_fields,
max_shadow_read_only_fields
};
- int i, q;
- unsigned long field;
- u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
+ struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
+ struct shadow_vmcs_field field;
+ unsigned long val;
+ int i, q;
+
+ if (WARN_ON(!shadow_vmcs))
+ return;
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
- vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
- __vmcs_writel(field, field_value);
+ val = vmcs12_read_any(vmcs12, field.encoding,
+ field.offset);
+ __vmcs_writel(field.encoding, val);
}
}
@@ -1397,7 +1419,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
}
if (unlikely(!(evmcs->hv_clean_fields &
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
vmcs12->exception_bitmap = evmcs->exception_bitmap;
}
@@ -1437,7 +1459,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
}
if (unlikely(!(evmcs->hv_clean_fields &
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
vmcs12->pin_based_vm_exec_control =
evmcs->pin_based_vm_exec_control;
vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
@@ -1623,7 +1645,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
* evmcs->host_idtr_base = vmcs12->host_idtr_base;
* evmcs->host_rsp = vmcs12->host_rsp;
- * sync_vmcs12() doesn't read these:
+ * sync_vmcs02_to_vmcs12() doesn't read these:
* evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
* evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
* evmcs->msr_bitmap = vmcs12->msr_bitmap;
@@ -1768,26 +1790,22 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
bool from_launch)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct hv_vp_assist_page assist_page;
+ bool evmcs_gpa_changed = false;
+ u64 evmcs_gpa;
if (likely(!vmx->nested.enlightened_vmcs_enabled))
return 1;
- if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
- return 1;
-
- if (unlikely(!assist_page.enlighten_vmentry))
+ if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
return 1;
- if (unlikely(assist_page.current_nested_vmcs !=
- vmx->nested.hv_evmcs_vmptr)) {
-
+ if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
nested_release_evmcs(vcpu);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs),
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
&vmx->nested.hv_evmcs_map))
return 0;
@@ -1822,15 +1840,9 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
vmx->nested.dirty_vmcs12 = true;
- /*
- * As we keep L2 state for one guest only 'hv_clean_fields' mask
- * can't be used when we switch between them. Reset it here for
- * simplicity.
- */
- vmx->nested.hv_evmcs->hv_clean_fields &=
- ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+ vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
+ evmcs_gpa_changed = true;
/*
* Unlike normal vmcs12, enlightened vmcs12 is not fully
* reloaded from guest's memory (read only fields, fields not
@@ -1844,10 +1856,19 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
}
+
+ /*
+ * Clean fields data can't be used on VMLAUNCH and when we switch
+ * between different L2 guests as KVM keeps a single VMCS12 per L1.
+ */
+ if (from_launch || evmcs_gpa_changed)
+ vmx->nested.hv_evmcs->hv_clean_fields &=
+ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
return 1;
}
-void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu)
+void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1868,7 +1889,7 @@ void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu)
copy_vmcs12_to_shadow(vmx);
}
- vmx->nested.need_vmcs12_sync = false;
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
}
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@ -1948,8 +1969,20 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
- if (enable_pml)
+ /*
+ * The PML address never changes, so it is constant in vmcs02.
+ * Conceptually we want to copy the PML index from vmcs01 here,
+ * and then back to vmcs01 on nested vmexit. But since we flush
+ * the log and reset GUEST_PML_INDEX on each vmexit, the PML
+ * index is also effectively constant in vmcs02.
+ */
+ if (enable_pml) {
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ }
+
+ if (cpu_has_vmx_encls_vmexit())
+ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
/*
* Set the MSR load/store lists to match L0's settings. Only the
@@ -1963,7 +1996,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmx_set_constant_host_state(vmx);
}
-static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
+static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
struct vmcs12 *vmcs12)
{
prepare_vmcs02_constant_state(vmx);
@@ -1984,17 +2017,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
- prepare_vmcs02_early_full(vmx, vmcs12);
+ prepare_vmcs02_early_rare(vmx, vmcs12);
/*
* PIN CONTROLS
*/
- exec_control = vmcs12->pin_based_vm_exec_control;
-
- /* Preemption timer setting is computed directly in vmx_vcpu_run. */
- exec_control |= vmcs_config.pin_based_exec_ctrl;
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- vmx->loaded_vmcs->hv_timer_armed = false;
+ exec_control = vmx_pin_based_exec_ctrl(vmx);
+ exec_control |= (vmcs12->pin_based_vm_exec_control &
+ ~PIN_BASED_VMX_PREEMPTION_TIMER);
/* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
@@ -2003,7 +2033,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
+ pin_controls_set(vmx, exec_control);
/*
* EXEC CONTROLS
@@ -2014,28 +2044,31 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
- /*
- * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
- * nested_get_vmcs12_pages can't fix it up, the illegal value
- * will result in a VM entry failure.
- */
- if (exec_control & CPU_BASED_TPR_SHADOW) {
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
+ if (exec_control & CPU_BASED_TPR_SHADOW)
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
- } else {
#ifdef CONFIG_X86_64
+ else
exec_control |= CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING;
#endif
- }
/*
* A vmexit (to either L1 hypervisor or L0 userspace) is always needed
* for I/O port accesses.
*/
- exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+ exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
+
+ /*
+ * This bit will be computed in nested_get_vmcs12_pages, because
+ * we do not have access to L1's MSR bitmap yet. For now, keep
+ * the same bit as before, hoping to avoid multiple VMWRITEs that
+ * only set/clear this bit.
+ */
+ exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+ exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
+
+ exec_controls_set(vmx, exec_control);
/*
* SECONDARY EXEC CONTROLS
@@ -2061,22 +2094,19 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/* VMCS shadowing for L2 is emulated for now */
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
- if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
- vmcs_write16(GUEST_INTR_STATUS,
- vmcs12->guest_intr_status);
-
/*
- * Write an illegal value to APIC_ACCESS_ADDR. Later,
- * nested_get_vmcs12_pages will either fix it up or
- * remove the VM execution control.
+ * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
+ * will not have to rewrite the controls just for this bit.
*/
- if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
- vmcs_write64(APIC_ACCESS_ADDR, -1ull);
+ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
+ (vmcs12->guest_cr4 & X86_CR4_UMIP))
+ exec_control |= SECONDARY_EXEC_DESC;
- if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
- vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+ vmcs_write16(GUEST_INTR_STATUS,
+ vmcs12->guest_intr_status);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+ secondary_exec_controls_set(vmx, exec_control);
}
/*
@@ -2095,7 +2125,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
if (guest_efer != host_efer)
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
}
- vm_entry_controls_init(vmx, exec_control);
+ vm_entry_controls_set(vmx, exec_control);
/*
* EXIT CONTROLS
@@ -2107,17 +2137,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
exec_control = vmx_vmexit_ctrl();
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
exec_control |= VM_EXIT_LOAD_IA32_EFER;
- vm_exit_controls_init(vmx, exec_control);
-
- /*
- * Conceptually we want to copy the PML address and index from
- * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
- * since we always flush the log on each vmexit and never change
- * the PML address (once set), this happens to be equivalent to
- * simply resetting the index in vmcs02.
- */
- if (enable_pml)
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vm_exit_controls_set(vmx, exec_control);
/*
* Interrupt/Exception Fields
@@ -2138,7 +2158,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
}
}
-static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
@@ -2162,6 +2182,8 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
@@ -2198,6 +2220,10 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
}
+
+ if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
}
if (nested_cpu_has_xsaves(vmcs12))
@@ -2233,14 +2259,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
set_cr4_guest_host_mask(vmx);
-
- if (kvm_mpx_supported()) {
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
- else
- vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
- }
}
/*
@@ -2259,20 +2277,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+ bool load_guest_pdptrs_vmcs12 = false;
- if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
- prepare_vmcs02_full(vmx, vmcs12);
+ if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
+ prepare_vmcs02_rare(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
- }
- /*
- * First, the fields that are shadowed. This must be kept in sync
- * with vmcs_shadow_fields.h.
- */
- if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ load_guest_pdptrs_vmcs12 = !hv_evmcs ||
+ !(hv_evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
}
if (vmx->nested.nested_run_pending &&
@@ -2283,6 +2296,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
}
+ if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
@@ -2372,6 +2388,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
entry_failure_code))
return -EINVAL;
+ /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
+ if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
+ is_pae_paging(vcpu)) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
@@ -2609,6 +2634,30 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
!kvm_pat_valid(vmcs12->host_ia32_pat))
return -EINVAL;
+ ia32e = (vmcs12->vm_exit_controls &
+ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+
+ if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
+ vmcs12->host_cs_selector == 0 ||
+ vmcs12->host_tr_selector == 0 ||
+ (vmcs12->host_ss_selector == 0 && !ia32e))
+ return -EINVAL;
+
+#ifdef CONFIG_X86_64
+ if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_gs_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_idtr_base, vcpu) ||
+ is_noncanonical_address(vmcs12->host_tr_base, vcpu))
+ return -EINVAL;
+#endif
+
/*
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
* IA32_EFER MSR must be 0 in the field for that register. In addition,
@@ -2616,8 +2665,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
* the host address-space size VM-exit control.
*/
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
- ia32e = (vmcs12->vm_exit_controls &
- VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
@@ -2781,17 +2828,16 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
[launched]"i"(offsetof(struct loaded_vmcs, launched)),
[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
[wordsize]"i"(sizeof(ulong))
- : "cc", "memory"
+ : "memory"
);
- preempt_enable();
-
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
if (vmx->msr_autoload.guest.nr)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
if (vm_fail) {
+ preempt_enable();
WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
@@ -2803,6 +2849,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
local_irq_enable();
if (hw_breakpoint_active())
set_debugreg(__this_cpu_read(cpu_dr7), 7);
+ preempt_enable();
/*
* A non-failing VMEntry means we somehow entered guest mode with
@@ -2851,18 +2898,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
hpa = page_to_phys(vmx->nested.apic_access_page);
vmcs_write64(APIC_ACCESS_ADDR, hpa);
} else {
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ secondary_exec_controls_clearbit(vmx,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
map = &vmx->nested.virtual_apic_map;
- /*
- * If translation failed, VM entry will fail because
- * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- */
if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
@@ -2876,11 +2919,13 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
* _not_ what the processor does but it's basically the
* only possibility we have.
*/
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_TPR_SHADOW);
+ exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
} else {
- printk("bad virtual-APIC page address\n");
- dump_vmcs();
+ /*
+ * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
+ * force VM-Entry to fail.
+ */
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
}
}
@@ -2896,11 +2941,9 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_USE_MSR_BITMAPS);
+ exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_USE_MSR_BITMAPS);
+ exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
}
/*
@@ -2953,7 +2996,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
u32 exit_reason = EXIT_REASON_INVALID_STATE;
u32 exit_qual;
- evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+ evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
@@ -2964,6 +3007,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ /*
+ * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
+ * nested early checks are disabled. In the event of a "late" VM-Fail,
+ * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
+ * software model to the pre-VMEntry host state. When EPT is disabled,
+ * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
+ * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
+ * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
+ * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
+ * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
+ * guaranteed to be overwritten with a shadow CR3 prior to re-entering
+ * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
+ * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
+ * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
+ * path would need to manually save/restore vmcs01.GUEST_CR3.
+ */
+ if (!enable_ept && !nested_early_check)
+ vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
+
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
prepare_vmcs02_early(vmx, vmcs12);
@@ -3059,7 +3121,7 @@ vmentry_fail_vmexit:
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = exit_qual;
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
return 1;
}
@@ -3077,7 +3139,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true))
+ if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch))
return 1;
if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
@@ -3393,20 +3455,57 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
}
-/*
- * Update the guest state fields of vmcs12 to reflect changes that
- * occurred while L2 was running. (The "IA-32e mode guest" bit of the
- * VM-entry controls is also updated, since this is really a guest
- * state bit.)
- */
-static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
-{
- vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
- vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
+static bool is_vmcs12_ext_field(unsigned long field)
+{
+ switch (field) {
+ case GUEST_ES_SELECTOR:
+ case GUEST_CS_SELECTOR:
+ case GUEST_SS_SELECTOR:
+ case GUEST_DS_SELECTOR:
+ case GUEST_FS_SELECTOR:
+ case GUEST_GS_SELECTOR:
+ case GUEST_LDTR_SELECTOR:
+ case GUEST_TR_SELECTOR:
+ case GUEST_ES_LIMIT:
+ case GUEST_CS_LIMIT:
+ case GUEST_SS_LIMIT:
+ case GUEST_DS_LIMIT:
+ case GUEST_FS_LIMIT:
+ case GUEST_GS_LIMIT:
+ case GUEST_LDTR_LIMIT:
+ case GUEST_TR_LIMIT:
+ case GUEST_GDTR_LIMIT:
+ case GUEST_IDTR_LIMIT:
+ case GUEST_ES_AR_BYTES:
+ case GUEST_DS_AR_BYTES:
+ case GUEST_FS_AR_BYTES:
+ case GUEST_GS_AR_BYTES:
+ case GUEST_LDTR_AR_BYTES:
+ case GUEST_TR_AR_BYTES:
+ case GUEST_ES_BASE:
+ case GUEST_CS_BASE:
+ case GUEST_SS_BASE:
+ case GUEST_DS_BASE:
+ case GUEST_FS_BASE:
+ case GUEST_GS_BASE:
+ case GUEST_LDTR_BASE:
+ case GUEST_TR_BASE:
+ case GUEST_GDTR_BASE:
+ case GUEST_IDTR_BASE:
+ case GUEST_PENDING_DBG_EXCEPTIONS:
+ case GUEST_BNDCFGS:
+ return true;
+ default:
+ break;
+ }
- vmcs12->guest_rsp = kvm_rsp_read(vcpu);
- vmcs12->guest_rip = kvm_rip_read(vcpu);
- vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
+ return false;
+}
+
+static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
@@ -3427,8 +3526,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
- vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
- vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
@@ -3444,11 +3541,69 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
+ vmcs12->guest_pending_dbg_exceptions =
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if (kvm_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+
+ vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
+}
+
+static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int cpu;
+
+ if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
+ return;
+
+
+ WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
+
+ cpu = get_cpu();
+ vmx->loaded_vmcs = &vmx->nested.vmcs02;
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ vmx->loaded_vmcs = &vmx->vmcs01;
+ vmx_vcpu_load(&vmx->vcpu, cpu);
+ put_cpu();
+}
+
+/*
+ * Update the guest state fields of vmcs12 to reflect changes that
+ * occurred while L2 was running. (The "IA-32e mode guest" bit of the
+ * VM-entry controls is also updated, since this is really a guest
+ * state bit.)
+ */
+static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->nested.hv_evmcs)
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
+
+ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
+ vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
+
+ vmcs12->guest_rsp = kvm_rsp_read(vcpu);
+ vmcs12->guest_rip = kvm_rip_read(vcpu);
+ vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
+
+ vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
+ vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
+
+ vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
+ vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
+ vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
vmcs12->guest_interruptibility_info =
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- vmcs12->guest_pending_dbg_exceptions =
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
else
@@ -3469,10 +3624,12 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
if (enable_ept) {
vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
- vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
- vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
- vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
- vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
+ vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+ vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+ vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+ vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ }
}
vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
@@ -3484,22 +3641,11 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
- if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
- vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- }
- /* TODO: These cannot have changed unless we have MSR bitmaps and
- * the relevant bit asks not to trap the change */
- if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
- vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
vmcs12->guest_ia32_efer = vcpu->arch.efer;
- vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
- vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
- vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
- if (kvm_mpx_supported())
- vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
}
/*
@@ -3517,11 +3663,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 exit_reason, u32 exit_intr_info,
unsigned long exit_qualification)
{
- /* update guest state fields: */
- sync_vmcs12(vcpu, vmcs12);
-
/* update exit information fields: */
-
vmcs12->vm_exit_reason = exit_reason;
vmcs12->exit_qualification = exit_qualification;
vmcs12->vm_exit_intr_info = exit_intr_info;
@@ -3775,18 +3917,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
- * points to shadow pages! Fortunately we only get here after a WARN_ON
- * if EPT is disabled, so a VMabort is perfectly fine.
- */
- if (enable_ept) {
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- } else {
- nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
- }
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -3794,7 +3926,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
* VMFail, like everything else we just need to ensure our
* software model is up-to-date.
*/
- ept_save_pdptrs(vcpu);
+ if (enable_ept)
+ ept_save_pdptrs(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -3882,14 +4015,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
if (likely(!vmx->fail)) {
- if (exit_reason == -1)
- sync_vmcs12(vcpu, vmcs12);
- else
+ sync_vmcs02_to_vmcs12(vcpu, vmcs12);
+
+ if (exit_reason != -1)
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
exit_qualification);
/*
- * Must happen outside of sync_vmcs12() as it will
+ * Must happen outside of sync_vmcs02_to_vmcs12() as it will
* also be used to capture vmcs12 cache as part of
* capturing nVMX state for snapshot (migration).
*
@@ -3945,7 +4078,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -4008,7 +4141,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* #UD or #GP.
*/
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
- u32 vmx_instruction_info, bool wr, gva_t *ret)
+ u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
{
gva_t off;
bool exn;
@@ -4068,7 +4201,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
* mode, e.g. a 32-bit address size can yield a 64-bit virtual
* address when using FS/GS with a non-zero base.
*/
- *ret = s.base + off;
+ if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
+ *ret = s.base + off;
+ else
+ *ret = off;
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
@@ -4115,7 +4251,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
*/
if (!(s.base == 0 && s.limit == 0xffffffff &&
((s.type & 8) || !(s.type & 4))))
- exn = exn || (off + sizeof(u64) > s.limit);
+ exn = exn || ((u64)off + len - 1 > s.limit);
}
if (exn) {
kvm_queue_exception_e(vcpu,
@@ -4134,7 +4270,8 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
struct x86_exception e;
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
+ vmcs_read32(VMX_INSTRUCTION_INFO), false,
+ sizeof(*vmpointer), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
@@ -4300,11 +4437,12 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
if (vmx->nested.current_vmptr == -1ull)
return;
+ copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
+
if (enable_shadow_vmcs) {
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.need_vmcs12_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
@@ -4334,6 +4472,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 zero = 0;
gpa_t vmptr;
+ u64 evmcs_gpa;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4349,10 +4488,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- if (vmx->nested.hv_evmcs_map.hva) {
- if (vmptr == vmx->nested.hv_evmcs_vmptr)
- nested_release_evmcs(vcpu);
- } else {
+ /*
+ * When Enlightened VMEntry is enabled on the calling CPU we treat
+ * memory area pointed to by vmptr as Enlightened VMCS (as there's no good
+ * way to distinguish it from VMCS12) and we must not corrupt it by
+ * writing to the non-existent 'launch_state' field. The area doesn't
+ * have to be the currently active EVMCS on the calling CPU and there's
+ * nothing KVM has to do to transition it from 'active' to 'non-active'
+ * state. It is possible that the area will stay mapped as
+ * vmx->nested.hv_evmcs but this shouldn't be a problem.
+ */
+ if (likely(!vmx->nested.enlightened_vmcs_enabled ||
+ !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vcpu);
@@ -4386,8 +4533,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
u64 field_value;
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
+ short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4409,11 +4558,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
- /* Read the field, zero-extended to a u64 field_value */
- if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
+
+ offset = vmcs_field_to_offset(field);
+ if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+ /* Read the field, zero-extended to a u64 field_value */
+ field_value = vmcs12_read_any(vmcs12, field, offset);
+
/*
* Now copy part of this value to register or memory, as requested.
* Note that the number of bits actually copied is 32 or 64 depending
@@ -4423,21 +4579,45 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
field_value);
} else {
+ len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, true, &gva))
+ vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- kvm_write_guest_virt_system(vcpu, gva, &field_value,
- (is_long_mode(vcpu) ? 8 : 4), NULL);
+ kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
}
return nested_vmx_succeed(vcpu);
}
+static bool is_shadow_field_rw(unsigned long field)
+{
+ switch (field) {
+#define SHADOW_FIELD_RW(x, y) case x:
+#include "vmcs_shadow_fields.h"
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool is_shadow_field_ro(unsigned long field)
+{
+ switch (field) {
+#define SHADOW_FIELD_RO(x, y) case x:
+#include "vmcs_shadow_fields.h"
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
unsigned long field;
+ int len;
gva_t gva;
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -4452,6 +4632,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
u64 field_value = 0;
struct x86_exception e;
struct vmcs12 *vmcs12;
+ short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4463,11 +4644,11 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
field_value = kvm_register_readl(vcpu,
(((vmx_instruction_info) >> 3) & 0xf));
else {
+ len = is_64_bit_mode(vcpu) ? 8 : 4;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, len, &gva))
return 1;
- if (kvm_read_guest_virt(vcpu, gva, &field_value,
- (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
@@ -4484,9 +4665,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- if (!is_guest_mode(vcpu))
+ if (!is_guest_mode(vcpu)) {
vmcs12 = get_vmcs12(vcpu);
- else {
+
+ /*
+ * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
+ * vmcs12, else we may crush a field or consume a stale value.
+ */
+ if (!is_shadow_field_rw(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ } else {
/*
* When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
* to shadowed-field sets the ALU flags for VMfailInvalid.
@@ -4496,28 +4684,46 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
vmcs12 = get_shadow_vmcs12(vcpu);
}
- if (vmcs12_write_any(vmcs12, field, field_value) < 0)
+ offset = vmcs_field_to_offset(field);
+ if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/*
- * Do not track vmcs12 dirty-state if in guest-mode
- * as we actually dirty shadow vmcs12 instead of vmcs12.
+ * Some Intel CPUs intentionally drop the reserved bits of the AR byte
+ * fields on VMWRITE. Emulate this behavior to ensure consistent KVM
+ * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
+ * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
+ * from L1 will return a different value than VMREAD from L2 (L1 sees
+ * the stripped down value, L2 sees the full value as stored by KVM).
*/
- if (!is_guest_mode(vcpu)) {
- switch (field) {
-#define SHADOW_FIELD_RW(x) case x:
-#include "vmcs_shadow_fields.h"
- /*
- * The fields that can be updated by L1 without a vmexit are
- * always updated in the vmcs02, the others go down the slow
- * path of prepare_vmcs02.
- */
- break;
- default:
- vmx->nested.dirty_vmcs12 = true;
- break;
+ if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
+ field_value &= 0x1f0ff;
+
+ vmcs12_write_any(vmcs12, field, offset, field_value);
+
+ /*
+ * Do not track vmcs12 dirty-state if in guest-mode as we actually
+ * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated
+ * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
+ * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
+ */
+ if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
+ /*
+ * L1 can read these fields without exiting, ensure the
+ * shadow VMCS is up-to-date.
+ */
+ if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
+ preempt_disable();
+ vmcs_load(vmx->vmcs01.shadow_vmcs);
+
+ __vmcs_writel(field, field_value);
+
+ vmcs_clear(vmx->vmcs01.shadow_vmcs);
+ vmcs_load(vmx->loaded_vmcs->vmcs);
+ preempt_enable();
}
+ vmx->nested.dirty_vmcs12 = true;
}
return nested_vmx_succeed(vcpu);
@@ -4527,11 +4733,10 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
{
vmx->nested.current_vmptr = vmptr;
if (enable_shadow_vmcs) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_SHADOW_VMCS);
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER,
__pa(vmx->vmcs01.shadow_vmcs));
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
}
vmx->nested.dirty_vmcs12 = true;
}
@@ -4615,7 +4820,8 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
return 1;
- if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
+ if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
+ true, sizeof(gpa_t), &gva))
return 1;
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
@@ -4661,7 +4867,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
@@ -4670,13 +4876,11 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ case VMX_EPT_EXTENT_CONTEXT:
/*
- * TODO: track mappings and invalidate
- * single context requests appropriately
+ * TODO: Sync the necessary shadow EPT roots here, rather than
+ * at the next emulated VM-entry.
*/
- case VMX_EPT_EXTENT_CONTEXT:
- kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
break;
default:
BUG_ON(1);
@@ -4723,7 +4927,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
@@ -5226,40 +5430,42 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12;
struct kvm_nested_state kvm_state = {
.flags = 0,
- .format = 0,
+ .format = KVM_STATE_NESTED_FORMAT_VMX,
.size = sizeof(kvm_state),
- .vmx.vmxon_pa = -1ull,
- .vmx.vmcs_pa = -1ull,
+ .hdr.vmx.vmxon_pa = -1ull,
+ .hdr.vmx.vmcs12_pa = -1ull,
};
+ struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
+ &user_kvm_nested_state->data.vmx[0];
if (!vcpu)
- return kvm_state.size + 2 * VMCS12_SIZE;
+ return kvm_state.size + sizeof(*user_vmx_nested_state);
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
- kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
-
if (nested_vmx_allowed(vcpu) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
- kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
- kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
+ kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
+ kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
if (vmx_has_valid_vmcs12(vcpu)) {
- kvm_state.size += VMCS12_SIZE;
+ kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
+
+ if (vmx->nested.hv_evmcs)
+ kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
if (is_guest_mode(vcpu) &&
nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull)
- kvm_state.size += VMCS12_SIZE;
+ kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
}
if (vmx->nested.smm.vmxon)
- kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
+ kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
if (vmx->nested.smm.guest_mode)
- kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
+ kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
if (is_guest_mode(vcpu)) {
kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
@@ -5282,28 +5488,32 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
* When running L2, the authoritative vmcs12 state is in the
* vmcs02. When running L1, the authoritative vmcs12 state is
* in the shadow or enlightened vmcs linked to vmcs01, unless
- * need_vmcs12_sync is set, in which case, the authoritative
+ * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
* vmcs12 state is in the vmcs12 already.
*/
if (is_guest_mode(vcpu)) {
- sync_vmcs12(vcpu, vmcs12);
- } else if (!vmx->nested.need_vmcs12_sync) {
+ sync_vmcs02_to_vmcs12(vcpu, vmcs12);
+ sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
if (vmx->nested.hv_evmcs)
copy_enlightened_to_vmcs12(vmx);
else if (enable_shadow_vmcs)
copy_shadow_to_vmcs12(vmx);
}
+ BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
+ BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
+
/*
* Copy over the full allocated size of vmcs12 rather than just the size
* of the struct.
*/
- if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE))
+ if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
return -EFAULT;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
- if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
+ if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
get_shadow_vmcs12(vcpu), VMCS12_SIZE))
return -EFAULT;
}
@@ -5331,33 +5541,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12;
u32 exit_qual;
+ struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
+ &user_kvm_nested_state->data.vmx[0];
int ret;
- if (kvm_state->format != 0)
+ if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
return -EINVAL;
- if (!nested_vmx_allowed(vcpu))
- return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
+ if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
+ if (kvm_state->hdr.vmx.smm.flags)
+ return -EINVAL;
- if (kvm_state->vmx.vmxon_pa == -1ull) {
- if (kvm_state->vmx.smm.flags)
+ if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
return -EINVAL;
- if (kvm_state->vmx.vmcs_pa != -1ull)
+ /*
+ * KVM_STATE_NESTED_EVMCS used to signal that KVM should
+ * enable eVMCS capability on vCPU. However, since then
+ * code was changed such that flag signals vmcs12 should
+ * be copied into eVMCS in guest memory.
+ *
+ * To preserve backwards compatibility, allow user
+ * to set this flag even when there is no VMXON region.
+ */
+ if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
+ return -EINVAL;
+ } else {
+ if (!nested_vmx_allowed(vcpu))
return -EINVAL;
- vmx_leave_nested(vcpu);
- return 0;
+ if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
+ return -EINVAL;
}
- if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
- return -EINVAL;
-
- if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+ if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL;
- if (kvm_state->vmx.smm.flags &
+ if (kvm_state->hdr.vmx.smm.flags &
~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
@@ -5366,21 +5587,26 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
* must be zero.
*/
- if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
+ if (is_smm(vcpu) ?
+ (kvm_state->flags &
+ (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+ : kvm_state->hdr.vmx.smm.flags)
return -EINVAL;
- if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
- !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
+ if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+ !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
+ if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
+ (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
+ return -EINVAL;
+
vmx_leave_nested(vcpu);
- if (kvm_state->vmx.vmxon_pa == -1ull)
- return 0;
- if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
- nested_enable_evmcs(vcpu, NULL);
+ if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
+ return 0;
- vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
+ vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
ret = enter_vmx_operation(vcpu);
if (ret)
return ret;
@@ -5389,32 +5615,32 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
return 0;
- if (kvm_state->vmx.vmcs_pa != -1ull) {
- if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
- !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+ if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
+ if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
+ !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
return -EINVAL;
- set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+ set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
/*
* Sync eVMCS upon entry as we may not have
* HV_X64_MSR_VP_ASSIST_PAGE set up yet.
*/
- vmx->nested.need_vmcs12_sync = true;
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
} else {
return -EINVAL;
}
- if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
+ if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
vmx->nested.smm.vmxon = true;
vmx->nested.vmxon = false;
- if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
+ if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
vmx->nested.smm.guest_mode = true;
}
vmcs12 = get_vmcs12(vcpu);
- if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
+ if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
return -EFAULT;
if (vmcs12->hdr.revision_id != VMCS12_REVISION)
@@ -5423,52 +5649,53 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
+ vmx->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
+ ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
- if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
- return -EINVAL;
+ if (kvm_state->size <
+ sizeof(*kvm_state) +
+ sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
+ goto error_guest_mode;
if (copy_from_user(shadow_vmcs12,
- user_kvm_nested_state->data + VMCS12_SIZE,
- sizeof(*vmcs12)))
- return -EFAULT;
+ user_vmx_nested_state->shadow_vmcs12,
+ sizeof(*shadow_vmcs12))) {
+ ret = -EFAULT;
+ goto error_guest_mode;
+ }
if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
!shadow_vmcs12->hdr.shadow_vmcs)
- return -EINVAL;
+ goto error_guest_mode;
}
if (nested_vmx_check_controls(vcpu, vmcs12) ||
nested_vmx_check_host_state(vcpu, vmcs12) ||
nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
- return -EINVAL;
+ goto error_guest_mode;
vmx->nested.dirty_vmcs12 = true;
- vmx->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
-
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret) {
- vmx->nested.nested_run_pending = 0;
- return -EINVAL;
- }
+ if (ret)
+ goto error_guest_mode;
return 0;
+
+error_guest_mode:
+ vmx->nested.nested_run_pending = 0;
+ return ret;
}
void nested_vmx_vcpu_setup(void)
{
if (enable_shadow_vmcs) {
- /*
- * At vCPU creation, "VMWRITE to any supported field
- * in the VMCS" is supported, so use the more
- * permissive vmx_vmread_bitmap to specify both read
- * and write permissions for the shadow VMCS.
- */
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
}
@@ -5598,10 +5825,15 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_DESC |
+ SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_WBINVD_EXITING;
+ SECONDARY_EXEC_RDRAND_EXITING |
+ SECONDARY_EXEC_ENABLE_INVPCID |
+ SECONDARY_EXEC_RDSEED_EXITING |
+ SECONDARY_EXEC_XSAVES;
/*
* We can emulate "VMCS shadowing," even if the hardware
@@ -5721,14 +5953,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
- /*
- * Without EPT it is not possible to restore L1's CR3 and PDPTR on
- * VMfail, because they are not available in vmcs01. Just always
- * use hardware checks.
- */
- if (!enable_ept)
- nested_early_check = 1;
-
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index e847ff1019a2..187d39bf0bf1 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
-void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu);
+void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
- u32 vmx_instruction_info, bool wr, gva_t *ret);
+ u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index b8e50f76fefc..2200fb698dd0 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
- asm volatile ("");
__vmcs_writel(field+1, value >> 32);
#endif
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f8502c376b37..4dea0e0e7e39 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM PMU support for Intel CPUs
*
@@ -6,10 +7,6 @@
* Authors:
* Avi Kivity <avi@redhat.com>
* Gleb Natapov <gleb@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/types.h>
#include <linux/kvm_host.h>
@@ -126,7 +123,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
}
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
- unsigned idx)
+ unsigned idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
@@ -138,6 +135,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
if (fixed && idx >= pmu->nr_arch_fixed_counters)
return NULL;
counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+ *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
return &counters[idx];
}
@@ -183,9 +181,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
*data = pmu->global_ovf_ctrl;
return 0;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
- (pmc = get_fixed_pmc(pmu, msr))) {
- *data = pmc_read_counter(pmc);
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+ u64 val = pmc_read_counter(pmc);
+ *data = val & pmu->counter_bitmask[KVM_PMC_GP];
+ return 0;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ u64 val = pmc_read_counter(pmc);
+ *data = val & pmu->counter_bitmask[KVM_PMC_FIXED];
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
*data = pmc->eventsel;
@@ -235,11 +237,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
- (pmc = get_fixed_pmc(pmu, msr))) {
- if (!msr_info->host_initiated)
- data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+ if (msr_info->host_initiated)
+ pmc->counter = data;
+ else
+ pmc->counter = (s32)data;
+ return 0;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ pmc->counter = data;
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
@@ -332,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = NULL;
int i;
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
- struct kvm_pmc *pmc = &pmu->gp_counters[i];
+ pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
pmc->counter = pmc->eventsel = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
- pmc_stop_counter(&pmu->fixed_counters[i]);
+ for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ pmc = &pmu->fixed_counters[i];
+
+ pmc_stop_counter(pmc);
+ pmc->counter = 0;
+ }
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
pmu->global_ovf_ctrl = 0;
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index cb6079f8a227..481ad879197b 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -42,6 +42,14 @@ struct vmcs_host_state {
#endif
};
+struct vmcs_controls_shadow {
+ u32 vm_entry;
+ u32 vm_exit;
+ u32 pin;
+ u32 exec;
+ u32 secondary_exec;
+};
+
/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
@@ -53,7 +61,7 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
- bool hv_timer_armed;
+ bool hv_timer_soft_disabled;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
@@ -61,6 +69,7 @@ struct loaded_vmcs {
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
struct vmcs_host_state host_state;
+ struct vmcs_controls_shadow controls_shadow;
};
static inline bool is_exception_n(u32 intr_info, u8 vector)
@@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info)
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
+static inline bool is_external_intr(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
+ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR);
+}
+
enum vmcs_field_width {
VMCS_FIELD_WIDTH_U16 = 0,
VMCS_FIELD_WIDTH_U64 = 1,
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 3a742428ad17..d0c6df373f67 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -201,9 +201,10 @@ struct __packed vmcs12 {
/*
* VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
* and any VMCS region. Although only sizeof(struct vmcs12) are used by the
- * current implementation, 4K are reserved to avoid future complications.
+ * current implementation, 4K are reserved to avoid future complications and
+ * to preserve userspace ABI.
*/
-#define VMCS12_SIZE 0x1000
+#define VMCS12_SIZE KVM_STATE_NESTED_VMX_VMCS_SIZE
/*
* VMCS12_MAX_FIELD_INDEX is the highest index value used in any
@@ -394,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field)
#undef ROL16
-/*
- * Read a vmcs12 field. Since these can have varying lengths and we return
- * one type, we chose the biggest type (u64) and zero-extend the return value
- * to that size. Note that the caller, handle_vmread, might need to use only
- * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
- * 64-bit fields are to be returned).
- */
-static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
- unsigned long field, u64 *ret)
+static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
+ u16 offset)
{
- short offset = vmcs_field_to_offset(field);
- char *p;
-
- if (offset < 0)
- return offset;
-
- p = (char *)vmcs12 + offset;
+ char *p = (char *)vmcs12 + offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
- *ret = *((natural_width *)p);
- return 0;
+ return *((natural_width *)p);
case VMCS_FIELD_WIDTH_U16:
- *ret = *((u16 *)p);
- return 0;
+ return *((u16 *)p);
case VMCS_FIELD_WIDTH_U32:
- *ret = *((u32 *)p);
- return 0;
+ return *((u32 *)p);
case VMCS_FIELD_WIDTH_U64:
- *ret = *((u64 *)p);
- return 0;
+ return *((u64 *)p);
default:
- WARN_ON(1);
- return -ENOENT;
+ WARN_ON_ONCE(1);
+ return -1;
}
}
-static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
- unsigned long field, u64 field_value){
- short offset = vmcs_field_to_offset(field);
+static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field,
+ u16 offset, u64 field_value)
+{
char *p = (char *)vmcs12 + offset;
- if (offset < 0)
- return offset;
-
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
- return 0;
+ break;
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
- return 0;
+ break;
default:
- WARN_ON(1);
- return -ENOENT;
+ WARN_ON_ONCE(1);
+ break;
}
-
}
#endif /* __KVM_X86_VMX_VMCS12_H */
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
index 132432f375c2..eb1ecd16fd22 100644
--- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
@@ -1,8 +1,12 @@
+#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW)
+BUILD_BUG_ON(1)
+#endif
+
#ifndef SHADOW_FIELD_RO
-#define SHADOW_FIELD_RO(x)
+#define SHADOW_FIELD_RO(x, y)
#endif
#ifndef SHADOW_FIELD_RW
-#define SHADOW_FIELD_RW(x)
+#define SHADOW_FIELD_RW(x, y)
#endif
/*
@@ -28,47 +32,48 @@
*/
/* 16-bits */
-SHADOW_FIELD_RW(GUEST_INTR_STATUS)
-SHADOW_FIELD_RW(GUEST_PML_INDEX)
-SHADOW_FIELD_RW(HOST_FS_SELECTOR)
-SHADOW_FIELD_RW(HOST_GS_SELECTOR)
+SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status)
+SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index)
+SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector)
+SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector)
/* 32-bits */
-SHADOW_FIELD_RO(VM_EXIT_REASON)
-SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
-SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
-SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
-SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
-SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
-SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
-SHADOW_FIELD_RW(EXCEPTION_BITMAP)
-SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
-SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
-SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
-SHADOW_FIELD_RW(TPR_THRESHOLD)
-SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
-SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
-SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
-SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
+SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason)
+SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info)
+SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len)
+SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field)
+SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code)
+SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code)
+SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes)
+SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes)
+SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control)
+SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control)
+SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap)
+SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code)
+SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field)
+SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len)
+SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold)
+SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info)
+SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value)
/* Natural width */
-SHADOW_FIELD_RO(EXIT_QUALIFICATION)
-SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
-SHADOW_FIELD_RW(GUEST_RIP)
-SHADOW_FIELD_RW(GUEST_RSP)
-SHADOW_FIELD_RW(GUEST_CR0)
-SHADOW_FIELD_RW(GUEST_CR3)
-SHADOW_FIELD_RW(GUEST_CR4)
-SHADOW_FIELD_RW(GUEST_RFLAGS)
-SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
-SHADOW_FIELD_RW(CR0_READ_SHADOW)
-SHADOW_FIELD_RW(CR4_READ_SHADOW)
-SHADOW_FIELD_RW(HOST_FS_BASE)
-SHADOW_FIELD_RW(HOST_GS_BASE)
+SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification)
+SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address)
+SHADOW_FIELD_RW(GUEST_RIP, guest_rip)
+SHADOW_FIELD_RW(GUEST_RSP, guest_rsp)
+SHADOW_FIELD_RW(GUEST_CR0, guest_cr0)
+SHADOW_FIELD_RW(GUEST_CR3, guest_cr3)
+SHADOW_FIELD_RW(GUEST_CR4, guest_cr4)
+SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags)
+SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask)
+SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow)
+SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow)
+SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base)
+SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
-SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
-SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index d4cb1945b2e3..4010d519eb8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -54,9 +54,9 @@ ENTRY(vmx_vmenter)
ret
3: cmpb $0, kvm_rebooting
- jne 4f
- call kvm_spurious_fault
-4: ret
+ je 4f
+ ret
+4: ud2
.pushsection .fixup, "ax"
5: jmp 3b
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1ac167614032..a279447eb75b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
@@ -10,10 +11,6 @@
* Authors:
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/frame.h>
@@ -114,6 +111,9 @@ static u64 __read_mostly host_xss;
bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+static bool __read_mostly dump_invalid_vmcs = 0;
+module_param(dump_invalid_vmcs, bool, 0644);
+
#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
@@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field {
};
u64 host_efer;
+static unsigned long host_idt_base;
/*
* Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
@@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
+{
+ if (unlikely(fs_sel != host->fs_sel)) {
+ if (!(fs_sel & 7))
+ vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+ else
+ vmcs_write16(HOST_FS_SELECTOR, 0);
+ host->fs_sel = fs_sel;
+ }
+ if (unlikely(gs_sel != host->gs_sel)) {
+ if (!(gs_sel & 7))
+ vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+ else
+ vmcs_write16(HOST_GS_SELECTOR, 0);
+ host->gs_sel = gs_sel;
+ }
+ if (unlikely(fs_base != host->fs_base)) {
+ vmcs_writel(HOST_FS_BASE, fs_base);
+ host->fs_base = fs_base;
+ }
+ if (unlikely(gs_base != host->gs_base)) {
+ vmcs_writel(HOST_GS_BASE, gs_base);
+ host->gs_base = gs_base;
+ }
+}
+
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* when guest state is loaded. This happens when guest transitions
* to/from long-mode by setting MSR_EFER.LMA.
*/
- if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
- vmx->guest_msrs_dirty = false;
+ if (!vmx->guest_msrs_ready) {
+ vmx->guest_msrs_ready = true;
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
vmx->guest_msrs[i].mask);
}
-
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
return;
- vmx->loaded_cpu_state = vmx->loaded_vmcs;
- host_state = &vmx->loaded_cpu_state->host_state;
+ host_state = &vmx->loaded_vmcs->host_state;
/*
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
@@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- if (unlikely(fs_sel != host_state->fs_sel)) {
- if (!(fs_sel & 7))
- vmcs_write16(HOST_FS_SELECTOR, fs_sel);
- else
- vmcs_write16(HOST_FS_SELECTOR, 0);
- host_state->fs_sel = fs_sel;
- }
- if (unlikely(gs_sel != host_state->gs_sel)) {
- if (!(gs_sel & 7))
- vmcs_write16(HOST_GS_SELECTOR, gs_sel);
- else
- vmcs_write16(HOST_GS_SELECTOR, 0);
- host_state->gs_sel = gs_sel;
- }
- if (unlikely(fs_base != host_state->fs_base)) {
- vmcs_writel(HOST_FS_BASE, fs_base);
- host_state->fs_base = fs_base;
- }
- if (unlikely(gs_base != host_state->gs_base)) {
- vmcs_writel(HOST_GS_BASE, gs_base);
- host_state->gs_base = gs_base;
- }
+ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
+ vmx->guest_state_loaded = true;
}
static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
struct vmcs_host_state *host_state;
- if (!vmx->loaded_cpu_state)
+ if (!vmx->guest_state_loaded)
return;
- WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
- host_state = &vmx->loaded_cpu_state->host_state;
+ host_state = &vmx->loaded_vmcs->host_state;
++vmx->vcpu.stat.host_state_reload;
- vmx->loaded_cpu_state = NULL;
#ifdef CONFIG_X86_64
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
@@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
load_fixmap_gdt(raw_smp_processor_id());
+ vmx->guest_state_loaded = false;
+ vmx->guest_msrs_ready = false;
}
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
preempt_disable();
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
preempt_enable();
return vmx->msr_guest_kernel_gs_base;
@@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
preempt_disable();
- if (vmx->loaded_cpu_state)
+ if (vmx->guest_state_loaded)
wrmsrl(MSR_KERNEL_GS_BASE, data);
preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
@@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
pi_set_on(pi_desc);
}
-/*
- * Switches to specified vcpu, until a matching vcpu_put(), but assumes
- * vcpu mutex is already taken.
- */
-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@@ -1290,8 +1292,20 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_has_tsc_control &&
vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
decache_tsc_multiplier(vmx);
+}
+
+/*
+ * Switches to specified vcpu, until a matching vcpu_put(), but assumes
+ * vcpu mutex is already taken.
+ */
+void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ vmx_vcpu_load_vmcs(vcpu, cpu);
vmx_vcpu_pi_load(vcpu, cpu);
+
vmx->host_pkru = read_pkru();
vmx->host_debugctlmsr = get_debugctlmsr();
}
@@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
pi_set_sn(pi_desc);
}
-void vmx_vcpu_put(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
@@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
vmx->save_nmsrs = save_nmsrs;
- vmx->guest_msrs_dirty = true;
+ vmx->guest_msrs_ready = false;
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
@@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
- case MSR_IA32_POWER_CTL:
- msr_info->data = vmx->msr_ia32_power_ctl;
- break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
msr_info->data = vcpu->arch.ia32_xss;
break;
@@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
#endif
case MSR_IA32_SYSENTER_CS:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_cs = data;
vmcs_write32(GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_eip = data;
vmcs_writel(GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
+ if (is_guest_mode(vcpu))
+ get_vmcs12(vcpu)->guest_sysenter_esp = data;
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_POWER_CTL:
- vmx->msr_ia32_power_ctl = data;
+ case MSR_IA32_DEBUGCTLMSR:
+ if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
+ VM_EXIT_SAVE_DEBUG_CONTROLS)
+ get_vmcs12(vcpu)->guest_ia32_debugctl = data;
+
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
+
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
+ if (!kvm_pat_valid(data))
+ return 1;
+
+ if (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
+ get_vmcs12(vcpu)->guest_ia32_pat = data;
+
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- if (!kvm_pat_valid(data))
- return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
- if (!vmx_xsaves_supported())
+ if (!vmx_xsaves_supported() ||
+ (!msr_info->host_initiated &&
+ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
/*
* The only supported bit as of Skylake is bit 8, but
@@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
return -ENOMEM;
loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs->hv_timer_soft_disabled = false;
loaded_vmcs_init(loaded_vmcs);
if (cpu_has_vmx_msr_bitmap()) {
@@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
}
memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+ memset(&loaded_vmcs->controls_shadow, 0,
+ sizeof(struct vmcs_controls_shadow));
return 0;
@@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_dirty))
return;
- if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+ if (is_pae_paging(vcpu)) {
vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
@@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+ if (is_pae_paging(vcpu)) {
mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
@@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
- (CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING));
+ exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
- ~(CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING));
+ exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
@@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* Pass through host's Machine Check Enable value to hw_cr4, which
* is in force while we are in guest mode. Do not let guests control
@@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
- else if (to_vmx(vcpu)->rmode.vm86_active)
+ else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
else
hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
+ }
}
if (cr4 & X86_CR4_VMXE) {
@@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
- if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
+ if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
vcpu->arch.cr4 = cr4;
@@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
u8 mode = 0;
if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ (secondary_exec_controls_get(to_vmx(vcpu)) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
mode |= MSR_BITMAP_MODE_X2APIC;
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
@@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
u32 low32, high32;
unsigned long tmpl;
- struct desc_ptr dt;
unsigned long cr0, cr3, cr4;
cr0 = read_cr0();
@@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- store_idt(&dt);
- vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
- vmx->host_idt_base = dt.address;
+ vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
@@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
-static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
@@ -3808,8 +3839,9 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
- /* Enable the preemption timer dynamically */
- pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ if (!enable_preemption_timer)
+ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
return pin_based_exec_ctrl;
}
@@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
if (cpu_has_secondary_exec_ctrls()) {
if (kvm_vcpu_apicv_active(vcpu))
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ secondary_exec_controls_setbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
else
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ secondary_exec_controls_clearbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}
@@ -4015,15 +4047,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
vmx->hv_deadline_tsc = -1;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
+ exec_controls_set(vmx, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
- vmx->secondary_exec_control);
+ secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
- vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
+ vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
- vm_entry_controls_init(vmx, vmx_vmentry_ctrl());
+ vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
@@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4442,11 +4471,11 @@ static void kvm_machine_check(void)
static int handle_machine_check(struct kvm_vcpu *vcpu)
{
- /* already handled by vcpu_run */
+ /* handled by vmx_vcpu_run() */
return 1;
}
-static int handle_exception(struct kvm_vcpu *vcpu)
+static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@@ -4458,11 +4487,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx->exit_intr_info;
- if (is_machine_check(intr_info))
- return handle_machine_check(vcpu);
-
- if (is_nmi(intr_info))
- return 1; /* already handled by vmx_vcpu_run() */
+ if (is_machine_check(intr_info) || is_nmi(intr_info))
+ return 1; /* handled by handle_exception_nmi_irqoff() */
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
@@ -4518,7 +4544,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
@@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
return 1;
@@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
if (vcpu->guest_debug == 0) {
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_MOV_DR_EXITING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
/*
* No more DR vmexits; force a reload of the debug registers
@@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
- vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
+ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
}
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_INTR_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
- vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
- CPU_BASED_VIRTUAL_NMI_PENDING);
+ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
int ret = 1;
- u32 cpu_exec_ctrl;
bool intr_window_requested;
unsigned count = 130;
@@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
*/
WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
- cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
+ intr_window_requested = exec_controls_get(vmx) &
+ CPU_BASED_VIRTUAL_INTR_PENDING;
while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
@@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
* is read even if it isn't needed (e.g., for type==all)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, false, &gva))
+ vmx_instruction_info, false,
+ sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
@@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- if (!to_vmx(vcpu)->req_immediate_exit)
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!vmx->req_immediate_exit &&
+ !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
kvm_lapic_expired_hv_timer(vcpu);
+
return 1;
}
@@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu)
* to be done to userspace and return 0.
*/
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
- [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
+ [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
[EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
@@ -5607,15 +5634,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
void dump_vmcs(void)
{
- u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
- u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
- u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
- u32 secondary_exec_control = 0;
- unsigned long cr4 = vmcs_readl(GUEST_CR4);
- u64 efer = vmcs_read64(GUEST_IA32_EFER);
+ u32 vmentry_ctl, vmexit_ctl;
+ u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
+ unsigned long cr4;
+ u64 efer;
int i, n;
+ if (!dump_invalid_vmcs) {
+ pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
+ return;
+ }
+
+ vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
+ vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
+ cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+ cr4 = vmcs_readl(GUEST_CR4);
+ efer = vmcs_read64(GUEST_IA32_EFER);
+ secondary_exec_control = 0;
if (cpu_has_secondary_exec_ctrls())
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -5793,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (unlikely(vmx->fail)) {
+ dump_vmcs();
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -5943,6 +5980,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 sec_exec_control;
if (!lapic_in_kernel(vcpu))
@@ -5954,11 +5992,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
+ vmx->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
- sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ sec_exec_control = secondary_exec_controls_get(vmx);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -5980,7 +6018,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
break;
}
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+ secondary_exec_controls_set(vmx, sec_exec_control);
vmx_update_msr_bitmap(vcpu);
}
@@ -6098,76 +6136,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = 0;
- u16 basic_exit_reason = (u16)vmx->exit_reason;
-
- if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
- || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
- return;
-
- if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- vmx->exit_intr_info = exit_intr_info;
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* if exit due to PF check for async PF */
- if (is_page_fault(exit_intr_info))
+ if (is_page_fault(vmx->exit_intr_info))
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
/* Handle machine checks before interrupts are enabled */
- if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
- is_machine_check(exit_intr_info))
+ if (is_machine_check(vmx->exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- if (is_nmi(exit_intr_info)) {
+ if (is_nmi(vmx->exit_intr_info)) {
kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
kvm_after_interrupt(&vmx->vcpu);
}
}
-static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
- u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
- == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
- unsigned int vector;
- unsigned long entry;
- gate_desc *desc;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned int vector;
+ unsigned long entry;
#ifdef CONFIG_X86_64
- unsigned long tmp;
+ unsigned long tmp;
#endif
+ gate_desc *desc;
+ u32 intr_info;
+
+ intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ if (WARN_ONCE(!is_external_intr(intr_info),
+ "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
+ return;
+
+ vector = intr_info & INTR_INFO_VECTOR_MASK;
+ desc = (gate_desc *)host_idt_base + vector;
+ entry = gate_offset(desc);
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- desc = (gate_desc *)vmx->host_idt_base + vector;
- entry = gate_offset(desc);
- asm volatile(
+ kvm_before_interrupt(vcpu);
+
+ asm volatile(
#ifdef CONFIG_X86_64
- "mov %%" _ASM_SP ", %[sp]\n\t"
- "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
- "push $%c[ss]\n\t"
- "push %[sp]\n\t"
+ "mov %%" _ASM_SP ", %[sp]\n\t"
+ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
+ "push $%c[ss]\n\t"
+ "push %[sp]\n\t"
#endif
- "pushf\n\t"
- __ASM_SIZE(push) " $%c[cs]\n\t"
- CALL_NOSPEC
- :
+ "pushf\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+ CALL_NOSPEC
+ :
#ifdef CONFIG_X86_64
- [sp]"=&r"(tmp),
+ [sp]"=&r"(tmp),
#endif
- ASM_CALL_CONSTRAINT
- :
- THUNK_TARGET(entry),
- [ss]"i"(__KERNEL_DS),
- [cs]"i"(__KERNEL_CS)
- );
- }
+ ASM_CALL_CONSTRAINT
+ :
+ THUNK_TARGET(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
+
+ kvm_after_interrupt(vcpu);
+}
+STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
+
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ handle_external_interrupt_irqoff(vcpu);
+ else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+ handle_exception_nmi_irqoff(vmx);
}
-STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
static bool vmx_has_emulated_msr(int index)
{
@@ -6178,6 +6221,8 @@ static bool vmx_has_emulated_msr(int index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
/* This is AMD only. */
return false;
@@ -6323,15 +6368,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
- if (!vmx->loaded_vmcs->hv_timer_armed)
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- vmx->loaded_vmcs->hv_timer_armed = true;
-}
-
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6339,11 +6375,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u32 delta_tsc;
if (vmx->req_immediate_exit) {
- vmx_arm_hv_timer(vmx, 0);
- return;
- }
-
- if (vmx->hv_deadline_tsc != -1) {
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = false;
+ } else if (vmx->hv_deadline_tsc != -1) {
tscl = rdtsc();
if (vmx->hv_deadline_tsc > tscl)
/* set_hv_timer ensures the delta fits in 32-bits */
@@ -6352,14 +6386,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
else
delta_tsc = 0;
- vmx_arm_hv_timer(vmx, delta_tsc);
- return;
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = false;
+ } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = true;
}
-
- if (vmx->loaded_vmcs->hv_timer_armed)
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- vmx->loaded_vmcs->hv_timer_armed = false;
}
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
@@ -6392,8 +6424,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_sync)
- nested_sync_from_vmcs12(vcpu);
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6431,7 +6463,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
- vmx_update_hv_timer(vcpu);
+ if (enable_preemption_timer)
+ vmx_update_hv_timer(vcpu);
+
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
+ kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -6524,13 +6561,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+ if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+ kvm_machine_check();
+
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
return;
vmx->loaded_vmcs->launched = 1;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
@@ -6621,6 +6660,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ if (kvm_cstate_in_guest(kvm)) {
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+ }
vmx->msr_bitmap_mode = 0;
vmx->loaded_vmcs = &vmx->vmcs01;
@@ -6717,22 +6762,22 @@ static int vmx_vm_init(struct kvm *kvm)
return 0;
}
-static void __init vmx_check_processor_compat(void *rtn)
+static int __init vmx_check_processor_compat(void)
{
struct vmcs_config vmcs_conf;
struct vmx_capability vmx_cap;
- *(int *)rtn = 0;
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
- *(int *)rtn = -EIO;
+ return -EIO;
if (nested)
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
enable_apicv);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
- *(int *)rtn = -EIO;
+ return -EIO;
}
+ return 0;
}
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
@@ -6786,7 +6831,7 @@ static int vmx_get_lpage_level(void)
return PT_PDPE_LEVEL;
}
-static void vmcs_set_secondary_exec_control(u32 new_ctl)
+static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
{
/*
* These bits in the secondary execution controls field
@@ -6800,10 +6845,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC;
- u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ u32 new_ctl = vmx->secondary_exec_control;
+ u32 cur_ctl = secondary_exec_controls_get(vmx);
- vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
- (new_ctl & ~mask) | (cur_ctl & mask));
+ secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
}
/*
@@ -6941,7 +6986,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
- vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
+ vmcs_set_secondary_exec_control(vmx);
}
if (nested_vmx_allowed(vcpu))
@@ -7020,7 +7065,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
- if (kvm_mwait_in_guest(vcpu->kvm))
+ if (kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu))
return -EOPNOTSUPP;
vmx = to_vmx(vcpu);
@@ -7409,16 +7455,20 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- return 0;
+ return false;
}
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
+ struct desc_ptr dt;
int r, i;
rdmsrl_safe(MSR_EFER, &host_efer);
+ store_idt(&dt);
+ host_idt_base = dt.address;
+
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
@@ -7522,17 +7572,33 @@ static __init int hardware_setup(void)
}
if (!cpu_has_vmx_preemption_timer())
- kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+ enable_preemption_timer = false;
- if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
+ if (enable_preemption_timer) {
+ u64 use_timer_freq = 5000ULL * 1000 * 1000;
u64 vmx_msr;
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
cpu_preemption_timer_multi =
vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
- } else {
+
+ if (tsc_khz)
+ use_timer_freq = (u64)tsc_khz * 1000;
+ use_timer_freq >>= cpu_preemption_timer_multi;
+
+ /*
+ * KVM "disables" the preemption timer by setting it to its max
+ * value. Don't use the timer if it might cause spurious exits
+ * at a rate faster than 0.1 Hz (of uninterrupted guest time).
+ */
+ if (use_timer_freq > 0xffffffffu / 10)
+ enable_preemption_timer = false;
+ }
+
+ if (!enable_preemption_timer) {
kvm_x86_ops->set_hv_timer = NULL;
kvm_x86_ops->cancel_hv_timer = NULL;
+ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
@@ -7674,7 +7740,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
- .handle_external_intr = vmx_handle_external_intr,
+ .handle_exit_irqoff = vmx_handle_exit_irqoff,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 63d37ccce3dc..82d0bc3a4d52 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -109,14 +109,21 @@ struct nested_vmx {
* to guest memory during VM exit.
*/
struct vmcs12 *cached_shadow_vmcs12;
+
/*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
- bool need_vmcs12_sync;
+ bool need_vmcs12_to_shadow_sync;
bool dirty_vmcs12;
/*
+ * Indicates lazily loaded guest state has not yet been decached from
+ * vmcs02.
+ */
+ bool need_sync_vmcs02_to_vmcs12_rare;
+
+ /*
* vmcs02 has been initialized, i.e. state that is constant for
* vmcs02 has been written to the backing VMCS. Initialization
* is delayed until L1 actually attempts to run a nested VM.
@@ -180,14 +187,24 @@ struct vcpu_vmx {
struct kvm_vcpu vcpu;
u8 fail;
u8 msr_bitmap_mode;
+
+ /*
+ * If true, host state has been stored in vmx->loaded_vmcs for
+ * the CPU registers that only need to be switched when transitioning
+ * to/from the kernel, and the registers have been loaded with guest
+ * values. If false, host state is loaded in the CPU registers
+ * and vmx->loaded_vmcs->host_state is invalid.
+ */
+ bool guest_state_loaded;
+
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
+
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
- bool guest_msrs_dirty;
- unsigned long host_idt_base;
+ bool guest_msrs_ready;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
@@ -195,21 +212,15 @@ struct vcpu_vmx {
u64 spec_ctrl;
- u32 vm_entry_controls_shadow;
- u32 vm_exit_controls_shadow;
u32 secondary_exec_control;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
- * guest (L2), it points to a different VMCS. loaded_cpu_state points
- * to the VMCS whose state is loaded into the CPU registers that only
- * need to be switched when transitioning to/from the kernel; a NULL
- * value indicates that host state is loaded.
+ * guest (L2), it points to a different VMCS.
*/
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
- struct loaded_vmcs *loaded_cpu_state;
struct msr_autoload {
struct vmx_msrs guest;
@@ -260,8 +271,6 @@ struct vcpu_vmx {
unsigned long host_debugctlmsr;
- u64 msr_ia32_power_ctl;
-
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -292,12 +301,14 @@ struct kvm_vmx {
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void vmx_vcpu_put(struct kvm_vcpu *vcpu);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -319,6 +330,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
+void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
@@ -375,69 +387,31 @@ static inline u8 vmx_get_rvi(void)
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}
-static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
- vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
-}
-
-static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VM_ENTRY_CONTROLS, val);
- vmx->vm_entry_controls_shadow = val;
-}
-
-static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
- if (vmx->vm_entry_controls_shadow != val)
- vm_entry_controls_init(vmx, val);
-}
-
-static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
-{
- return vmx->vm_entry_controls_shadow;
-}
-
-static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
-}
-
-static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
-}
-
-static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
-{
- vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
-}
-
-static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
- vmcs_write32(VM_EXIT_CONTROLS, val);
- vmx->vm_exit_controls_shadow = val;
-}
-
-static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
- if (vmx->vm_exit_controls_shadow != val)
- vm_exit_controls_init(vmx, val);
-}
-
-static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
-{
- return vmx->vm_exit_controls_shadow;
-}
-
-static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
-}
-
-static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
- vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+#define BUILD_CONTROLS_SHADOW(lname, uname) \
+static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
+ vmcs_write32(uname, val); \
+ vmx->loaded_vmcs->controls_shadow.lname = val; \
+ } \
+} \
+static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
+{ \
+ return vmx->loaded_vmcs->controls_shadow.lname; \
+} \
+static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
+} \
+static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
+{ \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
}
+BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
+BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
+BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
+BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
+BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
@@ -467,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void)
}
u32 vmx_exec_control(struct vcpu_vmx *vmx);
+u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 536b78c4af6e..58305cf81182 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel-based Virtual Machine driver for Linux
*
@@ -13,10 +14,6 @@
* Yaniv Kamay <yaniv@qumranet.com>
* Amit Shah <amit.shah@qumranet.com>
* Ben-Ami Yassour <benami@il.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#include <linux/kvm_host.h>
@@ -54,6 +51,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <trace/events/kvm.h>
@@ -70,6 +68,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -143,7 +142,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
* tuning, i.e. allows priveleged userspace to set an exact advancement time.
*/
static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -155,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -719,7 +721,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn;
int r;
- if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
+ if (!is_pae_paging(vcpu))
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
@@ -962,8 +964,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (is_long_mode(vcpu) &&
(cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
return 1;
- else if (is_pae(vcpu) && is_paging(vcpu) &&
- !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+ else if (is_pae_paging(vcpu) &&
+ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
@@ -1176,7 +1178,28 @@ static u32 emulated_msrs[] = {
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_IA32_POWER_CTL,
+ /*
+ * The following list leaves out MSRs whose values are determined
+ * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
+ * We always support the "true" VMX control MSRs, even if the host
+ * processor does not, so I am putting these registers here rather
+ * than in msrs_to_save.
+ */
+ MSR_IA32_VMX_BASIC,
+ MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+ MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+ MSR_IA32_VMX_TRUE_EXIT_CTLS,
+ MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+ MSR_IA32_VMX_MISC,
+ MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_VMCS_ENUM,
+ MSR_IA32_VMX_PROCBASED_CTLS2,
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
MSR_K7_HWCR,
+ MSR_KVM_POLL_CONTROL,
};
static unsigned num_emulated_msrs;
@@ -1212,11 +1235,12 @@ static u32 msr_based_features[] = {
static unsigned int num_msr_based_features;
-u64 kvm_get_arch_capabilities(void)
+static u64 kvm_get_arch_capabilities(void)
{
- u64 data;
+ u64 data = 0;
- rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
/*
* If we're doing cache flushes (either "always" or "cond")
@@ -1232,7 +1256,6 @@ u64 kvm_get_arch_capabilities(void)
return data;
}
-EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
@@ -1298,7 +1321,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u64 efer = msr_info->data;
if (efer & efer_reserved_bits)
- return false;
+ return 1;
if (!msr_info->host_initiated) {
if (!__kvm_valid_efer(vcpu, efer))
@@ -1437,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
- /*
- * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
- * vcpu_enter_guest. This function is only called from
- * the physical CPU that is running vcpu.
- */
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
}
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
@@ -1521,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
*pshift = shift;
*pmultiplier = div_frac(scaled64, tps32);
-
- pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
- __func__, base_hz, scaled_hz, shift, *pmultiplier);
}
#ifdef CONFIG_X86_64
@@ -1557,7 +1573,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
vcpu->arch.tsc_always_catchup = 1;
return 0;
} else {
- WARN(1, "user requested TSC rate below hardware speed\n");
+ pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
return -1;
}
}
@@ -1567,8 +1583,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
user_tsc_khz, tsc_khz);
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
- WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
+ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
return -1;
}
@@ -1731,7 +1747,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = ktime_get_boot_ns();
+ ns = ktime_get_boottime_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
@@ -1766,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
- pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta;
offset = kvm_compute_tsc_offset(vcpu, data);
- pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -1790,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
matched = false;
- pr_debug("kvm: new tsc generation %llu, clock %llu\n",
- kvm->arch.cur_tsc_generation, data);
}
/*
@@ -2073,7 +2085,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
- return ktime_get_boot_ns() + ka->kvmclock_offset;
+ return ktime_get_boottime_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2089,7 +2101,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
- ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+ ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
put_cpu();
@@ -2188,7 +2200,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
if (!use_master_clock) {
host_tsc = rdtsc();
- kernel_ns = ktime_get_boot_ns();
+ kernel_ns = ktime_get_boottime_ns();
}
tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -2547,13 +2559,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_MISC_ENABLE:
- vcpu->arch.ia32_misc_enable_msr = data;
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
+ ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
+ return 1;
+ vcpu->arch.ia32_misc_enable_msr = data;
+ kvm_update_cpuid(vcpu);
+ } else {
+ vcpu->arch.ia32_misc_enable_msr = data;
+ }
break;
case MSR_IA32_SMBASE:
if (!msr_info->host_initiated)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_IA32_POWER_CTL:
+ vcpu->arch.msr_ia32_power_ctl = data;
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
@@ -2628,6 +2651,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
break;
+ case MSR_KVM_POLL_CONTROL:
+ /* only enable bit supported */
+ if (data & (-1ULL << 1))
+ return 1;
+
+ vcpu->arch.msr_kvm_poll_control = data;
+ break;
+
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
@@ -2805,6 +2836,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.arch_capabilities;
break;
+ case MSR_IA32_POWER_CTL:
+ msr_info->data = vcpu->arch.msr_ia32_power_ctl;
+ break;
case MSR_IA32_TSC:
msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
break;
@@ -2877,6 +2911,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_PV_EOI_EN:
msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
+ case MSR_KVM_POLL_CONTROL:
+ msr_info->data = vcpu->arch.msr_kvm_poll_control;
+ break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
@@ -3086,6 +3123,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_PMU_EVENT_FILTER:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
@@ -3098,7 +3136,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
+ r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
+ KVM_X86_DISABLE_EXITS_CSTATE;
if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
@@ -3122,6 +3161,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_MAX_VCPU_ID:
+ r = KVM_MAX_VCPU_ID;
+ break;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
@@ -4612,6 +4654,8 @@ split_irqchip_unlock:
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+ kvm->arch.cstate_in_guest = true;
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@@ -4926,6 +4970,9 @@ set_identity_unlock:
r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
break;
}
+ case KVM_SET_PMU_EVENT_FILTER:
+ r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
+ break;
default:
r = -ENOTTY;
}
@@ -6378,7 +6425,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.db);
if (dr6 != 0) {
- vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
*r = EMULATE_DONE;
@@ -6705,7 +6752,7 @@ static void kvm_hyperv_tsc_notifier(void)
struct kvm_vcpu *vcpu;
int cpu;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_make_mclock_inprogress_request(kvm);
@@ -6731,7 +6778,7 @@ static void kvm_hyperv_tsc_notifier(void)
spin_unlock(&ka->pvclock_gtod_sync_lock);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
}
#endif
@@ -6782,17 +6829,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- if (vcpu->cpu != smp_processor_id())
+ if (vcpu->cpu != raw_smp_processor_id())
send_ipi = 1;
}
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -6857,7 +6904,6 @@ static void kvm_timer_init(void)
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
- pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
kvmclock_cpu_online, kvmclock_cpu_down_prep);
@@ -6907,35 +6953,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
-static void kvm_set_mmio_spte_mask(void)
-{
- u64 mask;
- int maxphyaddr = boot_cpu_data.x86_phys_bits;
-
- /*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
- */
-
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
- mask &= ~1ull;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask);
-}
-
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@@ -6944,12 +6961,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -7032,8 +7049,6 @@ int kvm_arch_init(void *opaque)
if (r)
goto out_free_percpu;
- kvm_set_mmio_spte_mask();
-
kvm_x86_ops = ops;
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
@@ -7047,6 +7062,8 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
+ if (pi_inject_timer == -1)
+ pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
@@ -7172,6 +7189,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
+static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+{
+ struct kvm_vcpu *target = NULL;
+ struct kvm_apic_map *map;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
+ target = map->phys_map[dest_id]->vcpu;
+
+ rcu_read_unlock();
+
+ if (target)
+ kvm_vcpu_yield_to(target);
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -7218,6 +7252,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
+ case KVM_HC_SCHED_YIELD:
+ kvm_sched_yield(vcpu->kvm, a0);
+ ret = 0;
+ break;
default:
ret = -KVM_ENOSYS;
break;
@@ -7950,9 +7988,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_in_kernel(vcpu) &&
- vcpu->arch.apic->lapic_timer.timer_advance_ns)
- wait_lapic_expire(vcpu);
guest_enter_irqoff();
fpregs_assert_state_consistent();
@@ -8001,13 +8036,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_before_interrupt(vcpu);
- kvm_x86_ops->handle_external_intr(vcpu);
- kvm_after_interrupt(vcpu);
+ kvm_x86_ops->handle_exit_irqoff(vcpu);
+ /*
+ * Consume any pending interrupts, including the possible source of
+ * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
+ * An instruction is required after local_irq_enable() to fully unblock
+ * interrupts on processors that implement an interrupt shadow, the
+ * stat.exits increment will do nicely.
+ */
+ kvm_before_interrupt(vcpu);
+ local_irq_enable();
++vcpu->stat.exits;
+ local_irq_disable();
+ kvm_after_interrupt(vcpu);
guest_exit_irqoff();
+ if (lapic_in_kernel(vcpu)) {
+ s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
+ if (delta != S64_MIN) {
+ trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
+ vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
+ }
+ }
local_irq_enable();
preempt_enable();
@@ -8593,7 +8644,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
+ if (is_pae_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
@@ -8874,6 +8925,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+
+ /* poll control enabled by default */
+ vcpu->arch.msr_kvm_poll_control = 1;
+
mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
@@ -9015,7 +9070,7 @@ int kvm_arch_hardware_enable(void)
* before any KVM threads can be running. Unfortunately, we can't
* bring the TSCs fully up to date with real time, as we aren't yet far
* enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boot_ns() will be using boot
+ * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
* variables that haven't been updated yet.
*
* So we simply find the maximum observed TSC above, then record the
@@ -9106,9 +9161,9 @@ void kvm_arch_hardware_unsetup(void)
kvm_x86_ops->hardware_unsetup();
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- kvm_x86_ops->check_processor_compatibility(rtn);
+ return kvm_x86_ops->check_processor_compatibility();
}
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
@@ -9243,7 +9298,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
- kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+ kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
pvclock_update_vm_gtod_copy(kvm);
kvm->arch.guest_can_read_msr_platform_info = true;
@@ -9380,6 +9435,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
kvm_hv_destroy_vm(kvm);
@@ -9788,6 +9844,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
sizeof(u32));
}
+static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
+ return false;
+
+ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
+ (vcpu->arch.apf.send_user_only &&
+ kvm_x86_ops->get_cpl(vcpu) == 0))
+ return false;
+
+ return true;
+}
+
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!lapic_in_kernel(vcpu) ||
+ kvm_event_needs_reinjection(vcpu) ||
+ vcpu->arch.exception.pending))
+ return false;
+
+ if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
+ return false;
+
+ /*
+ * If interrupts are off we cannot even use an artificial
+ * halt state.
+ */
+ return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@@ -9796,11 +9882,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
trace_kvm_async_pf_not_present(work->arch.token, work->gva);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
- (vcpu->arch.apf.send_user_only &&
- kvm_x86_ops->get_cpl(vcpu) == 0))
- kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+ if (kvm_can_deliver_async_pf(vcpu) &&
+ !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
@@ -9808,6 +9891,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
fault.address = work->arch.token;
fault.async_page_fault = true;
kvm_inject_page_fault(vcpu, &fault);
+ } else {
+ /*
+ * It is not possible to deliver a paravirtualized asynchronous
+ * page fault, but putting the guest in an artificial halt state
+ * can be beneficial nevertheless: if an interrupt arrives, we
+ * can deliver it timely and perhaps the guest will schedule
+ * another process. When the instruction that triggered a page
+ * fault is retried, hopefully the page will be ready in the host.
+ */
+ kvm_make_request(KVM_REQ_APF_HALT, vcpu);
}
}
@@ -9948,6 +10041,13 @@ bool kvm_vector_hashing_enabled(void)
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
+
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a470ff0868c5..6594020c0691 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
+static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
+{
+ return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
+}
+
static inline u32 bit(int bitno)
{
return 1 << (bitno & 31);
@@ -296,6 +301,8 @@ extern unsigned int min_timer_period_us;
extern bool enable_vmware_backdoor;
+extern int pi_inject_timer;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
@@ -333,6 +340,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm)
return kvm->arch.pause_in_guest;
}
+static inline bool kvm_cstate_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.cstate_in_guest;
+}
+
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 9b0ca8fe80fc..e0788bade5ab 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* atomic64_t for 386/486
*
* Copyright © 2010 Luca Barbieri
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index db3ae85440ff..843d978ee341 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* atomic64_t for 586+
*
* Copyright © 2010 Luca Barbieri
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index 1811fa4a1b1a..7c48ff4ae8d1 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu);
int wbinvd_on_all_cpus(void)
{
- return on_each_cpu(__wbinvd, NULL, 1);
+ on_each_cpu(__wbinvd, NULL, 1);
+ return 0;
}
EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index ad8e0906d1ea..4df90c9ea383 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -18,11 +19,6 @@
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 88acd349911b..75a5a4515fa7 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/export.h>
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 3261abb21ef4..4f1719e22d3c 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
*
* Misc librarized functions for cmdline poking.
*/
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 9b330242e740..d63185698a23 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,10 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/percpu.h>
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 03a186fc06ea..691d80e97488 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -1,10 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/export.h>
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index b2f1822084ae..4fe1601dbc5d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
@@ -239,7 +239,7 @@ copy_user_handle_tail:
ret
_ASM_EXTABLE_UA(1b, 2b)
-ENDPROC(copy_user_handle_tail)
+END(copy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index 2dd1fe13a37b..04967cdce5d1 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/export.h>
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index a6a2b7dccbff..c66c8b00f236 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002, 2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v.2
*
* Wrappers of assembly checksum functions for x86-64.
*/
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 74fdff968ea3..304f958c27b2 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,29 +115,29 @@ ENDPROC(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
+bad_get_user_clac:
+ ASM_CLAC
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user)
#ifdef CONFIG_X86_32
+bad_get_user_8_clac:
+ ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user_8)
#endif
- _ASM_EXTABLE_UA(1b, bad_get_user)
- _ASM_EXTABLE_UA(2b, bad_get_user)
- _ASM_EXTABLE_UA(3b, bad_get_user)
+ _ASM_EXTABLE_UA(1b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, bad_get_user)
+ _ASM_EXTABLE_UA(4b, bad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, bad_get_user_8)
- _ASM_EXTABLE_UA(5b, bad_get_user_8)
+ _ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
+ _ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index c1f01a8e9f65..12539fca75c4 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -1,22 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* x86 instruction attribute tables
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
*/
#include <asm/insn.h>
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index cf00ab6c6621..306c3a0902ba 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -557,7 +557,8 @@ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs,
}
/**
- * get_desc() - Obtain pointer to a segment descriptor
+ * get_desc() - Obtain contents of a segment descriptor
+ * @out: Segment descriptor contents on success
* @sel: Segment selector
*
* Given a segment selector, obtain a pointer to the segment descriptor.
@@ -565,18 +566,18 @@ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs,
*
* Returns:
*
- * Pointer to segment descriptor on success.
+ * True on success, false on failure.
*
* NULL on error.
*/
-static struct desc_struct *get_desc(unsigned short sel)
+static bool get_desc(struct desc_struct *out, unsigned short sel)
{
struct desc_ptr gdt_desc = {0, 0};
unsigned long desc_base;
#ifdef CONFIG_MODIFY_LDT_SYSCALL
if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) {
- struct desc_struct *desc = NULL;
+ bool success = false;
struct ldt_struct *ldt;
/* Bits [15:3] contain the index of the desired entry. */
@@ -584,12 +585,14 @@ static struct desc_struct *get_desc(unsigned short sel)
mutex_lock(&current->active_mm->context.lock);
ldt = current->active_mm->context.ldt;
- if (ldt && sel < ldt->nr_entries)
- desc = &ldt->entries[sel];
+ if (ldt && sel < ldt->nr_entries) {
+ *out = ldt->entries[sel];
+ success = true;
+ }
mutex_unlock(&current->active_mm->context.lock);
- return desc;
+ return success;
}
#endif
native_store_gdt(&gdt_desc);
@@ -604,9 +607,10 @@ static struct desc_struct *get_desc(unsigned short sel)
desc_base = sel & ~(SEGMENT_RPL_MASK | SEGMENT_TI_MASK);
if (desc_base > gdt_desc.size)
- return NULL;
+ return false;
- return (struct desc_struct *)(gdt_desc.address + desc_base);
+ *out = *(struct desc_struct *)(gdt_desc.address + desc_base);
+ return true;
}
/**
@@ -628,7 +632,7 @@ static struct desc_struct *get_desc(unsigned short sel)
*/
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
{
- struct desc_struct *desc;
+ struct desc_struct desc;
short sel;
sel = get_segment_selector(regs, seg_reg_idx);
@@ -666,11 +670,10 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
if (!sel)
return -1L;
- desc = get_desc(sel);
- if (!desc)
+ if (!get_desc(&desc, sel))
return -1L;
- return get_desc_base(desc);
+ return get_desc_base(&desc);
}
/**
@@ -692,7 +695,7 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
*/
static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
{
- struct desc_struct *desc;
+ struct desc_struct desc;
unsigned long limit;
short sel;
@@ -706,8 +709,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
if (!sel)
return 0;
- desc = get_desc(sel);
- if (!desc)
+ if (!get_desc(&desc, sel))
return 0;
/*
@@ -716,8 +718,8 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
* not tested when checking the segment limits. In practice,
* this means that the segment ends in (limit << 12) + 0xfff.
*/
- limit = get_desc_limit(desc);
- if (desc->g)
+ limit = get_desc_limit(&desc);
+ if (desc.g)
limit = (limit << 12) + 0xfff;
return limit;
@@ -741,7 +743,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
*/
int insn_get_code_seg_params(struct pt_regs *regs)
{
- struct desc_struct *desc;
+ struct desc_struct desc;
short sel;
if (v8086_mode(regs))
@@ -752,8 +754,7 @@ int insn_get_code_seg_params(struct pt_regs *regs)
if (sel < 0)
return sel;
- desc = get_desc(sel);
- if (!desc)
+ if (!get_desc(&desc, sel))
return -EINVAL;
/*
@@ -761,10 +762,10 @@ int insn_get_code_seg_params(struct pt_regs *regs)
* determines whether a segment contains data or code. If this is a data
* segment, return error.
*/
- if (!(desc->type & BIT(3)))
+ if (!(desc.type & BIT(3)))
return -EINVAL;
- switch ((desc->l << 1) | desc->d) {
+ switch ((desc.l << 1) | desc.d) {
case 0: /*
* Legacy mode. CS.L=0, CS.D=0. Address and operand size are
* both 16-bit.
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1088eb8f3a5f..0b5862ba6a75 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* x86 instruction analysis
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004, 2009
*/
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index 33147fef3452..a9bdf0805be0 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2006 PathScale, Inc. All Rights Reserved.
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9d05572370ed..92748660ba51 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index d2e5c9c39601..14bf78341d3c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -32,8 +32,6 @@
*/
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
-#define EXIT ASM_CLAC ; \
- ret
.text
ENTRY(__put_user_1)
@@ -43,7 +41,8 @@ ENTRY(__put_user_1)
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
@@ -56,7 +55,8 @@ ENTRY(__put_user_2)
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
@@ -69,7 +69,8 @@ ENTRY(__put_user_4)
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
@@ -85,19 +86,21 @@ ENTRY(__put_user_8)
5: movl %edx,4(%_ASM_CX)
#endif
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ RET
ENDPROC(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+bad_put_user_clac:
+ ASM_CLAC
bad_put_user:
movl $-EFAULT,%eax
- EXIT
-END(bad_put_user)
+ RET
- _ASM_EXTABLE_UA(1b, bad_put_user)
- _ASM_EXTABLE_UA(2b, bad_put_user)
- _ASM_EXTABLE_UA(3b, bad_put_user)
- _ASM_EXTABLE_UA(4b, bad_put_user)
+ _ASM_EXTABLE_UA(1b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(4b, bad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(5b, bad_put_user)
+ _ASM_EXTABLE_UA(5b, bad_put_user_clac)
#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 9952a01cad24..fff28c6f73a2 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* User address space access functions.
*
@@ -59,7 +60,7 @@ EXPORT_SYMBOL(clear_user);
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
*/
-__visible unsigned long
+__visible notrace unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index a5a41ec58072..0c122226ca56 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
((y) + EXTENDED_Ebias) & 0x7fff; }
#define exponent16(x) (*(short *)&((x)->exp))
-#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
+#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); }
#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 8dc9095bab22..742619e94bdf 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -18,7 +18,7 @@
#include "control_w.h"
#define MAKE_REG(s, e, l, h) { l, h, \
- ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+ (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
#if 0
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index cd84f067e41d..39001a401eff 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/debugfs.h>
#include <linux/efi.h>
#include <linux/module.h>
@@ -25,8 +26,6 @@ static int ptdump_curknl_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump_curknl);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
-static struct dentry *pe_curusr;
-
static int ptdump_curusr_show(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
@@ -41,8 +40,6 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr);
#endif
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
-static struct dentry *pe_efi;
-
static int ptdump_efi_show(struct seq_file *m, void *v)
{
if (efi_mm.pgd)
@@ -53,41 +50,24 @@ static int ptdump_efi_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump_efi);
#endif
-static struct dentry *dir, *pe_knl, *pe_curknl;
+static struct dentry *dir;
static int __init pt_dump_debug_init(void)
{
dir = debugfs_create_dir("page_tables", NULL);
- if (!dir)
- return -ENOMEM;
-
- pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
- &ptdump_fops);
- if (!pe_knl)
- goto err;
- pe_curknl = debugfs_create_file("current_kernel", 0400,
- dir, NULL, &ptdump_curknl_fops);
- if (!pe_curknl)
- goto err;
+ debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
+ debugfs_create_file("current_kernel", 0400, dir, NULL,
+ &ptdump_curknl_fops);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pe_curusr = debugfs_create_file("current_user", 0400,
- dir, NULL, &ptdump_curusr_fops);
- if (!pe_curusr)
- goto err;
+ debugfs_create_file("current_user", 0400, dir, NULL,
+ &ptdump_curusr_fops);
#endif
-
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
- pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
- if (!pe_efi)
- goto err;
+ debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
#endif
-
return 0;
-err:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 6a7302d1161f..ab67822fd2f4 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Debug helper to dump the current kernel pagetables of the system
* so that we can see what the various memory ranges are set to.
@@ -5,11 +6,6 @@
* (C) Copyright 2008 Intel Corporation
*
* Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/debugfs.h>
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index b0a2de8d2f9e..4d75bc656f97 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6aae46..6c46095cd0d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
return 0;
}
-static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
-{
- if (!kprobes_built_in())
- return 0;
- if (user_mode(regs))
- return 0;
- /*
- * To be potentially processing a kprobe fault and to be allowed to call
- * kprobe_running(), we have to be non-preemptible.
- */
- if (preemptible())
- return 0;
- if (!kprobe_running())
- return 0;
- return kprobe_fault_handler(regs, X86_TRAP_PF);
-}
-
/*
* Prefetch quirks:
*
@@ -710,6 +693,10 @@ static void set_signal_archinfo(unsigned long address,
* To avoid leaking information about the kernel page
* table layout, pretend that user-mode accesses to
* kernel addresses are always protection faults.
+ *
+ * NB: This means that failed vsyscalls with vsyscall=none
+ * will have the PROT bit. This doesn't leak any
+ * information and does not appear to cause any problems.
*/
if (address >= TASK_SIZE_MAX)
error_code |= X86_PF_PROT;
@@ -756,8 +743,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
set_signal_archinfo(address, error_code);
/* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address,
- tsk);
+ force_sig_fault(signal, si_code, (void __user *)address);
}
/*
@@ -918,7 +904,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -1015,8 +1001,6 @@ static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
vm_fault_t fault)
{
- struct task_struct *tsk = current;
-
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
@@ -1031,6 +1015,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
#ifdef CONFIG_MEMORY_FAILURE
if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ struct task_struct *tsk = current;
unsigned lsb = 0;
pr_err(
@@ -1040,11 +1025,11 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
static noinline void
@@ -1280,7 +1265,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
return;
/* kprobes don't want to hook the spurious faults: */
- if (kprobes_fault(regs))
+ if (kprobe_page_fault(regs, X86_TRAP_PF))
return;
/*
@@ -1311,7 +1296,7 @@ void do_user_addr_fault(struct pt_regs *regs,
mm = tsk->mm;
/* kprobes don't want to hook the spurious faults: */
- if (unlikely(kprobes_fault(regs)))
+ if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
return;
/*
@@ -1369,16 +1354,18 @@ void do_user_addr_fault(struct pt_regs *regs,
#ifdef CONFIG_X86_64
/*
- * Instruction fetch faults in the vsyscall page might need
- * emulation. The vsyscall page is at a high address
- * (>PAGE_OFFSET), but is considered to be part of the user
- * address space.
+ * Faults in the vsyscall page might need emulation. The
+ * vsyscall page is at a high address (>PAGE_OFFSET), but is
+ * considered to be part of the user address space.
*
* The vsyscall page does not have a "real" VMA, so do this
* emulation before we go searching for VMAs.
+ *
+ * PKRU never rejects instruction fetches, so we don't need
+ * to consider the PF_PK bit.
*/
- if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) {
- if (emulate_vsyscall(regs, address))
+ if (is_vsyscall_vaddr(address)) {
+ if (emulate_vsyscall(hw_error_code, regs, address))
return;
}
#endif
@@ -1503,9 +1490,8 @@ good_area:
NOKPROBE_SYMBOL(do_user_addr_fault);
/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1524,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
}
NOKPROBE_SYMBOL(__do_page_fault);
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
+ if (!trace_pagefault_enabled())
+ return;
+
if (user_mode(regs))
trace_page_fault_user(address, regs, error_code);
else
trace_page_fault_kernel(address, regs, error_code);
}
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
prev_state = exception_enter();
- if (trace_pagefault_enabled())
- trace_page_fault_entries(address, regs, error_code);
-
+ trace_page_fault_entries(regs, error_code, address);
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 0d4bdcb84da5..0a1898b8552e 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/swap.h> /* for totalram_pages */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 075e568098f2..4068abb9427f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 1995 Linus Torvalds
@@ -859,7 +860,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap)
{
@@ -871,7 +871,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
__remove_pages(zone, start_pfn, nr_pages, altmap);
}
#endif
-#endif
int kernel_set_to_readonly __read_mostly;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 62fc457f3849..a6b5c653727b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/arch/x86_64/mm/init.c
*
@@ -670,23 +671,25 @@ static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, bool init)
{
- unsigned long paddr_next, paddr_last = paddr_end;
- unsigned long vaddr = (unsigned long)__va(paddr);
- int i = p4d_index(vaddr);
+ unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+ paddr_last = paddr_end;
+ vaddr = (unsigned long)__va(paddr);
+ vaddr_end = (unsigned long)__va(paddr_end);
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
page_size_mask, init);
- for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d;
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ p4d_t *p4d = p4d_page + p4d_index(vaddr);
pud_t *pud;
- vaddr = (unsigned long)__va(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+ vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+ paddr = __pa(vaddr);
if (paddr >= paddr_end) {
+ paddr_next = __pa(vaddr_next);
if (!after_bootmem &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RAM) &&
@@ -698,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask, init);
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
@@ -1195,7 +1198,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
remove_pagetable(start, end, false, altmap);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
static void __meminit
kernel_physical_mapping_remove(unsigned long start, unsigned long end)
{
@@ -1210,17 +1212,12 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn);
- struct zone *zone;
+ struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+ struct zone *zone = page_zone(page);
- /* With altmap the first mapped page is offset from @start */
- if (altmap)
- page += vmem_altmap_offset(altmap);
- zone = page_zone(page);
__remove_pages(zone, start_pfn, nr_pages, altmap);
kernel_physical_mapping_remove(start, start + size);
}
-#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
static struct kcore_list kcore_vsyscall;
@@ -1521,7 +1518,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
{
int err;
- if (boot_cpu_has(X86_FEATURE_PSE))
+ if (end - start < PAGES_PER_SECTION * sizeof(struct page))
+ err = vmemmap_populate_basepages(start, end, node);
+ else if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index b3294d36769d..6748b4c2baff 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright © 2008 Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#include <asm/iomap.h>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index dd73d5d74393..63e99f15d7cf 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Re-map IO memory to kernel address space so that we can access it.
* This is needed for high PCI addresses that aren't mapped in the
@@ -27,9 +28,11 @@
#include "physaddr.h"
-struct ioremap_mem_flags {
- bool system_ram;
- bool desc_other;
+/*
+ * Descriptor controlling ioremap() behavior.
+ */
+struct ioremap_desc {
+ unsigned int flags;
};
/*
@@ -61,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
return err;
}
-static bool __ioremap_check_ram(struct resource *res)
+/* Does the range (or a subset of) contain normal RAM? */
+static unsigned int __ioremap_check_ram(struct resource *res)
{
unsigned long start_pfn, stop_pfn;
unsigned long i;
if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
- return false;
+ return 0;
start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
stop_pfn = (res->end + 1) >> PAGE_SHIFT;
@@ -75,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res)
for (i = 0; i < (stop_pfn - start_pfn); ++i)
if (pfn_valid(start_pfn + i) &&
!PageReserved(pfn_to_page(start_pfn + i)))
- return true;
+ return IORES_MAP_SYSTEM_RAM;
}
- return false;
+ return 0;
}
-static int __ioremap_check_desc_other(struct resource *res)
+/*
+ * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
+ * there the whole memory is already encrypted.
+ */
+static unsigned int __ioremap_check_encrypted(struct resource *res)
{
- return (res->desc != IORES_DESC_NONE);
+ if (!sev_active())
+ return 0;
+
+ switch (res->desc) {
+ case IORES_DESC_NONE:
+ case IORES_DESC_RESERVED:
+ break;
+ default:
+ return IORES_MAP_ENCRYPTED;
+ }
+
+ return 0;
}
-static int __ioremap_res_check(struct resource *res, void *arg)
+static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
- struct ioremap_mem_flags *flags = arg;
+ struct ioremap_desc *desc = arg;
- if (!flags->system_ram)
- flags->system_ram = __ioremap_check_ram(res);
+ if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
+ desc->flags |= __ioremap_check_ram(res);
- if (!flags->desc_other)
- flags->desc_other = __ioremap_check_desc_other(res);
+ if (!(desc->flags & IORES_MAP_ENCRYPTED))
+ desc->flags |= __ioremap_check_encrypted(res);
- return flags->system_ram && flags->desc_other;
+ return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
+ (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}
/*
@@ -105,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg)
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
*/
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
- struct ioremap_mem_flags *flags)
+ struct ioremap_desc *desc)
{
u64 start, end;
start = (u64)addr;
end = start + size - 1;
- memset(flags, 0, sizeof(*flags));
+ memset(desc, 0, sizeof(struct ioremap_desc));
- walk_mem_res(start, end, flags, __ioremap_res_check);
+ walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
}
/*
@@ -130,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap_caller(resource_size_t phys_addr,
- unsigned long size, enum page_cache_mode pcm,
- void *caller, bool encrypted)
+static void __iomem *
+__ioremap_caller(resource_size_t phys_addr, unsigned long size,
+ enum page_cache_mode pcm, void *caller, bool encrypted)
{
unsigned long offset, vaddr;
resource_size_t last_addr;
const resource_size_t unaligned_phys_addr = phys_addr;
const unsigned long unaligned_size = size;
- struct ioremap_mem_flags mem_flags;
+ struct ioremap_desc io_desc;
struct vm_struct *area;
enum page_cache_mode new_pcm;
pgprot_t prot;
@@ -157,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return NULL;
}
- __ioremap_check_mem(phys_addr, size, &mem_flags);
+ __ioremap_check_mem(phys_addr, size, &io_desc);
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- if (mem_flags.system_ram) {
+ if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
&phys_addr, &last_addr);
return NULL;
@@ -200,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* resulting mapping.
*/
prot = PAGE_KERNEL_IO;
- if ((sev_active() && mem_flags.desc_other) || encrypted)
+ if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
prot = pgprot_encrypted(prot);
switch (pcm) {
@@ -439,6 +459,11 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
+int __init arch_ioremap_p4d_supported(void)
+{
+ return 0;
+}
+
int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8dc0fc0b1382..296da58f3013 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -199,7 +199,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
- p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+ p4d = pgd_val(*pgd) & PTE_PFN_MASK;
p4d += __START_KERNEL_map - phys_base;
return (p4d_t *)p4d + p4d_index(addr);
}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index dc3f058bdf9b..dc6182eecefa 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
} kaslr_regions[] = {
{ &page_offset_base, 0 },
{ &vmalloc_base, 0 },
- { &vmemmap_base, 1 },
+ { &vmemmap_base, 0 },
};
/* Get size in bytes used by the memory region */
@@ -78,6 +78,7 @@ void __init kernel_randomize_memory(void)
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;
+ unsigned long vmemmap_size;
vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
@@ -109,6 +110,14 @@ void __init kernel_randomize_memory(void)
if (memory_tb < kaslr_regions[0].size_tb)
kaslr_regions[0].size_tb = memory_tb;
+ /*
+ * Calculate the vmemmap region size in TBs, aligned to a TB
+ * boundary.
+ */
+ vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
+ sizeof(struct page);
+ kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);
+
/* Calculate entropy available between regions */
remain_entropy = vaddr_end - vaddr_start;
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 51f50a7a07ef..fece30ca8b0c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define DISABLE_BRANCH_PROFILING
@@ -18,6 +15,10 @@
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -44,7 +45,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key);
bool sev_enabled __section(.data);
/* Buffer used for early in-place encryption by BSP, no locking needed */
-static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
/*
* This routine does not change the underlying encryption setting of the
@@ -351,6 +352,32 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
+/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+bool force_dma_unencrypted(struct device *dev)
+{
+ /*
+ * For SEV, all DMA must be to unencrypted addresses.
+ */
+ if (sev_active())
+ return true;
+
+ /*
+ * For SME, all DMA must be to unencrypted addresses if the
+ * device does not support DMA to addresses that include the
+ * encryption mask.
+ */
+ if (sme_active()) {
+ u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
+ u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
+ dev->bus_dma_mask);
+
+ if (dma_dev_mask <= dma_enc_mask)
+ return true;
+ }
+
+ return false;
+}
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_free_decrypted_mem(void)
{
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 40a6085063d6..6d71481a1e70 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/linkage.h>
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 4aa9b1480866..e2b0e2ac07bb 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define DISABLE_BRANCH_PROFILING
@@ -73,6 +70,19 @@ struct sme_populate_pgd_data {
unsigned long vaddr_end;
};
+/*
+ * This work area lives in the .init.scratch section, which lives outside of
+ * the kernel proper. It is sized to hold the intermediate copy buffer and
+ * more than enough pagetable pages.
+ *
+ * By using this section, the kernel can be encrypted in place and it
+ * avoids any possibility of boot parameters or initramfs images being
+ * placed such that the in-place encryption logic overwrites them. This
+ * section is 2MB aligned to allow for simple pagetable setup using only
+ * PMD entries (see vmlinux.lds.S).
+ */
+static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch);
+
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";
@@ -314,8 +324,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
}
#endif
- /* Set the encryption workarea to be immediately after the kernel */
- workarea_start = kernel_end;
+ /*
+ * We're running identity mapped, so we must obtain the address to the
+ * SME encryption workarea using rip-relative addressing.
+ */
+ asm ("lea sme_workarea(%%rip), %0"
+ : "=r" (workarea_start)
+ : "p" (sme_workarea));
/*
* Calculate required number of workarea bytes needed:
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index dc726e07d8ba..aae9a933dfd4 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Flexible mmap layout support
*
@@ -8,20 +9,6 @@
* All Rights Reserved.
* Copyright 2005 Andi Kleen, SUSE Labs.
* Copyright 2007 Jiri Kosina, SUSE Labs.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/personality.h>
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 2c1ecf4763c4..b8ef8557d4b3 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2005
* Jeff Muizelaar, 2006, 2007
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 0d1c47cbbdd6..895fb7a9294d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -912,7 +912,7 @@ void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
ret = mpx_unmap_tables(mm, start, end);
if (ret)
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
/* MPX cannot handle addresses above 47 bits yet. */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index dfb6c4df639a..e6dad600614c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Common code for 32 and 64-bit NUMA */
#include <linux/acpi.h>
#include <linux/kernel.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index daf4d645e537..6a9a77a403c9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2002 Andi Kleen, SuSE Labs.
* Thanks to Ben LaHaise for precious feedback.
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 4fe956a63b25..d9fbd4f69920 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Handle caching attributes in page tables (PAT)
*
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index a235869532bc..3f83e31b3a93 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Fault Injection Test harness (FI)
* Copyright (C) Intel Crop.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- *
*/
/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
index e05341a51a27..e2a13dce0e13 100644
--- a/arch/x86/mm/pf_in.h
+++ b/arch/x86/mm/pf_in.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Fault Injection Test harness (FI)
* Copyright (C) Intel Crop.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- *
*/
#ifndef __PF_H_
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1f67b1e15bf6..44816ff6411f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif
-#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
-
#ifdef CONFIG_HIGHPTE
-#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#else
-#define PGALLOC_USER_GFP 0
+#define PGTABLE_HIGHMEM 0
#endif
-gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
-
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
-}
+gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
- struct page *pte;
-
- pte = alloc_pages(__userpte_alloc_gfp, 0);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
+ return __pte_alloc_one(mm, __userpte_alloc_gfp);
}
static int __init setup_userpte(char *arg)
@@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
int i;
bool failed = false;
- gfp_t gfp = PGALLOC_GFP;
+ gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
@@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void)
* We allocate one page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return (pgd_t *)__get_free_pages(PGALLOC_GFP,
+ return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
PGD_ALLOCATION_ORDER);
/*
* Now PAE kernel is not running as a Xen domain. We can allocate
* a 32-byte slab for pgd to save memory space.
*/
- return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+ return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
}
static inline void _pgd_free(pgd_t *pgd)
@@ -424,7 +408,8 @@ void __init pgd_cache_init(void)
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
+ PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 1dcfc91c8f0c..c6f84c0b5d7a 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Memory Protection Keys management
* Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/debugfs.h> /* debugfs_create_u32() */
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 9c2463bc158f..b196524759ec 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
* This code is based in part on work published here:
*
* https://github.com/IAIK/KAISER
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index f6ae6830b341..0881e1ff1e58 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7f61431c75fb..4de9704c4aaf 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/init.h>
#include <linux/mm.h>
@@ -711,7 +712,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
}
/*
- * See Documentation/x86/tlb.txt for details. We choose 33
+ * See Documentation/x86/tlb.rst for details. We choose 33
* because it is large enough to cover the vast majority (at
* least 95%) of allocations, and is small enough that we are
* confident it will not cause too much overhead. Each single
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 59e123da580c..383c87300b0d 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Arch-specific network modules
#
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index afabf597c855..eaaed5bfc4a4 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* bpf_jit_comp.c: BPF JIT compiler
*
* Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
* Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/netdevice.h>
#include <linux/filter.h>
@@ -190,9 +186,7 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
-
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE 20
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -203,44 +197,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
u8 *prog = *pprog;
int cnt = 0;
- /* push rbp */
- EMIT1(0x55);
-
- /* mov rbp,rsp */
- EMIT3(0x48, 0x89, 0xE5);
-
- /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
- EMIT3_off32(0x48, 0x81, 0xEC,
- round_up(stack_depth, 8) + AUX_STACK_SPACE);
-
- /* sub rbp, AUX_STACK_SPACE */
- EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
-
- /* mov qword ptr [rbp+0],rbx */
- EMIT4(0x48, 0x89, 0x5D, 0);
- /* mov qword ptr [rbp+8],r13 */
- EMIT4(0x4C, 0x89, 0x6D, 8);
- /* mov qword ptr [rbp+16],r14 */
- EMIT4(0x4C, 0x89, 0x75, 16);
- /* mov qword ptr [rbp+24],r15 */
- EMIT4(0x4C, 0x89, 0x7D, 24);
-
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ /* sub rsp, rounded_stack_depth */
+ EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+ EMIT1(0x53); /* push rbx */
+ EMIT2(0x41, 0x55); /* push r13 */
+ EMIT2(0x41, 0x56); /* push r14 */
+ EMIT2(0x41, 0x57); /* push r15 */
if (!ebpf_from_cbpf) {
- /*
- * Clear the tail call counter (tail_call_cnt): for eBPF tail
- * calls we need to reset the counter to 0. It's done in two
- * instructions, resetting RAX register to 0, and moving it
- * to the counter location.
- */
-
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
-
+ /* zero init tail_call_cnt */
+ EMIT2(0x6a, 0x00);
BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
}
-
*pprog = prog;
}
@@ -285,13 +254,13 @@ static void emit_bpf_tail_call(u8 **pprog)
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
+ EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
+ EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -1040,19 +1009,14 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
- /* mov rbx, qword ptr [rbp+0] */
- EMIT4(0x48, 0x8B, 0x5D, 0);
- /* mov r13, qword ptr [rbp+8] */
- EMIT4(0x4C, 0x8B, 0x6D, 8);
- /* mov r14, qword ptr [rbp+16] */
- EMIT4(0x4C, 0x8B, 0x75, 16);
- /* mov r15, qword ptr [rbp+24] */
- EMIT4(0x4C, 0x8B, 0x7D, 24);
-
- /* add rbp, AUX_STACK_SPACE */
- EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
- EMIT1(0xC9); /* leave */
- EMIT1(0xC3); /* ret */
+ if (!bpf_prog_was_classic(bpf_prog))
+ EMIT1(0x5B); /* get rid of tail_call_cnt */
+ EMIT2(0x41, 0x5F); /* pop r15 */
+ EMIT2(0x41, 0x5E); /* pop r14 */
+ EMIT2(0x41, 0x5D); /* pop r13 */
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ EMIT1(0xC3); /* ret */
break;
default:
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index b29e82f190c7..393d251798c0 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
/* dst = src */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
const u8 src[], bool dstk,
- bool sstk, u8 **pprog)
+ bool sstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
if (is64)
/* complete 8 byte move */
emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
- else
+ else if (!aux->verifier_zext)
/* zero out high 4 bytes */
emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}
@@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
}
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
*/
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* nop */
@@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
}
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
EMIT2(0x0F, 0xB7);
EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 32:
/* Emit 'bswap eax' to swap lower 4 bytes */
EMIT1(0x0F);
EMIT1(add_1reg(0xC8, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ if (!aux->verifier_zext)
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
break;
case 64:
/* Emit 'bswap eax' to swap lower 4 bytes */
@@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
const u8 dst[], const u8 src[],
bool dstk, bool sstk,
- u8 **pprog)
+ u8 **pprog, const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
@@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
if (is64)
emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
&prog);
- else
+ else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
}
@@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
/* ALU operation (64 bit) */
static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
const u8 dst[], const u32 val,
- bool dstk, u8 **pprog)
+ bool dstk, u8 **pprog,
+ const struct bpf_prog_aux *aux)
{
u8 *prog = *pprog;
u32 hi = 0;
@@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
if (is64)
emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
- else
+ else if (!aux->verifier_zext)
emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
*pprog = prog;
@@ -724,9 +732,6 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -745,78 +750,22 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* shl dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
- /* mov ebx,dreg_lo */
- EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shld dreg_hi,dreg_lo,cl */
+ EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
/* shl dreg_lo,cl */
EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+ /* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */
- /* shr ebx,cl */
- EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
- /* or dreg_hi,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
-
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (4 bytes) when < 32 */
+ EMIT2(IA32_JB, 4);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* shl dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
/* mov dreg_hi,dreg_lo */
EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
-
- /* xor dreg_lo,dreg_lo */
- EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
/* xor dreg_lo,dreg_lo */
EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
@@ -836,9 +785,6 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -857,79 +803,23 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* lshr dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
- /* ashr dreg_hi,cl */
+ /* shrd dreg_lo,dreg_hi,cl */
+ EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* sar dreg_hi,cl */
EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
+ /* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (5 bytes) when < 32 */
+ EMIT2(IA32_JB, 5);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* ashr dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
/* mov dreg_lo,dreg_hi */
EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
-
- /* ashr dreg_hi,imm8 */
+ /* sar dreg_hi,31 */
EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
- /* ashr dreg_hi,imm8 */
- EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
- /* mov dreg_lo,dreg_hi */
- EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
-
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
@@ -948,9 +838,6 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
{
u8 *prog = *pprog;
int cnt = 0;
- static int jmp_label1 = -1;
- static int jmp_label2 = -1;
- static int jmp_label3 = -1;
u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
@@ -969,77 +856,23 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* mov ecx,src_lo */
EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
- /* cmp ecx,32 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
- /* Jumps when >= 32 */
- if (is_imm8(jmp_label(jmp_label1, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
-
- /* < 32 */
- /* lshr dreg_lo,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,cl */
+ EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
/* shr dreg_hi,cl */
EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
- /* IA32_ECX = -IA32_ECX + 32 */
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
-
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+ /* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */
- /* >= 32 */
- if (jmp_label1 == -1)
- jmp_label1 = cnt;
- /* cmp ecx,64 */
- EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
- /* Jumps when >= 64 */
- if (is_imm8(jmp_label(jmp_label2, 2)))
- EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
- else
- EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* skip the next two instructions (4 bytes) when < 32 */
+ EMIT2(IA32_JB, 4);
- /* >= 32 && < 64 */
- /* sub ecx,32 */
- EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
- /* shr dreg_hi,cl */
- EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
/* mov dreg_lo,dreg_hi */
EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
/* xor dreg_hi,dreg_hi */
EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
- /* goto out; */
- if (is_imm8(jmp_label(jmp_label3, 2)))
- EMIT2(0xEB, jmp_label(jmp_label3, 2));
- else
- EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
-
- /* >= 64 */
- if (jmp_label2 == -1)
- jmp_label2 = cnt;
- /* xor dreg_lo,dreg_lo */
- EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
- /* xor dreg_hi,dreg_hi */
- EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
-
- if (jmp_label3 == -1)
- jmp_label3 = cnt;
-
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
@@ -1069,27 +902,10 @@ static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
}
/* Do LSH operation */
if (val < 32) {
- /* shl dreg_hi,imm8 */
- EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
- /* mov ebx,dreg_lo */
- EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shld dreg_hi,dreg_lo,imm8 */
+ EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
/* shl dreg_lo,imm8 */
EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shr ebx,cl */
- EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
- /* or dreg_hi,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1135,27 +951,10 @@ static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
/* Do RSH operation */
if (val < 32) {
- /* shr dreg_lo,imm8 */
- EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,imm8 */
+ EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
/* shr dreg_hi,imm8 */
EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1200,27 +999,10 @@ static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
}
/* Do RSH operation */
if (val < 32) {
- /* shr dreg_lo,imm8 */
- EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
- /* mov ebx,dreg_hi */
- EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shrd dreg_lo,dreg_hi,imm8 */
+ EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
/* ashr dreg_hi,imm8 */
EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
-
- /* IA32_ECX = 32 - val */
- /* mov ecx,val */
- EMIT2(0xB1, val);
- /* movzx ecx,ecx */
- EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* neg ecx */
- EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
- /* add ecx,32 */
- EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
-
- /* shl ebx,cl */
- EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
- /* or dreg_lo,ebx */
- EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
} else if (val >= 32 && val < 64) {
u32 value = val - 32;
@@ -1713,8 +1495,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_MOV | BPF_X:
switch (BPF_SRC(code)) {
case BPF_X:
- emit_ia32_mov_r64(is64, dst, src, dstk,
- sstk, &prog);
+ if (imm32 == 1) {
+ /* Special mov32 for zext. */
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ }
+ emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
+ &prog, bpf_prog->aux);
break;
case BPF_K:
/* Sign-extend immediate value to dst reg */
@@ -1754,11 +1541,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
switch (BPF_SRC(code)) {
case BPF_X:
emit_ia32_alu_r64(is64, BPF_OP(code), dst,
- src, dstk, sstk, &prog);
+ src, dstk, sstk, &prog,
+ bpf_prog->aux);
break;
case BPF_K:
emit_ia32_alu_i64(is64, BPF_OP(code), dst,
- imm32, dstk, &prog);
+ imm32, dstk, &prog,
+ bpf_prog->aux);
break;
}
break;
@@ -1777,7 +1566,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
false, &prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU | BPF_RSH | BPF_X:
@@ -1797,7 +1587,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst / src(imm) */
/* dst = dst % src(imm) */
@@ -1819,7 +1610,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
&prog);
break;
}
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
case BPF_ALU64 | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1836,7 +1628,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
false, &prog);
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = dst << imm */
case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1872,7 +1665,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_NEG:
emit_ia32_alu_i(is64, false, BPF_OP(code),
dst_lo, 0, dstk, &prog);
- emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ if (!bpf_prog->aux->verifier_zext)
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
break;
/* dst = ~dst (64 bit) */
case BPF_ALU64 | BPF_NEG:
@@ -1892,11 +1686,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
/* dst = htole(dst) */
case BPF_ALU | BPF_END | BPF_FROM_LE:
- emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
+ emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
+ bpf_prog->aux);
break;
/* dst = htobe(dst) */
case BPF_ALU | BPF_END | BPF_FROM_BE:
- emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
+ emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
+ bpf_prog->aux);
break;
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW: {
@@ -2051,6 +1847,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_B:
case BPF_H:
case BPF_W:
+ if (bpf_prog->aux->verifier_zext) /* verifier already inserted the zext: nothing to emit */
+ break;
if (dstk) {
EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
STACK_VAR(dst_hi));
@@ -2475,6 +2273,11 @@ notyet:
return proglen;
}
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index ca1e8e6dccc8..2db73613cada 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Read address ranges from a Broadcom CNB20LE Host Bridge
*
* Copyright (c) 2010 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/acpi.h>
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 3353b76dcff0..584c25b588b4 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -1,23 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * GPL LICENSE SUMMARY
- *
* Copyright(c) 2010 Intel Corporation. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
* Contact Information:
* Intel Corporation
* 2200 Mission College Blvd.
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index d4ec117c1142..9acab6ac28f5 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Low-Level PCI Support for PC
*
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index dfbe6ac38830..467311b1eeea 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* legacy.c - traditional, old school PCI bus probing
*/
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 3e9e166f6408..bfa789875322 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2004 Matthew Wilcox <matthew@wil.cx>
* Copyright (C) 2004 Intel Corp.
- *
- * This code is released under the GNU General Public License version 2.
*/
/*
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index 7043a4f0e98a..f3aab76e357a 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Low-level PCI config space access for OLPC systems who lack the VSA
* PCI virtualization software.
*
* Copyright © 2006 Advanced Micro Devices, Inc.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
* The AMD Geode chipset (ie: GX2 processor, cs5536 I/O companion device)
* has some I/O functions (display, southbridge, sound, USB HCIs, etc)
* that more or less behave like PCI devices, but the hardware doesn't
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 3cdafea55ab6..97bbc12dd6b2 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* arch/x86/pci/sta2x11-fixup.c
* glue code for lib/swiotlb.c and DMA translation between STA2x11
@@ -6,20 +7,6 @@
* ST Microelectronics ConneXt (STA2X11/STA2X10)
*
* Copyright (c) 2010-2011 Wind River Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
*/
#include <linux/pci.h>
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 9112d1cb397b..91220cc25854 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
* initial domain support. We also handle the DSDT _PRT callbacks for GSI's
diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile
index 57be88fa34bb..e06bbecd6358 100644
--- a/arch/x86/platform/atom/Makefile
+++ b/arch/x86/platform/atom/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 6cb6076223ba..ee6b0780bea1 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel SOC Punit device state debug driver
* Punit controls power management for North Complex devices (Graphics
* blocks, Image Signal Processing, video processing, display, DSP etc.)
*
* Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#include <linux/module.h>
@@ -113,24 +104,12 @@ DEFINE_SHOW_ATTRIBUTE(punit_dev_state);
static struct dentry *punit_dbg_file;
-static int punit_dbgfs_register(struct punit_device *punit_device)
+static void punit_dbgfs_register(struct punit_device *punit_device)
{
- struct dentry *dev_state;
-
punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
- if (!punit_dbg_file)
- return -ENXIO;
-
- dev_state = debugfs_create_file("dev_power_state", 0444,
- punit_dbg_file, punit_device,
- &punit_dev_state_fops);
- if (!dev_state) {
- pr_err("punit_dev_state register failed\n");
- debugfs_remove(punit_dbg_file);
- return -ENXIO;
- }
- return 0;
+ debugfs_create_file("dev_power_state", 0444, punit_dbg_file,
+ punit_device, &punit_dev_state_fops);
}
static void punit_dbgfs_unregister(void)
@@ -154,15 +133,12 @@ MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
static int __init punit_atom_debug_init(void)
{
const struct x86_cpu_id *id;
- int ret;
id = x86_match_cpu(intel_punit_cpu_ids);
if (!id)
return -ENODEV;
- ret = punit_dbgfs_register((struct punit_device *)id->driver_data);
- if (ret < 0)
- return ret;
+ punit_dbgfs_register((struct punit_device *)id->driver_data);
return 0;
}
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
index 91fc92971d94..7b7f37dc80b1 100644
--- a/arch/x86/platform/ce4100/Makefile
+++ b/arch/x86/platform/ce4100/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index b3233b1835ea..40745664d92f 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel CE4100 platform specific setup code
*
* (C) Copyright 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index ce874f872cc6..0ac3d4357136 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* CE4100 on Falcon Falls
*
* (c) Copyright 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
*/
/dts-v1/;
/ {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e1cb01a22fa8..a7189a3b4d70 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -85,6 +85,8 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
pgd_t *save_pgd;
save_pgd = efi_call_phys_prolog();
+ if (!save_pgd)
+ return EFI_ABORTED;
/* Disable interrupts around EFI calls: */
local_irq_save(flags);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index cf0347f61b21..08ce8177c3af 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -84,13 +84,15 @@ pgd_t * __init efi_call_phys_prolog(void)
if (!efi_enabled(EFI_OLD_MEMMAP)) {
efi_switch_mm(&efi_mm);
- return NULL;
+ return efi_mm.pgd;
}
early_code_mapping_set_exec(1);
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
+ if (!save_pgd)
+ return NULL;
/*
* Build 1:1 identity mapping for efi=old_map usage. Note that
@@ -138,10 +140,11 @@ pgd_t * __init efi_call_phys_prolog(void)
pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
}
-out:
__flush_tlb_all();
-
return save_pgd;
+out:
+ efi_call_phys_epilog(save_pgd);
+ return NULL;
}
void __init efi_call_phys_epilog(pgd_t *save_pgd)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index a25a9fd987a9..3b9fd679cea9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "efi: " fmt
#include <linux/init.h>
@@ -512,6 +513,9 @@ int __init efi_reuse_config(u64 tables, int nr_tables)
void *p, *tablep;
struct efi_setup_data *data;
+ if (nr_tables == 0)
+ return 0;
+
if (!efi_setup)
return 0;
@@ -724,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
* Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
* page faulting on these addresses isn't expected.
*/
- if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
+ if (phys_addr <= 0x0fff)
return;
/*
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 5b51194f4c8d..a8a6b1dedb01 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ALIX) += alix.o
obj-$(CONFIG_NET5501) += net5501.o
obj-$(CONFIG_GEOS) += geos.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 1865c196f136..c33f744b5388 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for PCEngines ALIX.
* At the moment this means setup of GPIO control of LEDs
* on Alix.2/3/6 boards.
*
- *
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
* and Philip Prindeville <philipp@redfish-solutions.com>
@@ -11,10 +11,6 @@
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
* In the future leds-net5501.c should be migrated over to platform
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -24,7 +20,6 @@
#include <linux/moduleparam.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index 4fcdb91318a0..73a3f49b4eb6 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for Traverse Technologies GEOS.
* At the moment this means setup of GPIO control of LEDs.
@@ -9,10 +10,6 @@
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
* In the future leds-net5501.c should be migrated over to platform
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -21,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index a2f6b982a729..163e1b545517 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -1,19 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* System Specific setup for Soekris net5501
* At the moment this means setup of GPIO control of LEDs and buttons
* on net5501 boards.
*
- *
* Copyright (C) 2008-2009 Tower Technologies
* Written by Alessandro Zummo <a.zummo@towertech.it>
*
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
* and Philip Prindeville <philipp@redfish-solutions.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -22,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
index f030b532fdf3..072c395379ac 100644
--- a/arch/x86/platform/goldfish/Makefile
+++ b/arch/x86/platform/goldfish/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
index 0d17c0aafeb1..6b6f8b4360dd 100644
--- a/arch/x86/platform/goldfish/goldfish.c
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2007 Google, Inc.
* Copyright (C) 2011 Intel, Inc.
* Copyright (C) 2013 Intel, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 5cf886c867c2..cc2549f0ccb1 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o
# SFI specific code
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 1421d5330b2c..564c47c53f3a 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_bcm43xx.c: bcm43xx platform data initialization file
*
* (C) Copyright 2016 Intel Corporation
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/gpio/machine.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
index c26cf393d35a..32912a17f68e 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_bma023.c: bma023 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <asm/intel-mid.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 31dce781364c..e3f4bfc08f78 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Bluetooth platform data initialization file
*
* (C) Copyright 2017 Intel Corporation
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/gpio/machine.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
index c259fb6c8f4f..a2508582a0b1 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_emc1403.c: emc1403 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
index e639e3116acf..d9435d2196a4 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_gpio_keys.c: gpio_keys platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/input.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
index a35cf912de43..a4485cd638c6 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_lis331.c: lis331 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/i2c.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 58337b2bc682..e9287c3184da 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_max7315.c: max7315 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
index ee22864bbc2f..28a182713934 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_mpu3050.c: mpu3050 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/gpio.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c
index 4de8a664e6a1..605e1f94ad89 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Merrifield FLIS platform device initialization file
*
* Copyright (C) 2016, Intel Corporation
*
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
index a6c3705a28ad..ec2afb41b34a 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Merrifield power button support
*
* (C) Copyright 2017 Intel Corporation
*
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c
index 3135416df037..40e9808a9634 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Merrifield legacy RTC initialization file
*
* (C) Copyright 2017 Intel Corporation
*
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
index 00c4a034ad93..fe3b7ff975f3 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* SDHCI platform data initilisation file
*
* (C) Copyright 2016 Intel Corporation
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 7a7fc54c449b..b828f4fd40be 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* spidev platform data initialization file
*
* (C) Copyright 2014, 2016 Intel Corporation
* Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
* Dan O'Donovan <dan@emutex.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/err.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 2acd6be13375..227218a8f98e 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel Merrifield watchdog platform device library file
*
* (C) Copyright 2014 Intel Corporation
* Author: David Cohen <david.a.cohen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
index e421106c11cf..b17783d0d4e7 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic.c: MSIC platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
index b7be1d041da2..91deb2e65b0e 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic.h
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* platform_msic.h: MSIC platform data header file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#ifndef _PLATFORM_MSIC_H_
#define _PLATFORM_MSIC_H_
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
index d4dc744dd5a5..e765da78ad8c 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_audio.c: MSIC audio platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
index 5c3e9919633f..f461f84903f8 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_battery.c: MSIC battery platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
index 9fdb88d460d7..71a7d6db3878 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_gpio.c: MSIC GPIO platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
index 7ae37cdbf256..558c0d974430 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_ocd.c: MSIC OCD platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
index 96809b98cf69..3d3de2d59726 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_power_btn.c: MSIC power btn platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
index 3e4167d246cd..4858da1d78c6 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_msic_thermal.c: msic_thermal platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/input.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c
index 8344d5a928c9..5609d8da3978 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PCAL9555a platform data initialization file
*
@@ -5,11 +6,6 @@
*
* Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
* Dan O'Donovan <dan@emutex.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/gpio.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
index 2905376559f1..44d1f884c3d3 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_tc35876x.c: tc35876x platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/gpio.h>
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
index 4f41372ce400..e689d8f61059 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* platform_tca6416.c: tca6416 platform data initialization file
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/platform_data/pca953x.h>
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 56f66eafb94f..780728161f7d 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* intel-mid.c: Intel MID platform setup code
*
* (C) Copyright 2008, 2012 Intel Corporation
* Author: Jacob Pan (jacob.jun.pan@intel.com)
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#define pr_fmt(fmt) "intel_mid: " fmt
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index a52914aa3b6c..2226da4f437a 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* intel_mid_vrtc.c: Driver for virtual RTC device on Intel MID platform
*
* (C) Copyright 2009 Intel Corporation
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* Note:
* VRTC is emulated by system controller firmware, the real HW
* RTC is located in the PMIC device. SCU FW shadows PMIC RTC
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 49ec5b94c71f..27288d8d3f71 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel MID Power Management Unit (PWRMU) device driver
*
@@ -5,10 +6,6 @@
*
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
* Intel MID Power Management Unit device driver handles the South Complex PCI
* devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core
* modifies bits in PMCSR register in the PCI configuration space. This is not
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 7be1e1fe9ae3..b8f7f193f383 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* intel_mid_sfi.c: Intel MID SFI initialization code
*
* (C) Copyright 2013 Intel Corporation
* Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
*/
#include <linux/init.h>
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
index 9cc57ed36022..ed77cb9529ce 100644
--- a/arch/x86/platform/intel-quark/Makefile
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_INTEL_IMR) += imr.o
obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 49828c2707ac..6dd25dc5f027 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/**
* imr.c -- Intel Isolated Memory Region driver
*
@@ -34,7 +35,6 @@
#include <linux/types.h>
struct imr_device {
- struct dentry *file;
bool init;
struct mutex lock;
int max_imr;
@@ -230,13 +230,11 @@ DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state);
* imr_debugfs_register - register debugfs hooks.
*
* @idev: pointer to imr_device structure.
- * @return: 0 on success - errno on failure.
*/
-static int imr_debugfs_register(struct imr_device *idev)
+static void imr_debugfs_register(struct imr_device *idev)
{
- idev->file = debugfs_create_file("imr_state", 0444, NULL, idev,
- &imr_dbgfs_state_fops);
- return PTR_ERR_OR_ZERO(idev->file);
+ debugfs_create_file("imr_state", 0444, NULL, idev,
+ &imr_dbgfs_state_fops);
}
/**
@@ -581,7 +579,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
static int __init imr_init(void)
{
struct imr_device *idev = &imr_dev;
- int ret;
if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
return -ENODEV;
@@ -591,9 +588,7 @@ static int __init imr_init(void)
idev->init = true;
mutex_init(&idev->lock);
- ret = imr_debugfs_register(idev);
- if (ret != 0)
- pr_warn("debugfs register failed!\n");
+ imr_debugfs_register(idev);
imr_fixup_memmap(idev);
return 0;
}
diff --git a/arch/x86/platform/intel/Makefile b/arch/x86/platform/intel/Makefile
index b878032fbc82..dbee3b00f9d0 100644
--- a/arch/x86/platform/intel/Makefile
+++ b/arch/x86/platform/intel/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index a9f2e888e135..2e796b54cbde 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* IOSF-SB MailBox Interface Driver
* Copyright (c) 2013, Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- *
* The IOSF-SB is a fabric bus available on Atom based SOC's that uses a
* mailbox interface (MBI) to communicate with multiple devices. This
* driver implements access to this interface for those platforms that can
@@ -470,31 +461,16 @@ static struct dentry *iosf_dbg;
static void iosf_sideband_debug_init(void)
{
- struct dentry *d;
-
iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
- if (IS_ERR_OR_NULL(iosf_dbg))
- return;
/* mdr */
- d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
- if (!d)
- goto cleanup;
+ debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
/* mcrx */
- d = debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
- if (!d)
- goto cleanup;
+ debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
/* mcr - initiates mailbox tranaction */
- d = debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
- if (!d)
- goto cleanup;
-
- return;
-
-cleanup:
- debugfs_remove_recursive(d);
+ debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
}
static void iosf_debugfs_init(void)
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
index db921983a102..354352748428 100644
--- a/arch/x86/platform/iris/Makefile
+++ b/arch/x86/platform/iris/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 735ba21efe91..1ac8578258af 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Eurobraille/Iris power off support.
*
@@ -5,20 +6,6 @@
* It is shutdown by a special I/O sequence which this module provides.
*
* Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
- *
- * This program is free software ; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation ; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY ; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with the program ; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/moduleparam.h>
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index 0668aaff8bfe..e1a32062a375 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for power management features of the OLPC XO-1 laptop
*
@@ -5,11 +6,6 @@
* Copyright (C) 2010 One Laptop per Child
* Copyright (C) 2006 Red Hat, Inc.
* Copyright (C) 2006 Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/cs5535.h>
diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c
index 8e7ddd7e313a..57f210cda761 100644
--- a/arch/x86/platform/olpc/olpc-xo1-rtc.c
+++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1 Real Time Clock (RTC)
*
* Copyright (C) 2011 One Laptop per Child
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/mc146818rtc.h>
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index d9b8a1c1ab0f..25ce1b3b0732 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1 System Control Interrupts (SCI)
*
* Copyright (C) 2010 One Laptop per Child
* Copyright (C) 2006 Red Hat, Inc.
* Copyright (C) 2006 Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/cs5535.h>
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index c0533fbc39e3..6d193bb36021 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for OLPC XO-1.5 System Control Interrupts (SCI)
*
* Copyright (C) 2009-2010 One Laptop per Child
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/device.h>
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index f0e920fb98ad..ee2beda590d0 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for the OLPC DCON and OLPC EC access
*
* Copyright © 2006 Advanced Micro Devices, Inc.
* Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -30,9 +26,6 @@
struct olpc_platform_t olpc_platform_info;
EXPORT_SYMBOL_GPL(olpc_platform_info);
-/* EC event mask to be applied during suspend (defining wakeup sources). */
-static u16 ec_wakeup_mask;
-
/* what the timeout *should* be (in ms) */
#define EC_BASE_TIMEOUT 20
@@ -186,83 +179,6 @@ err:
return ret;
}
-void olpc_ec_wakeup_set(u16 value)
-{
- ec_wakeup_mask |= value;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set);
-
-void olpc_ec_wakeup_clear(u16 value)
-{
- ec_wakeup_mask &= ~value;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear);
-
-/*
- * Returns true if the compile and runtime configurations allow for EC events
- * to wake the system.
- */
-bool olpc_ec_wakeup_available(void)
-{
- if (!machine_is_olpc())
- return false;
-
- /*
- * XO-1 EC wakeups are available when olpc-xo1-sci driver is
- * compiled in
- */
-#ifdef CONFIG_OLPC_XO1_SCI
- if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */
- return true;
-#endif
-
- /*
- * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is
- * compiled in
- */
-#ifdef CONFIG_OLPC_XO15_SCI
- if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */
- return true;
-#endif
-
- return false;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available);
-
-int olpc_ec_mask_write(u16 bits)
-{
- if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
- __be16 ec_word = cpu_to_be16(bits);
- return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2,
- NULL, 0);
- } else {
- unsigned char ec_byte = bits & 0xff;
- return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0);
- }
-}
-EXPORT_SYMBOL_GPL(olpc_ec_mask_write);
-
-int olpc_ec_sci_query(u16 *sci_value)
-{
- int ret;
-
- if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) {
- __be16 ec_word;
- ret = olpc_ec_cmd(EC_EXT_SCI_QUERY,
- NULL, 0, (void *) &ec_word, 2);
- if (ret == 0)
- *sci_value = be16_to_cpu(ec_word);
- } else {
- unsigned char ec_byte;
- ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1);
- if (ret == 0)
- *sci_value = ec_byte;
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
-
static bool __init check_ofw_architecture(struct device_node *root)
{
const char *olpc_arch;
@@ -296,6 +212,10 @@ static bool __init platform_detect(void)
if (success) {
olpc_platform_info.boardrev = get_board_revision(root);
olpc_platform_info.flags |= OLPC_F_PRESENT;
+
+ pr_info("OLPC board revision %s%X\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4);
}
of_node_put(root);
@@ -315,27 +235,8 @@ static int __init add_xo1_platform_devices(void)
return PTR_ERR_OR_ZERO(pdev);
}
-static int olpc_xo1_ec_probe(struct platform_device *pdev)
-{
- /* get the EC revision */
- olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
- (unsigned char *) &olpc_platform_info.ecver, 1);
-
- /* EC version 0x5f adds support for wide SCI mask */
- if (olpc_platform_info.ecver >= 0x5f)
- olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
-
- pr_info("OLPC board revision %s%X (EC=%x)\n",
- ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
- olpc_platform_info.boardrev >> 4,
- olpc_platform_info.ecver);
-
- return 0;
-}
static int olpc_xo1_ec_suspend(struct platform_device *pdev)
{
- olpc_ec_mask_write(ec_wakeup_mask);
-
/*
* Squelch SCIs while suspended. This is a fix for
* <http://dev.laptop.org/ticket/1835>.
@@ -359,15 +260,27 @@ static int olpc_xo1_ec_resume(struct platform_device *pdev)
}
static struct olpc_ec_driver ec_xo1_driver = {
- .probe = olpc_xo1_ec_probe,
.suspend = olpc_xo1_ec_suspend,
.resume = olpc_xo1_ec_resume,
.ec_cmd = olpc_xo1_ec_cmd,
+#ifdef CONFIG_OLPC_XO1_SCI
+ /*
+ * XO-1 EC wakeups are available when olpc-xo1-sci driver is
+ * compiled in
+ */
+ .wakeup_available = true,
+#endif
};
static struct olpc_ec_driver ec_xo1_5_driver = {
- .probe = olpc_xo1_ec_probe,
.ec_cmd = olpc_xo1_ec_cmd,
+#ifdef CONFIG_OLPC_XO15_SCI
+ /*
+ * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is
+ * compiled in
+ */
+ .wakeup_available = true,
+#endif
};
static int __init olpc_init(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 0296c5b55e6f..26d1f6693789 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* OLPC-specific OFW device tree support code.
*
@@ -9,11 +10,6 @@
*
* Adapted for sparc by David S. Miller davem@davemloft.net
* Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -220,7 +216,7 @@ static u32 __init olpc_dt_get_board_revision(void)
return be32_to_cpu(rev);
}
-int olpc_dt_compatible_match(phandle node, const char *compat)
+static int __init olpc_dt_compatible_match(phandle node, const char *compat)
{
char buf[64], *p;
int plen, len;
diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c
index f1aab8cdb33f..20a064568463 100644
--- a/arch/x86/platform/olpc/olpc_ofw.c
+++ b/arch/x86/platform/olpc/olpc_ofw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock_types.h>
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 1861a2ba0f2b..c0a502f7e3a7 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest)
}
/*
- * See Documentation/x86/boot.txt.
+ * See Documentation/x86/boot.rst.
*
* Version 2.12 supports Xen entry point but we will use default x86/PC
* environment (i.e. hardware_subarch 0).
diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile
index 762b4c7f4314..981b3e4302e6 100644
--- a/arch/x86/platform/scx200/Makefile
+++ b/arch/x86/platform/scx200/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 3dc9aee41d91..80662b72035d 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
*
diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile
index cc5db1168a5e..4eba24c2af67 100644
--- a/arch/x86/platform/sfi/Makefile
+++ b/arch/x86/platform/sfi/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SFI) += sfi.o
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 6c7111bbd1e9..bf6016f8db4e 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* sfi.c - x86 architecture SFI support.
*
* Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
*/
#define KMSG_COMPONENT "SFI"
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
index c54e348c96a7..910fe9e3ffb4 100644
--- a/arch/x86/platform/ts5500/Makefile
+++ b/arch/x86/platform/ts5500/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index 7e56fc74093c..0b67da056fd9 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Technologic Systems TS-5500 Single Board Computer support
*
* Copyright (C) 2013-2014 Savoir-faire Linux Inc.
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option) any later
- * version.
- *
- *
* This driver registers the Technologic Systems TS-5500 Single Board Computer
* (SBC) and its devices, and exposes information to userspace such as jumpers'
* state or available options. For further information about sysfs entries, see
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
index 52079bebd014..a3693c829e2e 100644
--- a/arch/x86/platform/uv/Makefile
+++ b/arch/x86/platform/uv/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index ef60d789c76e..7c69652ffeea 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BIOS run time interface routines.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Russ Anderson <rja@sgi.com>
*/
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 1297e185b8c8..20c389a91b80 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,10 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SGI UltraViolet TLB flush routines.
*
* (c) 2008-2014 Cliff Wickman <cpw@sgi.com>, SGI.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
*/
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -68,7 +66,6 @@ static struct tunables tunables[] = {
};
static struct dentry *tunables_dir;
-static struct dentry *tunables_file;
/* these correspond to the statistics printed by ptc_seq_show() */
static char *stat_description[] = {
@@ -1702,18 +1699,8 @@ static int __init uv_ptc_init(void)
}
tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
- if (!tunables_dir) {
- pr_err("unable to create debugfs directory %s\n",
- UV_BAU_TUNABLES_DIR);
- return -EINVAL;
- }
- tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
- tunables_dir, NULL, &tunables_fops);
- if (!tunables_file) {
- pr_err("unable to create debugfs file %s\n",
- UV_BAU_TUNABLES_FILE);
- return -EINVAL;
- }
+ debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL,
+ &tunables_fops);
return 0;
}
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index b21a932c220c..9d08ff5a755e 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SGI NMI support routines
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Mike Travis
*/
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index e9da9ebd924a..62214731fea5 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Russ Anderson
*/
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index a36b368eea08..7af31b245636 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SGI RTC clock/timer routines.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Dimitri Sivanich
*/
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index a7d966964c6f..24b079e94bc2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Suspend support specific for i386/x86-64.
*
- * Distribute under GPLv2
- *
* Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
@@ -299,7 +298,17 @@ int hibernate_resume_nonboot_cpu_disable(void)
* address in its instruction pointer may not be possible to resolve
* any more at that point (the page tables used by it previously may
* have been overwritten by hibernate image data).
+ *
+ * First, make sure that we wake up all the potentially disabled SMT
+ * threads which have been initially brought up and then put into
+ * mwait/cpuidle sleep.
+ * Those will be put to proper (not interfering with hibernation
+ * resume) sleep afterwards, and the resumed kernel will decide itself
+ * what to do with them.
*/
+ ret = cpuhp_smt_enable();
+ if (ret)
+ return ret;
smp_ops.play_dead = resume_play_dead;
ret = disable_nonboot_cpus();
smp_ops.play_dead = play_dead;
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 4845b8c7be7f..fc413717a45f 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -11,6 +11,7 @@
#include <linux/suspend.h>
#include <linux/scatterlist.h>
#include <linux/kdebug.h>
+#include <linux/cpu.h>
#include <crypto/hash.h>
@@ -245,3 +246,35 @@ out:
__flush_tlb_all();
return 0;
}
+
+int arch_resume_nosmt(void)
+{
+ int ret = 0;
+ /*
+ * We reached this while coming out of hibernation. This means
+ * that SMT siblings are sleeping in hlt, as mwait is not safe
+ * against control transition during resume (see comment in
+ * hibernate_resume_nonboot_cpu_disable()).
+ *
+ * If the resumed kernel has SMT disabled, we have to take all the
+ * SMT siblings out of hlt, and offline them again so that they
+ * end up in mwait proper.
+ *
+ * Called with hotplug disabled.
+ */
+ cpu_hotplug_enable();
+ if (cpu_smt_control == CPU_SMT_DISABLED ||
+ cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
+ enum cpuhp_smt_control old = cpu_smt_control;
+
+ ret = cpuhp_smt_enable();
+ if (ret)
+ goto out;
+ ret = cpuhp_smt_disable(old);
+ if (ret)
+ goto out;
+ }
+out:
+ cpu_hotplug_disable();
+ return ret;
+}
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index be15bdcb20df..a1061d471b73 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Hibernation support specific for i386 - temporary page tables
*
- * Distribute under GPLv2
- *
* Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*/
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 239f424ccb29..0197095d9637 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Hibernation support for x86-64
*
- * Distribute under GPLv2
- *
* Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 3008baa2fa95..a4d5eb0a7ece 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -1,8 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Hibernation support for x86-64
*
- * Distribute under GPLv2.
- *
* Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright 2005 Andi Kleen <ak@suse.de>
* Copyright 2004 Pavel Machek <pavel@suse.cz>
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
index d1a4291d3568..275a646d1048 100644
--- a/arch/x86/purgatory/entry64.S
+++ b/arch/x86/purgatory/entry64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
* Copyright (C) 2014 Red Hat Inc.
@@ -5,9 +6,6 @@
* Author(s): Vivek Goyal <vgoyal@redhat.com>
*
* This code has been taken from kexec-tools.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
.text
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 025c34ac0d84..6d8d5a34c377 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* purgatory: Runs between two kernels
*
@@ -5,9 +6,6 @@
*
* Author:
* Vivek Goyal <vgoyal@redhat.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/bug.h>
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index dfae9b9e60b5..321146be741d 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* purgatory: setup code
*
@@ -5,9 +6,6 @@
* Copyright (C) 2014 Red Hat Inc.
*
* This code has been taken from kexec-tools.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <asm/purgatory.h>
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
index 50a4147f91fb..8b1427422dfc 100644
--- a/arch/x86/purgatory/stack.S
+++ b/arch/x86/purgatory/stack.S
@@ -1,10 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* purgatory: stack
*
* Copyright (C) 2014 Red Hat Inc.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
/* A stack for the loaded kernel.
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
index 795ca4f2cb3c..01ad43873ad9 100644
--- a/arch/x86/purgatory/string.c
+++ b/arch/x86/purgatory/string.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Simple string functions.
*
@@ -5,9 +6,6 @@
*
* Author:
* Vivek Goyal <vgoyal@redhat.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#include <linux/types.h>
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index a9c3db125222..9ad6842de4b4 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
Bear in mind that this is absolutely useless if your platform doesn't
have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
+
+config RAS_CEC_DEBUG
+ bool "CEC debugging machinery"
+ default n
+ depends on RAS_CEC
+ help
+ Add extra files to (debugfs)/ras/cec to test the correctable error
+ collector feature. "pfn" is a writable file that allows user to
+ simulate an error in a particular page frame. "array" is a read-only
+ file that dumps out the current state of all pages logged so far.
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index a3b4fd954931..34eda63c124b 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*
* Copyright (C) IBM Corporation, 2009
*/
@@ -119,7 +111,7 @@ static void parse_args(int argc, char **argv)
int main(int argc, char **argv)
{
char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
- unsigned char insn_buf[16];
+ unsigned char insn_buff[16];
struct insn insn;
int insns = 0;
int warnings = 0;
@@ -138,7 +130,7 @@ int main(int argc, char **argv)
}
insns++;
- memset(insn_buf, 0, 16);
+ memset(insn_buff, 0, 16);
strcpy(copy, line);
tab1 = strchr(copy, '\t');
if (!tab1)
@@ -151,13 +143,13 @@ int main(int argc, char **argv)
*tab2 = '\0'; /* Characters beyond tab2 aren't examined */
while (s < tab2) {
if (sscanf(s, "%x", &b) == 1) {
- insn_buf[nb++] = (unsigned char) b;
+ insn_buff[nb++] = (unsigned char) b;
s += 3;
} else
break;
}
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.length != nb) {
warnings++;
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 1972565ab106..185ceba9d289 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* x86 decoder sanity test - based on test_get_insn.c
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2009
* Copyright (C) Hitachi, Ltd., 2011
*/
@@ -96,7 +83,7 @@ static void dump_insn(FILE *fp, struct insn *insn)
}
static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
- unsigned char *insn_buf, struct insn *insn)
+ unsigned char *insn_buff, struct insn *insn)
{
int i;
@@ -109,7 +96,7 @@ static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
/* Input a decoded instruction sequence directly */
fprintf(fp, " $ echo ");
for (i = 0; i < MAX_INSN_SIZE; i++)
- fprintf(fp, " %02x", insn_buf[i]);
+ fprintf(fp, " %02x", insn_buff[i]);
fprintf(fp, " | %s -i -\n", prog);
if (!input_file) {
@@ -137,7 +124,7 @@ fail:
}
/* Read given instruction sequence from the input file */
-static int read_next_insn(unsigned char *insn_buf)
+static int read_next_insn(unsigned char *insn_buff)
{
char buf[256] = "", *tmp;
int i;
@@ -147,7 +134,7 @@ static int read_next_insn(unsigned char *insn_buf)
return 0;
for (i = 0; i < MAX_INSN_SIZE; i++) {
- insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+ insn_buff[i] = (unsigned char)strtoul(tmp, &tmp, 16);
if (*tmp != ' ')
break;
}
@@ -155,19 +142,19 @@ static int read_next_insn(unsigned char *insn_buf)
return i;
}
-static int generate_insn(unsigned char *insn_buf)
+static int generate_insn(unsigned char *insn_buff)
{
int i;
if (input_file)
- return read_next_insn(insn_buf);
+ return read_next_insn(insn_buff);
/* Fills buffer with random binary up to MAX_INSN_SIZE */
for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
- *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+ *(unsigned short *)(&insn_buff[i]) = random() & 0xffff;
while (i < MAX_INSN_SIZE)
- insn_buf[i++] = random() & 0xff;
+ insn_buff[i++] = random() & 0xff;
return i;
}
@@ -239,31 +226,31 @@ int main(int argc, char **argv)
int insns = 0;
int errors = 0;
unsigned long i;
- unsigned char insn_buf[MAX_INSN_SIZE * 2];
+ unsigned char insn_buff[MAX_INSN_SIZE * 2];
parse_args(argc, argv);
/* Prepare stop bytes with NOPs */
- memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+ memset(insn_buff + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
for (i = 0; i < iter_end; i++) {
- if (generate_insn(insn_buf) <= 0)
+ if (generate_insn(insn_buff) <= 0)
break;
if (i < iter_start) /* Skip to given iteration number */
continue;
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.next_byte <= insn.kaddr ||
insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
/* Access out-of-range memory */
- dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+ dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
errors++;
} else if (verbose && !insn_complete(&insn))
- dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+ dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
else if (verbose >= 2)
dump_insn(stdout, &insn);
insns++;
diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
index b9933eb9274a..13f118dec74f 100644
--- a/arch/x86/um/checksum_32.S
+++ b/arch/x86/um/checksum_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -18,11 +19,6 @@
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/errno.h>
diff --git a/arch/x86/um/delay.c b/arch/x86/um/delay.c
index a8fb7ca4822b..8d510ceb43fb 100644
--- a/arch/x86/um/delay.c
+++ b/arch/x86/um/delay.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
* Mostly copied from arch/x86/lib/delay.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/export.h>
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 56c44d865f7b..19c5dbd46770 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/mm.h>
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 8b4a71efe7ee..7c11c9e5d7ea 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -471,7 +471,7 @@ long sys_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -577,6 +577,6 @@ long sys_rt_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
index 7c441b59d375..ac9c02b9d92c 100644
--- a/arch/x86/um/vdso/um_vdso.c
+++ b/arch/x86/um/vdso/um_vdso.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* This vDSO turns all calls into a syscall so that UML can trap them.
*/
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 6be22f991b59..9e7c4aba6c3a 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/slab.h>
diff --git a/arch/x86/video/Makefile b/arch/x86/video/Makefile
index 2c447c94adcc..11640c116115 100644
--- a/arch/x86/video/Makefile
+++ b/arch/x86/video/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_FB) += fbdev.o
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e07abefd3d26..ba5a41828e9d 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
bool "Xen guest support"
depends on PARAVIRT
select PARAVIRT_CLOCK
+ select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC
help
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 13da87918b4f..532410998684 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -9,13 +9,8 @@ static struct dentry *d_xen_debug;
struct dentry * __init xen_init_debugfs(void)
{
- if (!d_xen_debug) {
+ if (!d_xen_debug)
d_xen_debug = debugfs_create_dir("xen", NULL);
-
- if (!d_xen_debug)
- pr_warning("Could not create 'xen' debugfs directory\n");
- }
-
return d_xen_debug;
}
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 0e75642d42a3..e138f7de52d2 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -210,18 +210,18 @@ static void __init xen_hvm_guest_init(void)
#endif
}
-static bool xen_nopv;
static __init int xen_parse_nopv(char *arg)
{
- xen_nopv = true;
- return 0;
+ pr_notice("\"xen_nopv\" is deprecated, please use \"nopv\" instead\n");
+
+ if (xen_cpuid_base())
+ nopv = true;
+ return 0;
}
early_param("xen_nopv", xen_parse_nopv);
-bool xen_hvm_need_lapic(void)
+bool __init xen_hvm_need_lapic(void)
{
- if (xen_nopv)
- return false;
if (xen_pv_domain())
return false;
if (!xen_hvm_domain())
@@ -230,15 +230,6 @@ bool xen_hvm_need_lapic(void)
return false;
return true;
}
-EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-
-static uint32_t __init xen_platform_hvm(void)
-{
- if (xen_pv_domain() || xen_nopv)
- return 0;
-
- return xen_cpuid_base();
-}
static __init void xen_hvm_guest_late_init(void)
{
@@ -251,6 +242,9 @@ static __init void xen_hvm_guest_late_init(void)
/* PVH detected. */
xen_pvh = true;
+ if (nopv)
+ panic("\"nopv\" and \"xen_nopv\" parameters are unsupported in PVH guest.");
+
/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC)
acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
@@ -260,7 +254,38 @@ static __init void xen_hvm_guest_late_init(void)
#endif
}
-const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+static uint32_t __init xen_platform_hvm(void)
+{
+ uint32_t xen_domain = xen_cpuid_base();
+ struct x86_hyper_init *h = &x86_hyper_xen_hvm.init;
+
+ if (xen_pv_domain())
+ return 0;
+
+ if (xen_pvh_domain() && nopv) {
+ /* Guest booting via the Xen-PVH boot entry goes here */
+ pr_info("\"nopv\" parameter is ignored in PVH guest\n");
+ nopv = false;
+ } else if (nopv && xen_domain) {
+ /*
+ * Guest booting via normal boot entry (like via grub2) goes
+ * here.
+ *
+ * Use interface functions for bare hardware if nopv,
+ * xen_hvm_guest_late_init is an exception as we need to
+ * detect PVH and panic there.
+ */
+ h->init_platform = x86_init_noop;
+ h->x2apic_available = bool_x86_init_noop;
+ h->init_mem_mapping = x86_init_noop;
+ h->init_after_bootmem = x86_init_noop;
+ h->guest_late_init = xen_hvm_guest_late_init;
+ x86_hyper_xen_hvm.runtime.pin_vcpu = x86_op_int_noop;
+ }
+ return xen_domain;
+}
+
+struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.name = "Xen HVM",
.detect = xen_platform_hvm,
.type = X86_HYPER_XEN_HVM,
@@ -269,4 +294,5 @@ const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
.runtime.pin_vcpu = xen_pin_vcpu,
+ .ignore_nopv = true,
};
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4722ba2966ac..7ceb32821093 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -596,12 +596,12 @@ struct trap_array_entry {
static struct trap_array_entry trap_array[] = {
{ debug, xen_xendebug, true },
- { int3, xen_xenint3, true },
{ double_fault, xen_double_fault, true },
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
{ nmi, xen_xennmi, true },
+ { int3, xen_int3, false },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+ pv_ops.mmu.read_cr2 =
+ __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
}
@@ -1463,4 +1464,5 @@ const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
.detect = xen_platform_pv,
.type = X86_HYPER_XEN_PV,
.runtime.pin_vcpu = xen_pin_vcpu,
+ .ignore_nopv = true,
};
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index beb44e22afdf..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
-static unsigned long xen_read_cr2(void)
-{
- return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
- return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
}
static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = xen_read_cr2,
+ .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
@@ -2700,8 +2690,7 @@ struct remap_data {
struct mmu_update *mmu_update;
};
-static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
+static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95ce9b5be411..0acba2c712ab 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -817,9 +817,6 @@ static int __init xen_p2m_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
- if (d_xen == NULL)
- return -ENOMEM;
-
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 590fcf863006..802ee5bba66c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -58,6 +58,7 @@ static void cpu_bringup(void)
{
int cpu;
+ cr4_init();
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
@@ -251,6 +252,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3776122c87cc..6deb49094c60 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -68,11 +68,8 @@ void xen_init_lock_cpu(int cpu)
int irq;
char *name;
- if (!xen_pvspin) {
- if (cpu == 0)
- static_branch_disable(&virt_spin_lock_key);
+ if (!xen_pvspin)
return;
- }
WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
cpu, per_cpu(lock_kicker_irq, cpu));
@@ -124,6 +121,7 @@ void __init xen_init_spinlocks(void)
if (!xen_pvspin) {
printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+ static_branch_disable(&virt_spin_lock_key);
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/frame.h>
+#include <asm/asm.h>
#include <linux/linkage.h>
@@ -135,3 +136,18 @@ ENTRY(check_events)
FRAME_END
ret
ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 1e9ef0ba30a5..ebf610b49c06 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -32,7 +32,6 @@ xen_pv_trap divide_error
xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
-xen_pv_trap xenint3
xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
__visible unsigned long xen_save_fl_direct(void);
__visible void xen_restore_fl_direct(unsigned long);
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
__visible void xen_sysret32(void);