diff options
Diffstat (limited to 'arch/x86')
58 files changed, 909 insertions, 912 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ff425a2d286c..46d42af4501a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -523,6 +523,7 @@ config X86_VSMP bool "ScaleMP vSMP" select HYPERVISOR_GUEST select PARAVIRT + select PARAVIRT_XXL depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM depends on SMP @@ -753,6 +754,9 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_XXL + bool + config PARAVIRT_DEBUG bool "paravirt-ops debugging" depends on PARAVIRT && DEBUG_KERNEL diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a423bdb42686..a1d5918765f3 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,6 +9,7 @@ * paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index fbbf1ba57ec6..687e47f8a796 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -783,7 +783,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV /* * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. @@ -1241,7 +1241,7 @@ ENTRY(spurious_interrupt_bug) jmp common_exception END(spurious_interrupt_bug) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV ENTRY(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL @@ -1322,11 +1322,13 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(4b, 9b) ENDPROC(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f95dcb209fdf..7c5ce0a6c4d2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack) ret ENDPROC(do_softirq_own_stack) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 /* @@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback) ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry xennmi do_nmi has_error_code=0 idtentry xendebug do_debug has_error_code=0 idtentry xenint3 do_int3 has_error_code=0 diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index b21ee65c4101..1c11f9420a82 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,2 +1,6 @@ obj-y := hv_init.o mmu.o nested.o obj-$(CONFIG_X86_64) += hv_apic.o + +ifdef CONFIG_X86_64 +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o +endif diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c new file mode 100644 index 000000000000..a861b0456b1a --- /dev/null +++ b/arch/x86/hyperv/hv_spinlock.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific spinlock code. + * + * Copyright (C) 2018, Intel, Inc. + * + * Author : Yi Sun <yi.y.sun@intel.com> + */ + +#define pr_fmt(fmt) "Hyper-V: " fmt + +#include <linux/spinlock.h> + +#include <asm/mshyperv.h> +#include <asm/paravirt.h> +#include <asm/apic.h> + +static bool __initdata hv_pvspin = true; + +static void hv_qlock_kick(int cpu) +{ + apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR); +} + +static void hv_qlock_wait(u8 *byte, u8 val) +{ + unsigned long msr_val; + unsigned long flags; + + if (in_nmi()) + return; + + /* + * Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the + * vCPU can be put into 'idle' state. This 'idle' state is + * terminated by an IPI, usually from hv_qlock_kick(), even if + * interrupts are disabled on the vCPU. + * + * To prevent a race against the unlock path it is required to + * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE + * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between + * the lock value check and the rdmsrl() then the vCPU might be put + * into 'idle' state by the hypervisor and kept in that state for + * an unspecified amount of time. + */ + local_irq_save(flags); + /* + * Only issue the rdmsrl() when the lock state has not changed. + */ + if (READ_ONCE(*byte) == val) + rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val); + local_irq_restore(flags); +} + +/* + * Hyper-V does not support this so far. + */ +bool hv_vcpu_is_preempted(int vcpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted); + +void __init hv_init_spinlocks(void) +{ + if (!hv_pvspin || !apic || + !(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) || + !(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) { + pr_info("PV spinlocks disabled\n"); + return; + } + pr_info("PV spinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = hv_qlock_wait; + pv_ops.lock.kick = hv_qlock_kick; + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); +} + +static __init int hv_parse_nopvspin(char *arg) +{ + hv_pvspin = false; + return 0; +} +early_param("hv_nopvspin", hv_parse_nopvspin); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index ef5f29f913d7..e65d7fe6489f 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void) return; pr_info("Using hypercall for remote TLB flush\n"); - pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; - pv_mmu_ops.tlb_remove_table = tlb_remove_table; + pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others; + pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4505ac2735ad..9e5ca30738e5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -8,7 +8,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * These special macros can be used to get or set a debugging register */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 13c5ee878a47..68a99d2a5f33 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr) return !(desc[0] | desc[1]); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define load_TR_desc() native_load_tr_desc() @@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { } -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..50ba74a34a37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 00e01d215f74..4139f7650fe5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -38,6 +38,8 @@ #define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) /* Partition reference TSC MSR is available */ #define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) +/* Partition Guest IDLE MSR is available */ +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 @@ -246,6 +248,9 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + /* Hyper-V guest crash notification MSR's */ #define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P1 0x40000101 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 15450a675031..058e40fed167 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -64,7 +64,7 @@ static inline __cpuidle void native_halt(void) #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ @@ -123,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void) #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif + #define SWAPGS swapgs /* * Currently paravirt can't handle swapgs nicely when we @@ -135,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void) */ #define SWAPGS_UNSAFE_STACK swapgs -#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -145,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax -#endif #else #define INTERRUPT_RETURN iret -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define GET_CR0_INTO_EAX movl %cr0, %eax #endif - #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static inline int arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index eeeb9289c764..0ca50611e8ce 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,12 +16,12 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { } -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f37704497d8f..0d6271cce198 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -351,6 +351,8 @@ int hyperv_flush_guest_mapping(u64 as); #ifdef CONFIG_X86_64 void hv_apic_init(void); +void __init hv_init_spinlocks(void); +bool hv_vcpu_is_preempted(int vcpu); #else static inline void hv_apic_init(void) {} #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 04addd6e0a4a..91e4cf189914 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter) return EAX_EDX_VAL(val, low, high); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> @@ -305,7 +305,7 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index e375d4266b53..4bf42f9e4eea 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,16 +17,73 @@ #include <linux/cpumask.h> #include <asm/frame.h> +static inline unsigned long long paravirt_sched_clock(void) +{ + return PVOP_CALL0(unsigned long long, time.sched_clock); +} + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return PVOP_CALL1(u64, time.steal_clock, cpu); +} + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) +{ + pv_ops.cpu.io_delay(); +#ifdef REALLY_SLOW_IO + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); +#endif +} + +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(mmu.flush_tlb_user); +} + +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(mmu.flush_tlb_kernel); +} + +static inline void __flush_tlb_one_user(unsigned long addr) +{ + PVOP_VCALL1(mmu.flush_tlb_one_user, addr); +} + +static inline void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); +} + +static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); +} + +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ + PVOP_VCALL1(mmu.exit_mmap, mm); +} + +#ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); + PVOP_VCALL1(cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); + PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); } /* @@ -34,98 +91,98 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, */ static inline unsigned long paravirt_get_debugreg(int reg) { - return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); + return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static inline void set_debugreg(unsigned long val, int reg) { - PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); + PVOP_VCALL2(cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); + return PVOP_CALL0(unsigned long, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr0, x); + PVOP_VCALL1(cpu.write_cr0, x); } static inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); + return PVOP_CALL0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr2, x); + PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); + return PVOP_CALL0(unsigned long, mmu.read_cr3); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr3, x); + PVOP_VCALL1(mmu.write_cr3, x); } static inline void __write_cr4(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr4, x); + PVOP_VCALL1(cpu.write_cr4, x); } #ifdef CONFIG_X86_64 static inline unsigned long read_cr8(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); + return PVOP_CALL0(unsigned long, cpu.read_cr8); } static inline void write_cr8(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr8, x); + PVOP_VCALL1(cpu.write_cr8, x); } #endif static inline void arch_safe_halt(void) { - PVOP_VCALL0(pv_irq_ops.safe_halt); + PVOP_VCALL0(irq.safe_halt); } static inline void halt(void) { - PVOP_VCALL0(pv_irq_ops.halt); + PVOP_VCALL0(irq.halt); } static inline void wbinvd(void) { - PVOP_VCALL0(pv_cpu_ops.wbinvd); + PVOP_VCALL0(cpu.wbinvd); } #define get_kernel_rpl() (pv_info.kernel_rpl) static inline u64 paravirt_read_msr(unsigned msr) { - return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr); + return PVOP_CALL1(u64, cpu.read_msr, msr); } static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { - PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); + PVOP_VCALL3(cpu.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { - return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err); + return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); } static inline int paravirt_write_msr_safe(unsigned msr, unsigned low, unsigned high) { - return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high); + return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); } #define rdmsr(msr, val1, val2) \ @@ -170,23 +227,9 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -static inline unsigned long long paravirt_sched_clock(void) -{ - return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); -} - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -static inline u64 paravirt_steal_clock(int cpu) -{ - return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu); -} - static inline unsigned long long paravirt_read_pmc(int counter) { - return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); + return PVOP_CALL1(u64, cpu.read_pmc, counter); } #define rdpmc(counter, low, high) \ @@ -200,166 +243,127 @@ do { \ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); + PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); + PVOP_VCALL2(cpu.free_ldt, ldt, entries); } static inline void load_TR_desc(void) { - PVOP_VCALL0(pv_cpu_ops.load_tr_desc); + PVOP_VCALL0(cpu.load_tr_desc); } static inline void load_gdt(const struct desc_ptr *dtr) { - PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); + PVOP_VCALL1(cpu.load_gdt, dtr); } static inline void load_idt(const struct desc_ptr *dtr) { - PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); + PVOP_VCALL1(cpu.load_idt, dtr); } static inline void set_ldt(const void *addr, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); + PVOP_VCALL2(cpu.set_ldt, addr, entries); } static inline unsigned long paravirt_store_tr(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); + return PVOP_CALL0(unsigned long, cpu.store_tr); } + #define store_tr(tr) ((tr) = paravirt_store_tr()) static inline void load_TLS(struct thread_struct *t, unsigned cpu) { - PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); + PVOP_VCALL2(cpu.load_tls, t, cpu); } #ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int gs) { - PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); + PVOP_VCALL1(cpu.load_gs_index, gs); } #endif static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { - PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); + PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); } static inline void write_gdt_entry(struct desc_struct *dt, int entry, void *desc, int type) { - PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); + PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); } static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { - PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); + PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } static inline void set_iopl_mask(unsigned mask) { - PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); -} - -/* The paravirtualized I/O functions */ -static inline void slow_down_io(void) -{ - pv_cpu_ops.io_delay(); -#ifdef REALLY_SLOW_IO - pv_cpu_ops.io_delay(); - pv_cpu_ops.io_delay(); - pv_cpu_ops.io_delay(); -#endif + PVOP_VCALL1(cpu.set_iopl_mask, mask); } static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); + PVOP_VCALL2(mmu.activate_mm, prev, next); } static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); -} - -static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) -{ - PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); -} - -static inline void __flush_tlb(void) -{ - PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); -} -static inline void __flush_tlb_global(void) -{ - PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); -} -static inline void __flush_tlb_one_user(unsigned long addr) -{ - PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr); -} - -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ - PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info); -} - -static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - PVOP_VCALL2(pv_mmu_ops.tlb_remove_table, tlb, table); + PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) { - return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); + return PVOP_CALL1(int, mmu.pgd_alloc, mm); } static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) { - PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); + PVOP_VCALL2(mmu.pgd_free, mm, pgd); } static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); + PVOP_VCALL2(mmu.alloc_pte, mm, pfn); } static inline void paravirt_release_pte(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); + PVOP_VCALL1(mmu.release_pte, pfn); } static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); + PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); } static inline void paravirt_release_pmd(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); + PVOP_VCALL1(mmu.release_pmd, pfn); } static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); + PVOP_VCALL2(mmu.alloc_pud, mm, pfn); } static inline void paravirt_release_pud(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); + PVOP_VCALL1(mmu.release_pud, pfn); } static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn); + PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); } static inline void paravirt_release_p4d(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn); + PVOP_VCALL1(mmu.release_p4d, pfn); } static inline pte_t __pte(pteval_t val) @@ -367,13 +371,9 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val); return (pte_t) { .pte = ret }; } @@ -383,11 +383,10 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + ret = PVOP_CALLEE2(pteval_t, mmu.pte_val, pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); return ret; } @@ -397,11 +396,9 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val); return (pgd_t) { ret }; } @@ -411,11 +408,10 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val, pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); return ret; } @@ -426,8 +422,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long a { pteval_t ret; - ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, - mm, addr, ptep); + ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep); return (pte_t) { .pte = ret }; } @@ -437,20 +432,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long a { if (sizeof(pteval_t) > sizeof(long)) /* 5 arg words */ - pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); + pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte); else - PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, + PVOP_VCALL4(mmu.ptep_modify_prot_commit, mm, addr, ptep, pte.pte); } static inline void set_pte(pte_t *ptep, pte_t pte) { if (sizeof(pteval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, - pte.pte, (u64)pte.pte >> 32); + PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, - pte.pte); + PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, @@ -458,9 +451,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, { if (sizeof(pteval_t) > sizeof(long)) /* 5 arg words */ - pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); + pv_ops.mmu.set_pte_at(mm, addr, ptep, pte); else - PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); + PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) @@ -468,9 +461,9 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) pmdval_t val = native_pmd_val(pmd); if (sizeof(pmdval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); + PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); + PVOP_VCALL2(mmu.set_pmd, pmdp, val); } #if CONFIG_PGTABLE_LEVELS >= 3 @@ -479,11 +472,9 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val); return (pmd_t) { ret }; } @@ -493,11 +484,10 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val, pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); return ret; } @@ -507,39 +497,23 @@ static inline void set_pud(pud_t *pudp, pud_t pud) pudval_t val = native_pud_val(pud); if (sizeof(pudval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, - val, (u64)val >> 32); + PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, - val); + PVOP_VCALL2(mmu.set_pud, pudp, val); } #if CONFIG_PGTABLE_LEVELS >= 4 static inline pud_t __pud(pudval_t val) { pudval_t ret; - if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { - pudval_t ret; - - if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); - else - ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); - - return ret; + return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); } static inline void pud_clear(pud_t *pudp) @@ -551,31 +525,26 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { p4dval_t val = native_p4d_val(p4d); - if (sizeof(p4dval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp, - val, (u64)val >> 32); - else - PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp, - val); + PVOP_VCALL2(mmu.set_p4d, p4dp, val); } #if CONFIG_PGTABLE_LEVELS >= 5 static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val); + p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); + return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); + PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); } #define set_pgd(pgdp, pgdval) do { \ @@ -606,19 +575,18 @@ static inline void p4d_clear(p4d_t *p4dp) 64-bit pte atomically */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { - PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, - pte.pte, pte.pte >> 32); + PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); + PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep); } static inline void pmd_clear(pmd_t *pmdp) { - PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); + PVOP_VCALL1(mmu.pmd_clear, pmdp); } #else /* !CONFIG_X86_PAE */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) @@ -641,64 +609,68 @@ static inline void pmd_clear(pmd_t *pmdp) #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); + PVOP_VCALL1(cpu.start_context_switch, prev); } static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); + PVOP_VCALL1(cpu.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); + PVOP_VCALL0(mmu.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); + PVOP_VCALL0(mmu.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); + PVOP_VCALL0(mmu.lazy_mode.flush); } static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { - pv_mmu_ops.set_fixmap(idx, phys, flags); + pv_ops.mmu.set_fixmap(idx, phys, flags); } +#endif #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val); + PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); } static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { - PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock); + PVOP_VCALLEE1(lock.queued_spin_unlock, lock); } static __always_inline void pv_wait(u8 *ptr, u8 val) { - PVOP_VCALL2(pv_lock_ops.wait, ptr, val); + PVOP_VCALL2(lock.wait, ptr, val); } static __always_inline void pv_kick(int cpu) { - PVOP_VCALL1(pv_lock_ops.kick, cpu); + PVOP_VCALL1(lock.kick, cpu); } static __always_inline bool pv_vcpu_is_preempted(long cpu) { - return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); + return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); } +void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); +bool __raw_callee_save___native_vcpu_is_preempted(long cpu); + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 @@ -778,24 +750,25 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) +#ifdef CONFIG_PARAVIRT_XXL static inline notrace unsigned long arch_local_save_flags(void) { - return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); + return PVOP_CALLEE0(unsigned long, irq.save_fl); } static inline notrace void arch_local_irq_restore(unsigned long f) { - PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); + PVOP_VCALLEE1(irq.restore_fl, f); } static inline notrace void arch_local_irq_disable(void) { - PVOP_VCALLEE0(pv_irq_ops.irq_disable); + PVOP_VCALLEE0(irq.irq_disable); } static inline notrace void arch_local_irq_enable(void) { - PVOP_VCALLEE0(pv_irq_ops.irq_enable); + PVOP_VCALLEE0(irq.irq_enable); } static inline notrace unsigned long arch_local_irq_save(void) @@ -806,6 +779,7 @@ static inline notrace unsigned long arch_local_irq_save(void) arch_local_irq_disable(); return f; } +#endif /* Make sure as little as possible of this mess escapes. */ @@ -827,7 +801,7 @@ extern void default_banner(void); #else /* __ASSEMBLY__ */ -#define _PVSITE(ptype, clobbers, ops, word, algn) \ +#define _PVSITE(ptype, ops, word, algn) \ 771:; \ ops; \ 772:; \ @@ -836,7 +810,6 @@ extern void default_banner(void); word 771b; \ .byte ptype; \ .byte 772b-771b; \ - .short clobbers; \ .popsection @@ -868,8 +841,8 @@ extern void default_banner(void); COND_POP(set, CLBR_RCX, rcx); \ COND_POP(set, CLBR_RAX, rax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) +#define PARA_PATCH(off) ((off) / 8) +#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else #define PV_SAVE_REGS(set) \ @@ -883,46 +856,41 @@ extern void default_banner(void); COND_POP(set, CLBR_EDI, edi); \ COND_POP(set, CLBR_EAX, eax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +#define PARA_PATCH(off) ((off) / 4) +#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr #endif +#ifdef CONFIG_PARAVIRT_XXL #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) + PARA_SITE(PARA_PATCH(PV_CPU_iret), \ + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#endif -#ifdef CONFIG_X86_32 -#define GET_CR0_INTO_EAX \ - push %ecx; push %edx; \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ - pop %edx; pop %ecx -#else /* !CONFIG_X86_32 */ - +#ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT_XXL /* * If swapgs is used while the userspace stack is still current, * there's no way to call a pvop. The PV replacement *must* be * inlined, or the swapgs instruction must be trapped and emulated. */ #define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - swapgs) + PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) /* * Note: swapgs is very special, and in practise is either going to be @@ -931,44 +899,51 @@ extern void default_banner(void); * it. */ #define SWAPGS \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ + PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ ) +#endif #define GET_CR2_INTO_RAX \ ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); + call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); +#ifdef CONFIG_PARAVIRT_XXL #define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ - CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) + PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif +#endif #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop +#endif /* !CONFIG_PARAVIRT */ + #ifndef __ASSEMBLY__ +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { } +#endif +#ifndef CONFIG_PARAVIRT static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } +#endif #endif /* __ASSEMBLY__ */ -#endif /* !CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 83ce282eed0a..fba54ca23b2a 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -66,12 +66,14 @@ struct paravirt_callee_save { /* general info */ struct pv_info { +#ifdef CONFIG_PARAVIRT_XXL unsigned int kernel_rpl; int shared_kernel_pmd; #ifdef CONFIG_X86_64 u16 extra_user_64bit_cs; /* __USER_CS if none */ #endif +#endif const char *name; }; @@ -85,17 +87,18 @@ struct pv_init_ops { * the number of bytes of code generated, as we nop pad the * rest in generic code. */ - unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned (*patch)(u8 type, void *insnbuf, unsigned long addr, unsigned len); } __no_randomize_layout; - +#ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); void (*flush)(void); } __no_randomize_layout; +#endif struct pv_time_ops { unsigned long long (*sched_clock)(void); @@ -104,6 +107,9 @@ struct pv_time_ops { struct pv_cpu_ops { /* hooks for various privileged instructions */ + void (*io_delay)(void); + +#ifdef CONFIG_PARAVIRT_XXL unsigned long (*get_debugreg)(int regno); void (*set_debugreg)(int regno, unsigned long value); @@ -141,7 +147,6 @@ struct pv_cpu_ops { void (*set_iopl_mask)(unsigned mask); void (*wbinvd)(void); - void (*io_delay)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, @@ -176,9 +181,11 @@ struct pv_cpu_ops { void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); +#endif } __no_randomize_layout; struct pv_irq_ops { +#ifdef CONFIG_PARAVIRT_XXL /* * Get/set interrupt state. save_fl and restore_fl are only * expected to use X86_EFLAGS_IF; all other bits @@ -195,35 +202,34 @@ struct pv_irq_ops { void (*safe_halt)(void); void (*halt)(void); - +#endif } __no_randomize_layout; struct pv_mmu_ops { + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_one_user)(unsigned long addr); + void (*flush_tlb_others)(const struct cpumask *cpus, + const struct flush_tlb_info *info); + + void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); + + /* Hook for intercepting the destruction of an mm_struct. */ + void (*exit_mmap)(struct mm_struct *mm); + +#ifdef CONFIG_PARAVIRT_XXL unsigned long (*read_cr2)(void); void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* - * Hooks for intercepting the creation/use/destruction of an - * mm_struct. - */ + /* Hooks for intercepting the creation/use of an mm_struct. */ void (*activate_mm)(struct mm_struct *prev, struct mm_struct *next); void (*dup_mmap)(struct mm_struct *oldmm, struct mm_struct *mm); - void (*exit_mmap)(struct mm_struct *mm); - - - /* TLB operations */ - void (*flush_tlb_user)(void); - void (*flush_tlb_kernel)(void); - void (*flush_tlb_one_user)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - const struct flush_tlb_info *info); - - void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); @@ -298,6 +304,7 @@ struct pv_mmu_ops { an mfn. We can tell which is which from the index. */ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); +#endif } __no_randomize_layout; struct arch_spinlock; @@ -321,28 +328,23 @@ struct pv_lock_ops { * number for each function using the offset which we use to indicate * what to patch. */ struct paravirt_patch_template { - struct pv_init_ops pv_init_ops; - struct pv_time_ops pv_time_ops; - struct pv_cpu_ops pv_cpu_ops; - struct pv_irq_ops pv_irq_ops; - struct pv_mmu_ops pv_mmu_ops; - struct pv_lock_ops pv_lock_ops; + struct pv_init_ops init; + struct pv_time_ops time; + struct pv_cpu_ops cpu; + struct pv_irq_ops irq; + struct pv_mmu_ops mmu; + struct pv_lock_ops lock; } __no_randomize_layout; extern struct pv_info pv_info; -extern struct pv_init_ops pv_init_ops; -extern struct pv_time_ops pv_time_ops; -extern struct pv_cpu_ops pv_cpu_ops; -extern struct pv_irq_ops pv_irq_ops; -extern struct pv_mmu_ops pv_mmu_ops; -extern struct pv_lock_ops pv_lock_ops; +extern struct paravirt_patch_template pv_ops; #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(op)) + [paravirt_opptr] "i" (&(pv_ops.op)) #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) @@ -361,20 +363,13 @@ extern struct pv_lock_ops pv_lock_ops; unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len); -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len); -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, +unsigned paravirt_patch_default(u8 type, void *insnbuf, unsigned long addr, unsigned len); unsigned paravirt_patch_insns(void *insnbuf, unsigned len, const char *start, const char *end); -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len); +unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len); int paravirt_disable_iospace(void); @@ -488,9 +483,9 @@ int paravirt_disable_iospace(void); #endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG -#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +#define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL) #else -#define PVOP_TEST_NULL(op) ((void)op) +#define PVOP_TEST_NULL(op) ((void)pv_ops.op) #endif #define PVOP_RETMASK(rettype) \ @@ -666,7 +661,6 @@ struct paravirt_patch_site { u8 *instr; /* original instructions */ u8 instrtype; /* type of this instruction */ u8 len; /* length of original instruction */ - u16 clobbers; /* what registers you may clobber */ }; extern struct paravirt_patch_site __parainstructions[], diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index fbd578daa66e..ec7f43327033 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -8,7 +8,7 @@ static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 858358a82b14..33845d36897c 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -20,7 +20,7 @@ typedef union { } pte_t; #endif /* !__ASSEMBLY__ */ -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \ (pv_info.shared_kernel_pmd))) #else diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 690c0307afed..40616e805292 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -55,9 +55,9 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> -#else /* !CONFIG_PARAVIRT */ +#else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) @@ -112,8 +112,7 @@ extern pmdval_t early_pmd_flags; #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) - -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ /* * The following only work if pte_present() is true. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d4dfd02b740e..c7a4e2a174b9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -579,7 +579,7 @@ static inline bool on_thread_stack(void) current_stack_pointer) < THREAD_SIZE; } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define __cpuid native_cpuid @@ -590,7 +590,7 @@ static inline void load_sp0(unsigned long sp0) } #define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 25f49af1b13c..f236bcd5485d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -146,7 +146,7 @@ static inline int v8086_mode(struct pt_regs *regs) static inline bool user_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index a314087add07..ac3892920419 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -210,7 +210,7 @@ #endif -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL # define get_kernel_rpl() 0 #endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 317fc59b512c..43c029cdc3fe 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -141,7 +141,7 @@ static inline unsigned long __read_cr4(void) return native_read_cr4(); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else @@ -208,7 +208,7 @@ static inline void load_gs_index(unsigned selector) #endif -#endif/* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ static inline void clflush(volatile void *__p) { diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 184e9a06b0ff..ebeac487a20c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -598,7 +598,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ memcpy(insnbuf, p->instr, p->len); - used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + used = pv_ops.init.patch(p->instrtype, insnbuf, (unsigned long)p->instr, p->len); BUG_ON(used > p->len); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 01de31db300d..fc02c3cf238f 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -64,15 +64,12 @@ void common(void) { OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL BLANK(); - OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); - OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); - OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); - OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); - OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); - OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); + OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable); + OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable); + OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret); + OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2); #endif #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 3b9405e7ba2b..ddced33184b5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -21,10 +21,13 @@ static char syscalls_ia32[] = { int main(void) { #ifdef CONFIG_PARAVIRT - OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); - OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); +#ifdef CONFIG_PARAVIRT_XXL + OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template, + cpu.usergs_sysret64); + OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs); #ifdef CONFIG_DEBUG_ENTRY - OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); + OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl); +#endif #endif BLANK(); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c519a079b3d5..9315a1660668 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1243,10 +1243,10 @@ static void generic_identify(struct cpuinfo_x86 *c) * ESPFIX issue, we can change this. */ #ifdef CONFIG_X86_32 -# ifdef CONFIG_PARAVIRT +# ifdef CONFIG_PARAVIRT_XXL do { extern void native_iret(void); - if (pv_cpu_ops.iret == native_iret) + if (pv_ops.cpu.iret == native_iret) set_cpu_bug(c, X86_BUG_ESPFIX); } while (0); # else diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index ad12733f6058..1c72f3819eb1 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -199,6 +199,16 @@ static unsigned long hv_get_tsc_khz(void) return freq / 1000; } +#if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV) +static void __init hv_smp_prepare_boot_cpu(void) +{ + native_smp_prepare_boot_cpu(); +#if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS) + hv_init_spinlocks(); +#endif +} +#endif + static void __init ms_hyperv_init_platform(void) { int hv_host_info_eax; @@ -303,6 +313,10 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) alloc_intr_gate(HYPERV_STIMER0_VECTOR, hv_stimer0_callback_vector); + +# ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; +# endif #endif } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 8e005329648b..d9ab49bed8af 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -97,14 +97,14 @@ static void __init vmware_sched_clock_setup(void) d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, d->cyc2ns_shift); - pv_time_ops.sched_clock = vmware_sched_clock; + pv_ops.time.sched_clock = vmware_sched_clock; pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset); } static void __init vmware_paravirt_ops_setup(void) { pv_info.name = "VMware hypervisor"; - pv_cpu_ops.io_delay = paravirt_nop; + pv_ops.cpu.io_delay = paravirt_nop; if (vmware_tsc_khz && vmw_sched_clock) vmware_sched_clock_setup(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a3618cf04cf6..747c758f67b7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -26,7 +26,7 @@ #include <asm/nospec-branch.h> #include <asm/fixmap.h> -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/asm-offsets.h> #include <asm/paravirt.h> #define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d9b71924c23c..ba4bfb7f6a36 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -283,7 +283,7 @@ static void __init paravirt_ops_setup(void) pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) - pv_cpu_ops.io_delay = kvm_io_delay; + pv_ops.cpu.io_delay = kvm_io_delay; #ifdef CONFIG_X86_IO_APIC no_timer_check = 1; @@ -632,14 +632,14 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; - pv_time_ops.steal_clock = kvm_steal_clock; + pv_ops.time.steal_clock = kvm_steal_clock; } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { - pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; - pv_mmu_ops.tlb_remove_table = tlb_remove_table; + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; + pv_ops.mmu.tlb_remove_table = tlb_remove_table; } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -850,13 +850,14 @@ void __init kvm_spinlock_init(void) return; __pv_init_lock_hash(); - pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; - pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); - pv_lock_ops.wait = kvm_wait; - pv_lock_ops.kick = kvm_kick_cpu; + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = + PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = kvm_wait; + pv_ops.lock.kick = kvm_kick_cpu; if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { - pv_lock_ops.vcpu_is_preempted = + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); } } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 013fe3d21dbb..30084ecaa20f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -118,13 +118,13 @@ static u64 kvm_sched_clock_read(void) static inline void kvm_sched_clock_init(bool stable) { if (!stable) { - pv_time_ops.sched_clock = kvm_clock_read; + pv_ops.time.sched_clock = kvm_clock_read; clear_sched_clock_stable(); return; } kvm_sched_clock_offset = kvm_clock_read(); - pv_time_ops.sched_clock = kvm_sched_clock_read; + pv_ops.time.sched_clock = kvm_sched_clock_read; pr_info("kvm-clock: using sched offset of %llu cycles", kvm_sched_clock_offset); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 71f2d1125ec0..4f75d0cf6305 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -17,7 +17,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); bool pv_is_native_spin_unlock(void) { - return pv_lock_ops.queued_spin_unlock.func == + return pv_ops.lock.queued_spin_unlock.func == __raw_callee_save___native_queued_spin_unlock; } @@ -29,17 +29,6 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); bool pv_is_native_vcpu_is_preempted(void) { - return pv_lock_ops.vcpu_is_preempted.func == + return pv_ops.lock.vcpu_is_preempted.func == __raw_callee_save___native_vcpu_is_preempted; } - -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, - .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), - .wait = paravirt_nop, - .kick = paravirt_nop, - .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), -#endif /* SMP */ -}; -EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8dc69d82567e..e4d4df37922a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -81,10 +81,8 @@ struct branch { u32 delta; } __attribute__((packed)); -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len) +static unsigned paravirt_patch_call(void *insnbuf, const void *target, + unsigned long addr, unsigned len) { struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); @@ -103,8 +101,9 @@ unsigned paravirt_patch_call(void *insnbuf, return 5; } -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len) +#ifdef CONFIG_PARAVIRT_XXL +static unsigned paravirt_patch_jmp(void *insnbuf, const void *target, + unsigned long addr, unsigned len) { struct branch *b = insnbuf; unsigned long delta = (unsigned long)target - (addr+5); @@ -121,6 +120,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, return 5; } +#endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -130,29 +130,14 @@ void __init native_pv_lock_init(void) static_branch_disable(&virt_spin_lock_key); } -/* - * Neat trick to map patch type back to the call within the - * corresponding structure. - */ -static void *get_call_destination(u8 type) -{ - struct paravirt_patch_template tmpl = { - .pv_init_ops = pv_init_ops, - .pv_time_ops = pv_time_ops, - .pv_cpu_ops = pv_cpu_ops, - .pv_irq_ops = pv_irq_ops, - .pv_mmu_ops = pv_mmu_ops, -#ifdef CONFIG_PARAVIRT_SPINLOCKS - .pv_lock_ops = pv_lock_ops, -#endif - }; - return *((void **)&tmpl + type); -} - -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, +unsigned paravirt_patch_default(u8 type, void *insnbuf, unsigned long addr, unsigned len) { - void *opfunc = get_call_destination(type); + /* + * Neat trick to map patch type back to the call within the + * corresponding structure. + */ + void *opfunc = *((void **)&pv_ops + type); unsigned ret; if (opfunc == NULL) @@ -167,15 +152,15 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); - else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) +#ifdef CONFIG_PARAVIRT_XXL + else if (type == PARAVIRT_PATCH(cpu.iret) || + type == PARAVIRT_PATCH(cpu.usergs_sysret64)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); +#endif else - /* Otherwise call the function; assume target could - clobber any caller-save reg */ - ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, - addr, clobbers, len); + /* Otherwise call the function. */ + ret = paravirt_patch_call(insnbuf, opfunc, addr, len); return ret; } @@ -281,6 +266,7 @@ void paravirt_flush_lazy_mmu(void) preempt_enable(); } +#ifdef CONFIG_PARAVIRT_XXL void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); @@ -301,6 +287,7 @@ void paravirt_end_context_switch(struct task_struct *next) if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) arch_enter_lazy_mmu_mode(); } +#endif enum paravirt_lazy_mode paravirt_get_lazy_mode(void) { @@ -312,85 +299,16 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) struct pv_info pv_info = { .name = "bare hardware", +#ifdef CONFIG_PARAVIRT_XXL .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ #ifdef CONFIG_X86_64 .extra_user_64bit_cs = __USER_CS, #endif -}; - -struct pv_init_ops pv_init_ops = { - .patch = native_patch, -}; - -struct pv_time_ops pv_time_ops = { - .sched_clock = native_sched_clock, - .steal_clock = native_steal_clock, -}; - -__visible struct pv_irq_ops pv_irq_ops = { - .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), - .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), - .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), - .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), - .safe_halt = native_safe_halt, - .halt = native_halt, -}; - -__visible struct pv_cpu_ops pv_cpu_ops = { - .cpuid = native_cpuid, - .get_debugreg = native_get_debugreg, - .set_debugreg = native_set_debugreg, - .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, - .write_cr4 = native_write_cr4, -#ifdef CONFIG_X86_64 - .read_cr8 = native_read_cr8, - .write_cr8 = native_write_cr8, #endif - .wbinvd = native_wbinvd, - .read_msr = native_read_msr, - .write_msr = native_write_msr, - .read_msr_safe = native_read_msr_safe, - .write_msr_safe = native_write_msr_safe, - .read_pmc = native_read_pmc, - .load_tr_desc = native_load_tr_desc, - .set_ldt = native_set_ldt, - .load_gdt = native_load_gdt, - .load_idt = native_load_idt, - .store_tr = native_store_tr, - .load_tls = native_load_tls, -#ifdef CONFIG_X86_64 - .load_gs_index = native_load_gs_index, -#endif - .write_ldt_entry = native_write_ldt_entry, - .write_gdt_entry = native_write_gdt_entry, - .write_idt_entry = native_write_idt_entry, - - .alloc_ldt = paravirt_nop, - .free_ldt = paravirt_nop, - - .load_sp0 = native_load_sp0, - -#ifdef CONFIG_X86_64 - .usergs_sysret64 = native_usergs_sysret64, -#endif - .iret = native_iret, - .swapgs = native_swapgs, - - .set_iopl_mask = native_set_iopl_mask, - .io_delay = native_io_delay, - - .start_context_switch = paravirt_nop, - .end_context_switch = paravirt_nop, }; -/* At this point, native_get/set_debugreg has real function entries */ -NOKPROBE_SYMBOL(native_get_debugreg); -NOKPROBE_SYMBOL(native_set_debugreg); -NOKPROBE_SYMBOL(native_load_idt); - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) @@ -399,85 +317,171 @@ NOKPROBE_SYMBOL(native_load_idt); #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif -struct pv_mmu_ops pv_mmu_ops __ro_after_init = { +struct paravirt_patch_template pv_ops = { + /* Init ops. */ + .init.patch = native_patch, - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = __native_read_cr3, - .write_cr3 = native_write_cr3, + /* Time ops. */ + .time.sched_clock = native_sched_clock, + .time.steal_clock = native_steal_clock, - .flush_tlb_user = native_flush_tlb, - .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_one_user = native_flush_tlb_one_user, - .flush_tlb_others = native_flush_tlb_others, - .tlb_remove_table = (void (*)(struct mmu_gather *, void *))tlb_remove_page, + /* Cpu ops. */ + .cpu.io_delay = native_io_delay, - .pgd_alloc = __paravirt_pgd_alloc, - .pgd_free = paravirt_nop, +#ifdef CONFIG_PARAVIRT_XXL + .cpu.cpuid = native_cpuid, + .cpu.get_debugreg = native_get_debugreg, + .cpu.set_debugreg = native_set_debugreg, + .cpu.read_cr0 = native_read_cr0, + .cpu.write_cr0 = native_write_cr0, + .cpu.write_cr4 = native_write_cr4, +#ifdef CONFIG_X86_64 + .cpu.read_cr8 = native_read_cr8, + .cpu.write_cr8 = native_write_cr8, +#endif + .cpu.wbinvd = native_wbinvd, + .cpu.read_msr = native_read_msr, + .cpu.write_msr = native_write_msr, + .cpu.read_msr_safe = native_read_msr_safe, + .cpu.write_msr_safe = native_write_msr_safe, + .cpu.read_pmc = native_read_pmc, + .cpu.load_tr_desc = native_load_tr_desc, + .cpu.set_ldt = native_set_ldt, + .cpu.load_gdt = native_load_gdt, + .cpu.load_idt = native_load_idt, + .cpu.store_tr = native_store_tr, + .cpu.load_tls = native_load_tls, +#ifdef CONFIG_X86_64 + .cpu.load_gs_index = native_load_gs_index, +#endif + .cpu.write_ldt_entry = native_write_ldt_entry, + .cpu.write_gdt_entry = native_write_gdt_entry, + .cpu.write_idt_entry = native_write_idt_entry, - .alloc_pte = paravirt_nop, - .alloc_pmd = paravirt_nop, - .alloc_pud = paravirt_nop, - .alloc_p4d = paravirt_nop, - .release_pte = paravirt_nop, - .release_pmd = paravirt_nop, - .release_pud = paravirt_nop, - .release_p4d = paravirt_nop, + .cpu.alloc_ldt = paravirt_nop, + .cpu.free_ldt = paravirt_nop, - .set_pte = native_set_pte, - .set_pte_at = native_set_pte_at, - .set_pmd = native_set_pmd, + .cpu.load_sp0 = native_load_sp0, - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, +#ifdef CONFIG_X86_64 + .cpu.usergs_sysret64 = native_usergs_sysret64, +#endif + .cpu.iret = native_iret, + .cpu.swapgs = native_swapgs, + + .cpu.set_iopl_mask = native_set_iopl_mask, + + .cpu.start_context_switch = paravirt_nop, + .cpu.end_context_switch = paravirt_nop, + + /* Irq ops. */ + .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), + .irq.safe_halt = native_safe_halt, + .irq.halt = native_halt, +#endif /* CONFIG_PARAVIRT_XXL */ + + /* Mmu ops. */ + .mmu.flush_tlb_user = native_flush_tlb, + .mmu.flush_tlb_kernel = native_flush_tlb_global, + .mmu.flush_tlb_one_user = native_flush_tlb_one_user, + .mmu.flush_tlb_others = native_flush_tlb_others, + .mmu.tlb_remove_table = + (void (*)(struct mmu_gather *, void *))tlb_remove_page, + + .mmu.exit_mmap = paravirt_nop, + +#ifdef CONFIG_PARAVIRT_XXL + .mmu.read_cr2 = native_read_cr2, + .mmu.write_cr2 = native_write_cr2, + .mmu.read_cr3 = __native_read_cr3, + .mmu.write_cr3 = native_write_cr3, + + .mmu.pgd_alloc = __paravirt_pgd_alloc, + .mmu.pgd_free = paravirt_nop, + + .mmu.alloc_pte = paravirt_nop, + .mmu.alloc_pmd = paravirt_nop, + .mmu.alloc_pud = paravirt_nop, + .mmu.alloc_p4d = paravirt_nop, + .mmu.release_pte = paravirt_nop, + .mmu.release_pmd = paravirt_nop, + .mmu.release_pud = paravirt_nop, + .mmu.release_p4d = paravirt_nop, + + .mmu.set_pte = native_set_pte, + .mmu.set_pte_at = native_set_pte_at, + .mmu.set_pmd = native_set_pmd, + + .mmu.ptep_modify_prot_start = __ptep_modify_prot_start, + .mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit, #if CONFIG_PGTABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE - .set_pte_atomic = native_set_pte_atomic, - .pte_clear = native_pte_clear, - .pmd_clear = native_pmd_clear, + .mmu.set_pte_atomic = native_set_pte_atomic, + .mmu.pte_clear = native_pte_clear, + .mmu.pmd_clear = native_pmd_clear, #endif - .set_pud = native_set_pud, + .mmu.set_pud = native_set_pud, - .pmd_val = PTE_IDENT, - .make_pmd = PTE_IDENT, + .mmu.pmd_val = PTE_IDENT, + .mmu.make_pmd = PTE_IDENT, #if CONFIG_PGTABLE_LEVELS >= 4 - .pud_val = PTE_IDENT, - .make_pud = PTE_IDENT, + .mmu.pud_val = PTE_IDENT, + .mmu.make_pud = PTE_IDENT, - .set_p4d = native_set_p4d, + .mmu.set_p4d = native_set_p4d, #if CONFIG_PGTABLE_LEVELS >= 5 - .p4d_val = PTE_IDENT, - .make_p4d = PTE_IDENT, + .mmu.p4d_val = PTE_IDENT, + .mmu.make_p4d = PTE_IDENT, - .set_pgd = native_set_pgd, + .mmu.set_pgd = native_set_pgd, #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ #endif /* CONFIG_PGTABLE_LEVELS >= 4 */ #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ - .pte_val = PTE_IDENT, - .pgd_val = PTE_IDENT, + .mmu.pte_val = PTE_IDENT, + .mmu.pgd_val = PTE_IDENT, - .make_pte = PTE_IDENT, - .make_pgd = PTE_IDENT, + .mmu.make_pte = PTE_IDENT, + .mmu.make_pgd = PTE_IDENT, - .dup_mmap = paravirt_nop, - .exit_mmap = paravirt_nop, - .activate_mm = paravirt_nop, + .mmu.dup_mmap = paravirt_nop, + .mmu.activate_mm = paravirt_nop, - .lazy_mode = { - .enter = paravirt_nop, - .leave = paravirt_nop, - .flush = paravirt_nop, + .mmu.lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + .flush = paravirt_nop, }, - .set_fixmap = native_set_fixmap, + .mmu.set_fixmap = native_set_fixmap, +#endif /* CONFIG_PARAVIRT_XXL */ + +#if defined(CONFIG_PARAVIRT_SPINLOCKS) + /* Lock ops. */ +#ifdef CONFIG_SMP + .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, + .lock.queued_spin_unlock = + PV_CALLEE_SAVE(__native_queued_spin_unlock), + .lock.wait = paravirt_nop, + .lock.kick = paravirt_nop, + .lock.vcpu_is_preempted = + PV_CALLEE_SAVE(__native_vcpu_is_preempted), +#endif /* SMP */ +#endif }; -EXPORT_SYMBOL_GPL(pv_time_ops); -EXPORT_SYMBOL (pv_cpu_ops); -EXPORT_SYMBOL (pv_mmu_ops); +#ifdef CONFIG_PARAVIRT_XXL +/* At this point, native_get/set_debugreg has real function entries */ +NOKPROBE_SYMBOL(native_get_debugreg); +NOKPROBE_SYMBOL(native_set_debugreg); +NOKPROBE_SYMBOL(native_load_idt); +#endif + +EXPORT_SYMBOL_GPL(pv_ops); EXPORT_SYMBOL_GPL(pv_info); -EXPORT_SYMBOL (pv_irq_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 758e69d72ebf..6368c22fa1fa 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -1,18 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/paravirt.h> -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); -DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); -DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); -DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); +#ifdef CONFIG_PARAVIRT_XXL +DEF_NATIVE(irq, irq_disable, "cli"); +DEF_NATIVE(irq, irq_enable, "sti"); +DEF_NATIVE(irq, restore_fl, "push %eax; popf"); +DEF_NATIVE(irq, save_fl, "pushf; pop %eax"); +DEF_NATIVE(cpu, iret, "iret"); +DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax"); +DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3"); +DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax"); +#endif #if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -30,53 +32,42 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) +unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len) { - const unsigned char *start, *end; - unsigned ret; - #define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - start = start_##ops##_##x; \ - end = end_##ops##_##x; \ - goto patch_site + case PARAVIRT_PATCH(ops.x): \ + return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x) + switch (type) { - PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_irq_ops, irq_enable); - PATCH_SITE(pv_irq_ops, restore_fl); - PATCH_SITE(pv_irq_ops, save_fl); - PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_mmu_ops, read_cr2); - PATCH_SITE(pv_mmu_ops, read_cr3); - PATCH_SITE(pv_mmu_ops, write_cr3); +#ifdef CONFIG_PARAVIRT_XXL + PATCH_SITE(irq, irq_disable); + PATCH_SITE(irq, irq_enable); + PATCH_SITE(irq, restore_fl); + PATCH_SITE(irq, save_fl); + PATCH_SITE(cpu, iret); + PATCH_SITE(mmu, read_cr2); + PATCH_SITE(mmu, read_cr3); + PATCH_SITE(mmu, write_cr3); +#endif #if defined(CONFIG_PARAVIRT_SPINLOCKS) - case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): - if (pv_is_native_spin_unlock()) { - start = start_pv_lock_ops_queued_spin_unlock; - end = end_pv_lock_ops_queued_spin_unlock; - goto patch_site; - } - goto patch_default; + case PARAVIRT_PATCH(lock.queued_spin_unlock): + if (pv_is_native_spin_unlock()) + return paravirt_patch_insns(ibuf, len, + start_lock_queued_spin_unlock, + end_lock_queued_spin_unlock); + break; - case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) { - start = start_pv_lock_ops_vcpu_is_preempted; - end = end_pv_lock_ops_vcpu_is_preempted; - goto patch_site; - } - goto patch_default; + case PARAVIRT_PATCH(lock.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) + return paravirt_patch_insns(ibuf, len, + start_lock_vcpu_is_preempted, + end_lock_vcpu_is_preempted); + break; #endif default: -patch_default: __maybe_unused - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); - break; - -patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); break; } #undef PATCH_SITE - return ret; + return paravirt_patch_default(type, ibuf, addr, len); } diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 9cb98f7b07c9..7ca9cb726f4d 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -3,24 +3,26 @@ #include <asm/asm-offsets.h> #include <linux/stringify.h> -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); -DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); -DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); -DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); -DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); +#ifdef CONFIG_PARAVIRT_XXL +DEF_NATIVE(irq, irq_disable, "cli"); +DEF_NATIVE(irq, irq_enable, "sti"); +DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq"); +DEF_NATIVE(irq, save_fl, "pushfq; popq %rax"); +DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax"); +DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax"); +DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3"); +DEF_NATIVE(cpu, wbinvd, "wbinvd"); -DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); -DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); +DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq"); +DEF_NATIVE(cpu, swapgs, "swapgs"); +#endif DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -38,55 +40,44 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) +unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len) { - const unsigned char *start, *end; - unsigned ret; - #define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - start = start_##ops##_##x; \ - end = end_##ops##_##x; \ - goto patch_site - switch(type) { - PATCH_SITE(pv_irq_ops, restore_fl); - PATCH_SITE(pv_irq_ops, save_fl); - PATCH_SITE(pv_irq_ops, irq_enable); - PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, usergs_sysret64); - PATCH_SITE(pv_cpu_ops, swapgs); - PATCH_SITE(pv_mmu_ops, read_cr2); - PATCH_SITE(pv_mmu_ops, read_cr3); - PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_cpu_ops, wbinvd); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) - case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): - if (pv_is_native_spin_unlock()) { - start = start_pv_lock_ops_queued_spin_unlock; - end = end_pv_lock_ops_queued_spin_unlock; - goto patch_site; - } - goto patch_default; + case PARAVIRT_PATCH(ops.x): \ + return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x) - case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) { - start = start_pv_lock_ops_vcpu_is_preempted; - end = end_pv_lock_ops_vcpu_is_preempted; - goto patch_site; - } - goto patch_default; + switch (type) { +#ifdef CONFIG_PARAVIRT_XXL + PATCH_SITE(irq, restore_fl); + PATCH_SITE(irq, save_fl); + PATCH_SITE(irq, irq_enable); + PATCH_SITE(irq, irq_disable); + PATCH_SITE(cpu, usergs_sysret64); + PATCH_SITE(cpu, swapgs); + PATCH_SITE(cpu, wbinvd); + PATCH_SITE(mmu, read_cr2); + PATCH_SITE(mmu, read_cr3); + PATCH_SITE(mmu, write_cr3); #endif +#if defined(CONFIG_PARAVIRT_SPINLOCKS) + case PARAVIRT_PATCH(lock.queued_spin_unlock): + if (pv_is_native_spin_unlock()) + return paravirt_patch_insns(ibuf, len, + start_lock_queued_spin_unlock, + end_lock_queued_spin_unlock); + break; - default: -patch_default: __maybe_unused - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); + case PARAVIRT_PATCH(lock.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) + return paravirt_patch_insns(ibuf, len, + start_lock_vcpu_is_preempted, + end_lock_vcpu_is_preempted); break; +#endif -patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); + default: break; } #undef PATCH_SITE - return ret; + return paravirt_patch_default(type, ibuf, addr, len); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 03b7529333a6..e9f777bfed40 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -247,7 +247,7 @@ unsigned long long sched_clock(void) bool using_native_sched_clock(void) { - return pv_time_ops.sched_clock == native_sched_clock; + return pv_ops.time.sched_clock == native_sched_clock; } #else unsigned long long diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 44685fb2a192..1eae5af491c2 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,7 +26,7 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT +#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL /* * Interrupt control on vSMPowered systems: * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' @@ -69,17 +69,17 @@ asmlinkage __visible void vsmp_irq_enable(void) } PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); -static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, +static unsigned __init vsmp_patch(u8 type, void *ibuf, unsigned long addr, unsigned len) { switch (type) { - case PARAVIRT_PATCH(pv_irq_ops.irq_enable): - case PARAVIRT_PATCH(pv_irq_ops.irq_disable): - case PARAVIRT_PATCH(pv_irq_ops.save_fl): - case PARAVIRT_PATCH(pv_irq_ops.restore_fl): - return paravirt_patch_default(type, clobbers, ibuf, addr, len); + case PARAVIRT_PATCH(irq.irq_enable): + case PARAVIRT_PATCH(irq.irq_disable): + case PARAVIRT_PATCH(irq.save_fl): + case PARAVIRT_PATCH(irq.restore_fl): + return paravirt_patch_default(type, ibuf, addr, len); default: - return native_patch(type, clobbers, ibuf, addr, len); + return native_patch(type, ibuf, addr, len); } } @@ -111,11 +111,11 @@ static void __init set_vsmp_pv_ops(void) if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); - pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); - pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); - pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); - pv_init_ops.patch = vsmp_patch; + pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); + pv_ops.init.patch = vsmp_patch; ctl &= ~(1 << 4); } writel(ctl, address + 4); diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 7ae36868aed2..a19ef1a416ff 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -27,6 +27,7 @@ * be extended when new paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS #include <linux/kernel.h> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 14be6a5935e1..1ef391aa184d 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -18,6 +18,7 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN + select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU help diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index d83cb5478f54..dd2550d33b38 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -12,25 +12,46 @@ endif # Make sure early boot has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_enlighten_pv.o := $(nostackp) -CFLAGS_mmu_pv.o := $(nostackp) +CFLAGS_mmu_pv.o := $(nostackp) -obj-y := enlighten.o multicalls.o mmu.o irq.o \ - time.o xen-asm.o xen-asm_$(BITS).o \ - grant-table.o suspend.o platform-pci-unplug.o +obj-y += enlighten.o +obj-y += mmu.o +obj-y += time.o +obj-y += grant-table.o +obj-y += suspend.o -obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o mmu_hvm.o suspend_hvm.o -obj-$(CONFIG_XEN_PV) += setup.o apic.o pmu.o suspend_pv.o \ - p2m.o enlighten_pv.o mmu_pv.o -obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o +obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o +obj-$(CONFIG_XEN_PVHVM) += mmu_hvm.o +obj-$(CONFIG_XEN_PVHVM) += suspend_hvm.o +obj-$(CONFIG_XEN_PVHVM) += platform-pci-unplug.o -obj-$(CONFIG_EVENT_TRACING) += trace.o +obj-$(CONFIG_XEN_PV) += setup.o +obj-$(CONFIG_XEN_PV) += apic.o +obj-$(CONFIG_XEN_PV) += pmu.o +obj-$(CONFIG_XEN_PV) += suspend_pv.o +obj-$(CONFIG_XEN_PV) += p2m.o +obj-$(CONFIG_XEN_PV) += enlighten_pv.o +obj-$(CONFIG_XEN_PV) += mmu_pv.o +obj-$(CONFIG_XEN_PV) += irq.o +obj-$(CONFIG_XEN_PV) += multicalls.o +obj-$(CONFIG_XEN_PV) += xen-asm.o +obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o + +obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o +obj-$(CONFIG_XEN_PVH) += xen-pvh.o + +obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o + obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o + obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o + obj-$(CONFIG_XEN_DOM0) += vga.o + obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o + obj-$(CONFIG_XEN_EFI) += efi.o -obj-$(CONFIG_XEN_PVH) += xen-pvh.o diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 1804b27f9632..1fbb629a9d78 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -1,18 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2014 Oracle Co., Daniel Kiper - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/bitops.h> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0ca46e03b830..67b2f31a1265 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG #include <linux/bootmem.h> #endif diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 19c1ff542387..0e75642d42a3 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + #include <linux/acpi.h> #include <linux/cpu.h> #include <linux/kexec.h> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 52a7c3faee0c..ec7a4209f310 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -995,11 +995,14 @@ void __init xen_setup_vcpu_info_placement(void) * percpu area for all cpus, so make use of it. */ if (xen_have_vcpu_info_placement) { - pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); - pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); - pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); - pv_mmu_ops.read_cr2 = xen_read_cr2_direct; + pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_ops.irq.restore_fl = + __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_ops.irq.irq_disable = + __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_ops.irq.irq_enable = + __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); + pv_ops.mmu.read_cr2 = xen_read_cr2_direct; } } @@ -1174,14 +1177,14 @@ static void __init xen_boot_params_init_edd(void) */ static void __init xen_setup_gdt(int cpu) { - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; - pv_cpu_ops.load_gdt = xen_load_gdt_boot; + pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; + pv_ops.cpu.load_gdt = xen_load_gdt_boot; setup_stack_canary_segment(cpu); switch_to_new_gdt(cpu); - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; - pv_cpu_ops.load_gdt = xen_load_gdt; + pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; + pv_ops.cpu.load_gdt = xen_load_gdt; } static void __init xen_dom0_set_legacy_features(void) @@ -1206,8 +1209,8 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; - pv_init_ops.patch = paravirt_patch_default; - pv_cpu_ops = xen_cpu_ops; + pv_ops.init.patch = paravirt_patch_default; + pv_ops.cpu = xen_cpu_ops; xen_init_irq_ops(); /* @@ -1276,8 +1279,10 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { - pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; - pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; + pv_ops.mmu.ptep_modify_prot_start = + xen_ptep_modify_prot_start; + pv_ops.mmu.ptep_modify_prot_commit = + xen_ptep_modify_prot_commit; } machine_ops = xen_machine_ops; diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 92ccc718152d..ecb0d5450334 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT /****************************************************************************** * grant_table.c * x86 specific part @@ -8,30 +9,6 @@ * Copyright (c) 2004-2005, K A Fraser * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> * VA Linux Systems Japan. Split out x86 specific part. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. */ #include <linux/sched.h> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 7515a19fd324..850c93f346c7 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -128,6 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - pv_irq_ops = xen_irq_ops; + pv_ops.irq = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 96fc2f0fdbfe..60e9c37fd79f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + #include <linux/pfn.h> #include <asm/xen/page.h> #include <asm/xen/hypercall.h> @@ -6,12 +8,6 @@ #include "multicalls.h" #include "mmu.h" -/* - * Protects atomic reservation decrease/increase against concurrent increases. - * Also protects non-atomic updates of current_pages and balloon lists. - */ -DEFINE_SPINLOCK(xen_reservation_lock); - unsigned long arbitrary_virt_to_mfn(void *vaddr) { xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); @@ -42,186 +38,6 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static noinline void xen_flush_tlb_all(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_ALL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -#define REMAP_BATCH_SIZE 16 - -struct remap_data { - xen_pfn_t *pfn; - bool contiguous; - bool no_translate; - pgprot_t prot; - struct mmu_update *mmu_update; -}; - -static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) -{ - struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); - - /* - * If we have a contiguous range, just update the pfn itself, - * else update pointer to be "next pfn". - */ - if (rmd->contiguous) - (*rmd->pfn)++; - else - rmd->pfn++; - - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; - rmd->mmu_update->ptr |= rmd->no_translate ? - MMU_PT_UPDATE_NO_TRANSLATE : - MMU_NORMAL_PT_UPDATE; - rmd->mmu_update->val = pte_val_ma(pte); - rmd->mmu_update++; - - return 0; -} - -static int do_remap_pfn(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t *pfn, int nr, - int *err_ptr, pgprot_t prot, - unsigned int domid, - bool no_translate, - struct page **pages) -{ - int err = 0; - struct remap_data rmd; - struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - unsigned long range; - int mapped = 0; - - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - - rmd.pfn = pfn; - rmd.prot = prot; - /* - * We use the err_ptr to indicate if there we are doing a contiguous - * mapping or a discontigious mapping. - */ - rmd.contiguous = !err_ptr; - rmd.no_translate = no_translate; - - while (nr) { - int index = 0; - int done = 0; - int batch = min(REMAP_BATCH_SIZE, nr); - int batch_left = batch; - range = (unsigned long)batch << PAGE_SHIFT; - - rmd.mmu_update = mmu_update; - err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_pfn_pte_fn, &rmd); - if (err) - goto out; - - /* We record the error for each page that gives an error, but - * continue mapping until the whole set is done */ - do { - int i; - - err = HYPERVISOR_mmu_update(&mmu_update[index], - batch_left, &done, domid); - - /* - * @err_ptr may be the same buffer as @gfn, so - * only clear it after each chunk of @gfn is - * used. - */ - if (err_ptr) { - for (i = index; i < index + done; i++) - err_ptr[i] = 0; - } - if (err < 0) { - if (!err_ptr) - goto out; - err_ptr[i] = err; - done++; /* Skip failed frame. */ - } else - mapped += done; - batch_left -= done; - index += done; - } while (batch_left); - - nr -= batch; - addr += range; - if (err_ptr) - err_ptr += batch; - cond_resched(); - } -out: - - xen_flush_tlb_all(); - - return err < 0 ? err : mapped; -} - -int xen_remap_domain_gfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t gfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -EOPNOTSUPP; - - return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, - pages); -} -EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); - -int xen_remap_domain_gfn_array(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t *gfn, int nr, - int *err_ptr, pgprot_t prot, - unsigned domid, struct page **pages) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, - prot, domid, pages); - - /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, - * and the consequences later is quite hard to detect what the actual - * cause of "wrong memory was mapped in". - */ - BUG_ON(err_ptr == NULL); - return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, - false, pages); -} -EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); - -int xen_remap_domain_mfn_array(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t *mfn, int nr, - int *err_ptr, pgprot_t prot, - unsigned int domid, struct page **pages) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -EOPNOTSUPP; - - return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, - true, pages); -} -EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); - /* Returns: 0 success */ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index dd2ad82eee80..57409373750f 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -73,7 +73,7 @@ static int is_pagetable_dying_supported(void) void __init xen_hvm_init_mmu_ops(void) { if (is_pagetable_dying_supported()) - pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; + pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap; #ifdef CONFIG_PROC_VMCORE WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); #endif diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index dd461c0167ef..70ea598a37d2 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * Xen mmu operations * @@ -99,6 +101,12 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; #endif /* CONFIG_X86_64 */ /* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and balloon lists. + */ +static DEFINE_SPINLOCK(xen_reservation_lock); + +/* * Note about cr3 (pagetable base) values: * * xen_cr3 contains the current logical cr3 value; it contains the @@ -2209,7 +2217,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) set_page_prot(initial_page_table, PAGE_KERNEL); set_page_prot(initial_kernel_pmd, PAGE_KERNEL); - pv_mmu_ops.write_cr3 = &xen_write_cr3; + pv_ops.mmu.write_cr3 = &xen_write_cr3; } /* @@ -2358,27 +2366,27 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) static void __init xen_post_allocator_init(void) { - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; + pv_ops.mmu.set_pte = xen_set_pte; + pv_ops.mmu.set_pmd = xen_set_pmd; + pv_ops.mmu.set_pud = xen_set_pud; #ifdef CONFIG_X86_64 - pv_mmu_ops.set_p4d = xen_set_p4d; + pv_ops.mmu.set_p4d = xen_set_p4d; #endif /* This will work as long as patching hasn't happened yet (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; + pv_ops.mmu.alloc_pte = xen_alloc_pte; + pv_ops.mmu.alloc_pmd = xen_alloc_pmd; + pv_ops.mmu.release_pte = xen_release_pte; + pv_ops.mmu.release_pmd = xen_release_pmd; #ifdef CONFIG_X86_64 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; + pv_ops.mmu.alloc_pud = xen_alloc_pud; + pv_ops.mmu.release_pud = xen_release_pud; #endif - pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); + pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 - pv_mmu_ops.write_cr3 = &xen_write_cr3; + pv_ops.mmu.write_cr3 = &xen_write_cr3; #endif } @@ -2466,7 +2474,7 @@ void __init xen_init_mmu_ops(void) x86_init.paging.pagetable_init = xen_pagetable_init; x86_init.hyper.init_after_bootmem = xen_after_bootmem; - pv_mmu_ops = xen_mmu_ops; + pv_ops.mmu = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); } @@ -2666,6 +2674,138 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) } EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); +static noinline void xen_flush_tlb_all(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_ALL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +#define REMAP_BATCH_SIZE 16 + +struct remap_data { + xen_pfn_t *pfn; + bool contiguous; + bool no_translate; + pgprot_t prot; + struct mmu_update *mmu_update; +}; + +static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); + + /* + * If we have a contiguous range, just update the pfn itself, + * else update pointer to be "next pfn". + */ + if (rmd->contiguous) + (*rmd->pfn)++; + else + rmd->pfn++; + + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; + rmd->mmu_update->ptr |= rmd->no_translate ? + MMU_PT_UPDATE_NO_TRANSLATE : + MMU_NORMAL_PT_UPDATE; + rmd->mmu_update->val = pte_val_ma(pte); + rmd->mmu_update++; + + return 0; +} + +int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, + unsigned int domid, bool no_translate, struct page **pages) +{ + int err = 0; + struct remap_data rmd; + struct mmu_update mmu_update[REMAP_BATCH_SIZE]; + unsigned long range; + int mapped = 0; + + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); + + rmd.pfn = pfn; + rmd.prot = prot; + /* + * We use the err_ptr to indicate if there we are doing a contiguous + * mapping or a discontigious mapping. + */ + rmd.contiguous = !err_ptr; + rmd.no_translate = no_translate; + + while (nr) { + int index = 0; + int done = 0; + int batch = min(REMAP_BATCH_SIZE, nr); + int batch_left = batch; + + range = (unsigned long)batch << PAGE_SHIFT; + + rmd.mmu_update = mmu_update; + err = apply_to_page_range(vma->vm_mm, addr, range, + remap_area_pfn_pte_fn, &rmd); + if (err) + goto out; + + /* + * We record the error for each page that gives an error, but + * continue mapping until the whole set is done + */ + do { + int i; + + err = HYPERVISOR_mmu_update(&mmu_update[index], + batch_left, &done, domid); + + /* + * @err_ptr may be the same buffer as @gfn, so + * only clear it after each chunk of @gfn is + * used. + */ + if (err_ptr) { + for (i = index; i < index + done; i++) + err_ptr[i] = 0; + } + if (err < 0) { + if (!err_ptr) + goto out; + err_ptr[i] = err; + done++; /* Skip failed frame. */ + } else + mapped += done; + batch_left -= done; + index += done; + } while (batch_left); + + nr -= batch; + addr += range; + if (err_ptr) + err_ptr += batch; + cond_resched(); + } +out: + + xen_flush_tlb_all(); + + return err < 0 ? err : mapped; +} +EXPORT_SYMBOL_GPL(xen_remap_pfn); + #ifdef CONFIG_KEXEC_CORE phys_addr_t paddr_vmcoreinfo_note(void) { diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 159a897151d6..d6d74efd8912 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * Xen leaves the responsibility for maintaining p2m mappings to the * guests themselves, but it must also access and update the p2m array diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 37c6056a7bba..33293ce01d8d 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + /* Glue code to lib/swiotlb-xen.c */ #include <linux/dma-mapping.h> diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index b99585034dd2..66ab96a4e2b3 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + /****************************************************************************** * platform-pci-unplug.c * * Xen platform PCI device driver * Copyright (c) 2010, Citrix - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * */ #include <linux/init.h> @@ -31,7 +19,6 @@ #define XEN_PLATFORM_ERR_PROTOCOL -2 #define XEN_PLATFORM_ERR_BLACKLIST -3 -#ifdef CONFIG_XEN_PVHVM /* store the value of xen_emul_unplug after the unplug is done */ static int xen_platform_pci_unplug; static int xen_emul_unplug; @@ -215,4 +202,3 @@ static int __init parse_xen_emul_unplug(char *arg) return 0; } early_param("xen_emul_unplug", parse_xen_emul_unplug); -#endif diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 973f10e05211..23f6793af88a 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -141,11 +141,12 @@ void __init xen_init_spinlocks(void) printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); __pv_init_lock_hash(); - pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; - pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); - pv_lock_ops.wait = xen_qlock_wait; - pv_lock_ops.kick = xen_qlock_kick; - pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = + PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = xen_qlock_wait; + pv_ops.lock.kick = xen_qlock_kick; + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } static __init int xen_parse_nopvspin(char *arg) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c84f1e039d84..72bf446c3fee 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -513,7 +513,7 @@ static void __init xen_time_init(void) void __init xen_init_time_ops(void) { xen_sched_clock_offset = xen_clocksource_read(); - pv_time_ops = xen_time_ops; + pv_ops.time = xen_time_ops; x86_init.timers.timer_init = xen_time_init; x86_init.timers.setup_percpu_clockev = x86_init_noop; @@ -555,7 +555,7 @@ void __init xen_hvm_init_time_ops(void) } xen_sched_clock_offset = xen_clocksource_read(); - pv_time_ops = xen_time_ops; + pv_ops.time = xen_time_ops; x86_init.timers.setup_percpu_clockev = xen_time_init; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h index 861fedfe5230..873c54c488fe 100644 --- a/arch/x86/xen/vdso.h +++ b/arch/x86/xen/vdso.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + /* Bit used for the pseudo-hwcap for non-negative segments. We use bit 1 to avoid bugs in some versions of glibc when bit 0 is used; the choice is otherwise arbitrary. */ diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index ca2d3b2bf2af..b0e471506cd8 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + /* * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. */ .code32 |