From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 29 Sep 2017 19:01:45 +0800 Subject: kvm/x86: Handle async PF in RCU read-side critical sections Sasha Levin reported a WARNING: | WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 | rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] | WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 | rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 ... | CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS | 1.10.1-1ubuntu1 04/01/2014 | Call Trace: ... | RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] | RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 | RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002 | RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000 | RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410 | RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001 | R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08 | R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040 | __schedule+0x201/0x2240 kernel/sched/core.c:3292 | schedule+0x113/0x460 kernel/sched/core.c:3421 | kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158 | do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271 | async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069 | RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996 | RSP: 0018:ffff88003b2df520 EFLAGS: 00010283 | RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670 | RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140 | RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000 | R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8 | R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000 | vsnprintf+0x173/0x1700 lib/vsprintf.c:2136 | sprintf+0xbe/0xf0 lib/vsprintf.c:2386 | proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23 | get_link fs/namei.c:1047 [inline] | link_path_walk+0x1041/0x1490 fs/namei.c:2127 ... This happened when the host hit a page fault, and delivered it as in an async page fault, while the guest was in an RCU read-side critical section. The guest then tries to reschedule in kvm_async_pf_task_wait(), but rcu_preempt_note_context_switch() would treat the reschedule as a sleep in RCU read-side critical section, which is not allowed (even in preemptible RCU). Thus the WARN. To cure this, make kvm_async_pf_task_wait() go to the halt path if the PF happens in a RCU read-side critical section. Reported-by: Sasha Levin Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index aa60a08b65b1..e675704fa6f7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1; + n.halted = is_idle_task(current) || preempt_count() > 1 || + rcu_preempt_depth(); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); -- cgit v1.2.3 From a2b7861bb33b2538420bb5d8554153484d3f961f Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Oct 2017 21:36:51 +0800 Subject: kvm/x86: Avoid async PF preempting the kernel incorrectly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, in PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call schedule() to reschedule in some cases. This could result in accidentally ending the current RCU read-side critical section early, causing random memory corruption in the guest, or otherwise preempting the currently running task inside between preempt_disable and preempt_enable. The difficulty to handle this well is because we don't know whether an async PF delivered in a preemptible section or RCU read-side critical section for PREEMPT_COUNT=n, since preempt_disable()/enable() and rcu_read_lock/unlock() are both no-ops in that case. To cure this, we treat any async PF interrupting a kernel context as one that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing the schedule() path in that case. To do so, a second parameter for kvm_async_pf_task_wait() is introduced, so that we know whether it's called from a context interrupting the kernel, and the parameter is set properly in all the callsites. Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 14 ++++++++++---- arch/x86/kvm/mmu.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7cbf1b1..59ad3d132353 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); @@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_guest_init() do {} while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e675704fa6f7..8bb9594d0761 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -void kvm_async_pf_task_wait(u32 token) +/* + * @interrupt_kernel: Is this called from a routine which interrupts the kernel + * (other than user space)? + */ +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1 || - rcu_preempt_depth(); + n.halted = is_idle_task(current) || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) + ? preempt_count() > 1 || rcu_preempt_depth() + : interrupt_kernel); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); @@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2()); + kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca30c1eb1d9..106d4a029a8a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, case KVM_PV_REASON_PAGE_NOT_PRESENT: vcpu->arch.apf.host_apf_reason = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address); + kvm_async_pf_task_wait(fault_address, 0); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: -- cgit v1.2.3 From 4716276184ec67a123a4eab81609a0688b1d650b Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 20 Oct 2017 09:30:58 -0500 Subject: X86/KVM: Decrypt shared per-cpu variables when SEV is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SEV is active, guest memory is encrypted with a guest-specific key, a guest memory region shared with the hypervisor must be mapped as decrypted before it can be shared. Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Tom Lendacky Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Borislav Petkov Cc: Paolo Bonzini Link: https://lkml.kernel.org/r/20171020143059.3291-17-brijesh.singh@amd.com --- arch/x86/kernel/kvm.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8bb9594d0761..3df743b60c80 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg) early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); -static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; /* @@ -312,7 +312,7 @@ static void kvm_register_steal_time(void) cpu, (unsigned long long) slow_virt_to_phys(st)); } -static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; +static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) { @@ -426,9 +426,42 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } +static inline void __set_percpu_decrypted(void *ptr, unsigned long size) +{ + early_set_memory_decrypted((unsigned long) ptr, size); +} + +/* + * Iterate through all possible CPUs and map the memory region pointed + * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once. + * + * Note: we iterate through all possible CPUs to ensure that CPUs + * hotplugged will have their per-cpu variable already mapped as + * decrypted. + */ +static void __init sev_map_percpu_data(void) +{ + int cpu; + + if (!sev_active()) + return; + + for_each_possible_cpu(cpu) { + __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); + __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); + __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi)); + } +} + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { + /* + * Map the per-cpu variables as decrypted before kvm_guest_cpu_init() + * shares the guest physical address with the hypervisor. + */ + sev_map_percpu_data(); + kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); kvm_spinlock_init(); @@ -496,6 +529,7 @@ void __init kvm_guest_init(void) kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); #else + sev_map_percpu_data(); kvm_guest_cpu_init(); #endif -- cgit v1.2.3 From f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Nov 2017 14:27:35 +0100 Subject: x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct x86_platform' and 'struct x86_init' Instead of x86_hyper being either NULL on bare metal or a pointer to a struct hypervisor_x86 in case of the kernel running as a guest merge the struct into x86_platform and x86_init. This will remove the need for wrappers making it hard to find out what is being called. With dummy functions added for all callbacks testing for a NULL function pointer can be removed, too. Suggested-by: Ingo Molnar Signed-off-by: Juergen Gross Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: akataria@vmware.com Cc: boris.ostrovsky@oracle.com Cc: devel@linuxdriverproject.org Cc: haiyangz@microsoft.com Cc: kvm@vger.kernel.org Cc: kys@microsoft.com Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: rusty@rustcorp.com.au Cc: sthemmin@microsoft.com Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20171109132739.23465-2-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hypervisor.h | 25 ++++-------------- arch/x86/include/asm/x86_init.h | 24 +++++++++++++++++ arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/cpu/hypervisor.c | 54 +++++++++++++++++++-------------------- arch/x86/kernel/cpu/mshyperv.c | 2 +- arch/x86/kernel/cpu/vmware.c | 4 +-- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/x86_init.c | 9 +++++++ arch/x86/mm/init.c | 2 +- arch/x86/xen/enlighten_hvm.c | 8 +++--- arch/x86/xen/enlighten_pv.c | 2 +- include/linux/hypervisor.h | 8 ++++-- 12 files changed, 81 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 0ead9dbb9130..0eca7239a7aa 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -23,6 +23,7 @@ #ifdef CONFIG_HYPERVISOR_GUEST #include +#include #include /* @@ -35,17 +36,11 @@ struct hypervisor_x86 { /* Detection routine */ uint32_t (*detect)(void); - /* Platform setup (run once per boot) */ - void (*init_platform)(void); + /* init time callbacks */ + struct x86_hyper_init init; - /* X2APIC detection (run once per boot) */ - bool (*x2apic_available)(void); - - /* pin current vcpu to specified physical cpu (run rarely) */ - void (*pin_vcpu)(int); - - /* called during init_mem_mapping() to setup early mappings. */ - void (*init_mem_mapping)(void); + /* runtime callbacks */ + struct x86_hyper_runtime runtime; }; extern const struct hypervisor_x86 *x86_hyper; @@ -58,17 +53,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor_platform(void); -extern bool hypervisor_x2apic_available(void); -extern void hypervisor_pin_vcpu(int cpu); - -static inline void hypervisor_init_mem_mapping(void) -{ - if (x86_hyper && x86_hyper->init_mem_mapping) - x86_hyper->init_mem_mapping(); -} #else static inline void init_hypervisor_platform(void) { } -static inline bool hypervisor_x2apic_available(void) { return false; } -static inline void hypervisor_init_mem_mapping(void) { } #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8a1ebf9540dd..ad15a0fda917 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -114,6 +114,18 @@ struct x86_init_pci { void (*fixup_irqs)(void); }; +/** + * struct x86_hyper_init - x86 hypervisor init functions + * @init_platform: platform setup + * @x2apic_available: X2APIC detection + * @init_mem_mapping: setup early mappings during init_mem_mapping() + */ +struct x86_hyper_init { + void (*init_platform)(void); + bool (*x2apic_available)(void); + void (*init_mem_mapping)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -127,6 +139,7 @@ struct x86_init_ops { struct x86_init_timers timers; struct x86_init_iommu iommu; struct x86_init_pci pci; + struct x86_hyper_init hyper; }; /** @@ -199,6 +212,15 @@ struct x86_legacy_features { struct x86_legacy_devices devices; }; +/** + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks + * + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) + */ +struct x86_hyper_runtime { + void (*pin_vcpu)(int cpu); +}; + /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_cpu: calibrate CPU @@ -218,6 +240,7 @@ struct x86_legacy_features { * possible in x86_early_init_platform_quirks() by * only using the current x86_hardware_subarch * semantics. + * @hyper: x86 hypervisor specific runtime callbacks */ struct x86_platform_ops { unsigned long (*calibrate_cpu)(void); @@ -233,6 +256,7 @@ struct x86_platform_ops { void (*apic_post_init)(void); struct x86_legacy_features legacy; void (*set_legacy_features)(void); + struct x86_hyper_runtime hyper; }; struct pci_dev; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ff891772c9f8..89c7c8569e5e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode) * under KVM */ if (max_physical_apicid > 255 || - !hypervisor_x2apic_available()) { + !x86_init.hyper.x2apic_available()) { pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); x2apic_disable(); return; diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 4fa90006ac68..22226c1bf092 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -44,51 +44,49 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = const struct hypervisor_x86 *x86_hyper; EXPORT_SYMBOL(x86_hyper); -static inline void __init +static inline const struct hypervisor_x86 * __init detect_hypervisor_vendor(void) { - const struct hypervisor_x86 *h, * const *p; + const struct hypervisor_x86 *h = NULL, * const *p; uint32_t pri, max_pri = 0; for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { - h = *p; - pri = h->detect(); - if (pri != 0 && pri > max_pri) { + pri = (*p)->detect(); + if (pri > max_pri) { max_pri = pri; - x86_hyper = h; + h = *p; } } - if (max_pri) - pr_info("Hypervisor detected: %s\n", x86_hyper->name); + if (h) + pr_info("Hypervisor detected: %s\n", h->name); + + return h; } -void __init init_hypervisor_platform(void) +static void __init copy_array(const void *src, void *target, unsigned int size) { + unsigned int i, n = size / sizeof(void *); + const void * const *from = (const void * const *)src; + const void **to = (const void **)target; - detect_hypervisor_vendor(); - - if (!x86_hyper) - return; - - if (x86_hyper->init_platform) - x86_hyper->init_platform(); + for (i = 0; i < n; i++) + if (from[i]) + to[i] = from[i]; } -bool __init hypervisor_x2apic_available(void) +void __init init_hypervisor_platform(void) { - return x86_hyper && - x86_hyper->x2apic_available && - x86_hyper->x2apic_available(); -} + const struct hypervisor_x86 *h; -void hypervisor_pin_vcpu(int cpu) -{ - if (!x86_hyper) + h = detect_hypervisor_vendor(); + + if (!h) return; - if (x86_hyper->pin_vcpu) - x86_hyper->pin_vcpu(cpu); - else - WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); + + x86_hyper = h; + x86_init.hyper.init_platform(); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 236324e83a3a..6bb84d655e4b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -257,6 +257,6 @@ static void __init ms_hyperv_init_platform(void) const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, - .init_platform = ms_hyperv_init_platform, + .init.init_platform = ms_hyperv_init_platform, }; EXPORT_SYMBOL(x86_hyper_ms_hyperv); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 40ed26852ebd..4804c1d063c8 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -208,7 +208,7 @@ static bool __init vmware_legacy_x2apic_available(void) const __refconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, - .init_platform = vmware_platform_setup, - .x2apic_available = vmware_legacy_x2apic_available, + .init.init_platform = vmware_platform_setup, + .init.x2apic_available = vmware_legacy_x2apic_available, }; EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8bb9594d0761..9dca8437c795 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -547,7 +547,7 @@ static uint32_t __init kvm_detect(void) const struct hypervisor_x86 x86_hyper_kvm __refconst = { .name = "KVM", .detect = kvm_detect, - .x2apic_available = kvm_para_available, + .init.x2apic_available = kvm_para_available, }; EXPORT_SYMBOL_GPL(x86_hyper_kvm); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a088b2c47f73..5b2d10c1973a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -28,6 +28,8 @@ void x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } +bool __init bool_x86_init_noop(void) { return false; } +void x86_op_int_noop(int cpu) { } /* * The platform setup functions are preset with the default functions @@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = { .init_irq = x86_default_pci_init_irq, .fixup_irqs = x86_default_pci_fixup_irqs, }, + + .hyper = { + .init_platform = x86_init_noop, + .x2apic_available = bool_x86_init_noop, + .init_mem_mapping = x86_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit = { @@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .get_nmi_reason = default_get_nmi_reason, .save_sched_clock_state = tsc_save_sched_clock_state, .restore_sched_clock_state = tsc_restore_sched_clock_state, + .hyper.pin_vcpu = x86_op_int_noop, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index af5c1ed21d43..a22c2b95e513 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -671,7 +671,7 @@ void __init init_mem_mapping(void) load_cr3(swapper_pg_dir); __flush_tlb_all(); - hypervisor_init_mem_mapping(); + x86_init.hyper.init_mem_mapping(); early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index de503c225ae1..7b1622089f96 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -229,9 +229,9 @@ static uint32_t __init xen_platform_hvm(void) const struct hypervisor_x86 x86_hyper_xen_hvm = { .name = "Xen HVM", .detect = xen_platform_hvm, - .init_platform = xen_hvm_guest_init, - .pin_vcpu = xen_pin_vcpu, - .x2apic_available = xen_x2apic_para_available, - .init_mem_mapping = xen_hvm_init_mem_mapping, + .init.init_platform = xen_hvm_guest_init, + .init.x2apic_available = xen_x2apic_para_available, + .init.init_mem_mapping = xen_hvm_init_mem_mapping, + .runtime.pin_vcpu = xen_pin_vcpu, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d4396e27b1fb..69d1a7054ddb 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1463,6 +1463,6 @@ static uint32_t __init xen_platform_pv(void) const struct hypervisor_x86 x86_hyper_xen_pv = { .name = "Xen PV", .detect = xen_platform_pv, - .pin_vcpu = xen_pin_vcpu, + .runtime.pin_vcpu = xen_pin_vcpu, }; EXPORT_SYMBOL(x86_hyper_xen_pv); diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index b4054fd5b6f6..b19563f9a8eb 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -7,8 +7,12 @@ * Juergen Gross */ -#ifdef CONFIG_HYPERVISOR_GUEST -#include +#ifdef CONFIG_X86 +#include +static inline void hypervisor_pin_vcpu(int cpu) +{ + x86_platform.hyper.pin_vcpu(cpu); +} #else static inline void hypervisor_pin_vcpu(int cpu) { -- cgit v1.2.3 From 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Nov 2017 14:27:36 +0100 Subject: x86/virt: Add enum for hypervisors to replace x86_hyper The x86_hyper pointer is only used for checking whether a virtual device is supporting the hypervisor the system is running on. Use an enum for that purpose instead and drop the x86_hyper pointer. Signed-off-by: Juergen Gross Acked-by: Thomas Gleixner Acked-by: Xavier Deguillard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: akataria@vmware.com Cc: arnd@arndb.de Cc: boris.ostrovsky@oracle.com Cc: devel@linuxdriverproject.org Cc: dmitry.torokhov@gmail.com Cc: gregkh@linuxfoundation.org Cc: haiyangz@microsoft.com Cc: kvm@vger.kernel.org Cc: kys@microsoft.com Cc: linux-graphics-maintainer@vmware.com Cc: linux-input@vger.kernel.org Cc: moltmann@vmware.com Cc: pbonzini@redhat.com Cc: pv-drivers@vmware.com Cc: rkrcmar@redhat.com Cc: sthemmin@microsoft.com Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20171109132739.23465-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/include/asm/hypervisor.h | 23 ++++++++++++++--------- arch/x86/kernel/cpu/hypervisor.c | 12 +++++++++--- arch/x86/kernel/cpu/mshyperv.c | 4 ++-- arch/x86/kernel/cpu/vmware.c | 4 ++-- arch/x86/kernel/kvm.c | 4 ++-- arch/x86/xen/enlighten_hvm.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 4 ++-- drivers/hv/vmbus_drv.c | 2 +- drivers/input/mouse/vmmouse.c | 10 ++++------ drivers/misc/vmw_balloon.c | 2 +- 11 files changed, 40 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5db63f728a2..a0b86cf486e0 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -113,7 +113,7 @@ void hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; - if (x86_hyper != &x86_hyper_ms_hyperv) + if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; /* Allocate percpu VP index */ diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 0eca7239a7aa..1b0a5abcd8ae 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -29,6 +29,16 @@ /* * x86 hypervisor information */ + +enum x86_hypervisor_type { + X86_HYPER_NATIVE = 0, + X86_HYPER_VMWARE, + X86_HYPER_MS_HYPERV, + X86_HYPER_XEN_PV, + X86_HYPER_XEN_HVM, + X86_HYPER_KVM, +}; + struct hypervisor_x86 { /* Hypervisor name */ const char *name; @@ -36,6 +46,9 @@ struct hypervisor_x86 { /* Detection routine */ uint32_t (*detect)(void); + /* Hypervisor type */ + enum x86_hypervisor_type type; + /* init time callbacks */ struct x86_hyper_init init; @@ -43,15 +56,7 @@ struct hypervisor_x86 { struct x86_hyper_runtime runtime; }; -extern const struct hypervisor_x86 *x86_hyper; - -/* Recognized hypervisors */ -extern const struct hypervisor_x86 x86_hyper_vmware; -extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_pv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; -extern const struct hypervisor_x86 x86_hyper_kvm; - +extern enum x86_hypervisor_type x86_hyper_type; extern void init_hypervisor_platform(void); #else static inline void init_hypervisor_platform(void) { } diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 22226c1bf092..bea8d3e24f50 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -26,6 +26,12 @@ #include #include +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_pv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_kvm; + static const __initconst struct hypervisor_x86 * const hypervisors[] = { #ifdef CONFIG_XEN_PV @@ -41,8 +47,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #endif }; -const struct hypervisor_x86 *x86_hyper; -EXPORT_SYMBOL(x86_hyper); +enum x86_hypervisor_type x86_hyper_type; +EXPORT_SYMBOL(x86_hyper_type); static inline const struct hypervisor_x86 * __init detect_hypervisor_vendor(void) @@ -87,6 +93,6 @@ void __init init_hypervisor_platform(void) copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); - x86_hyper = h; + x86_hyper_type = h->type; x86_init.hyper.init_platform(); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6bb84d655e4b..85eb5fc180c8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void) #endif } -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, + .type = X86_HYPER_MS_HYPERV, .init.init_platform = ms_hyperv_init_platform, }; -EXPORT_SYMBOL(x86_hyper_ms_hyperv); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 4804c1d063c8..8e005329648b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void) (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; } -const __refconst struct hypervisor_x86 x86_hyper_vmware = { +const __initconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, + .type = X86_HYPER_VMWARE, .init.init_platform = vmware_platform_setup, .init.x2apic_available = vmware_legacy_x2apic_available, }; -EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9dca8437c795..a94de09edbed 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void) return kvm_cpuid_base(); } -const struct hypervisor_x86 x86_hyper_kvm __refconst = { +const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, + .type = X86_HYPER_KVM, .init.x2apic_available = kvm_para_available, }; -EXPORT_SYMBOL_GPL(x86_hyper_kvm); static __init int activate_jump_labels(void) { diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 7b1622089f96..754d5391d9fa 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void) return xen_cpuid_base(); } -const struct hypervisor_x86 x86_hyper_xen_hvm = { +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { .name = "Xen HVM", .detect = xen_platform_hvm, + .type = X86_HYPER_XEN_HVM, .init.init_platform = xen_hvm_guest_init, .init.x2apic_available = xen_x2apic_para_available, .init.init_mem_mapping = xen_hvm_init_mem_mapping, .runtime.pin_vcpu = xen_pin_vcpu, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 69d1a7054ddb..168efb2534c0 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1460,9 +1460,9 @@ static uint32_t __init xen_platform_pv(void) return 0; } -const struct hypervisor_x86 x86_hyper_xen_pv = { +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { .name = "Xen PV", .detect = xen_platform_pv, + .type = X86_HYPER_XEN_PV, .runtime.pin_vcpu = xen_pin_vcpu, }; -EXPORT_SYMBOL(x86_hyper_xen_pv); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 937801ac2fe0..2cd134dd94d2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void) { int ret, t; - if (x86_hyper != &x86_hyper_ms_hyperv) + if (x86_hyper_type != X86_HYPER_MS_HYPERV) return -ENODEV; init_completion(&probe_event); diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index 0f586780ceb4..1ae5c1ef3f5b 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse) /* * Array of supported hypervisors. */ -static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = { - &x86_hyper_vmware, -#ifdef CONFIG_KVM_GUEST - &x86_hyper_kvm, -#endif +static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = { + X86_HYPER_VMWARE, + X86_HYPER_KVM, }; /** @@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void) int i; for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++) - if (vmmouse_supported_hypervisors[i] == x86_hyper) + if (vmmouse_supported_hypervisors[i] == x86_hyper_type) return true; return false; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 1e688bfec567..9047c0a529b2 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void) * Check if we are running on VMware's hypervisor and bail out * if we are not. */ - if (x86_hyper != &x86_hyper_vmware) + if (x86_hyper_type != X86_HYPER_VMWARE) return -ENODEV; for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; -- cgit v1.2.3 From f3614646005a1b59f836ff54351c9bd2224b6005 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Nov 2017 14:27:38 +0100 Subject: x86/virt, x86/platform: Add ->guest_late_init() callback to hypervisor_x86 structure Add a new guest_late_init callback to the hypervisor_x86 structure. It will replace the current kvm_guest_init() call which is changed to make use of the new callback. Signed-off-by: Juergen Gross Acked-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Linus Torvalds Cc: Peter Zijlstra Cc: kvm@vger.kernel.org Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/20171109132739.23465-5-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kvm_para.h | 2 -- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/kvm.c | 3 ++- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/x86_init.c | 1 + 5 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/kvm.c') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c373e44049b1..7b407dda2bd7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,6 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, #ifdef CONFIG_KVM_GUEST bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); -void __init kvm_guest_init(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); @@ -103,7 +102,6 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_guest_init() do {} while (0) #define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 260e9a85fecb..5dd011a8b560 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -117,11 +117,13 @@ struct x86_init_pci { /** * struct x86_hyper_init - x86 hypervisor init functions * @init_platform: platform setup + * @guest_late_init: guest late init * @x2apic_available: X2APIC detection * @init_mem_mapping: setup early mappings during init_mem_mapping() */ struct x86_hyper_init { void (*init_platform)(void); + void (*guest_late_init)(void); bool (*x2apic_available)(void); void (*init_mem_mapping)(void); }; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a94de09edbed..4b1f6f56b3e1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -465,7 +465,7 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -void __init kvm_guest_init(void) +static void __init kvm_guest_init(void) { int i; @@ -548,6 +548,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, .type = X86_HYPER_KVM, + .init.guest_late_init = kvm_guest_init, .init.x2apic_available = kvm_para_available, }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0957dd73d127..372da5299334 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1294,7 +1294,7 @@ void __init setup_arch(char **cmdline_p) io_apic_init_mappings(); - kvm_guest_init(); + x86_init.hyper.guest_late_init(); e820__reserve_resources(); e820__register_nosave_regions(max_low_pfn); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 5b2d10c1973a..c8fa4cd31903 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -86,6 +86,7 @@ struct x86_init_ops x86_init __initdata = { .hyper = { .init_platform = x86_init_noop, + .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, }, -- cgit v1.2.3