From ad73fd595c2ab168fdd01a266cbe6e4df95f8db0 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:05:58 -0700 Subject: xen/vcpu: Simplify xen_vcpu related code Largely mechanical changes to aid unification of xen_vcpu_restore() logic for PV, PVH and PVHVM. xen_vcpu_setup(): the only change in logic is that clamp_max_cpus() is now handled inside the "if (!xen_have_vcpu_info_placement)" block. xen_vcpu_restore(): code movement from enlighten_pv.c to enlighten.c. xen_vcpu_info_reset(): pulls together all the code where xen_vcpu is set to default. Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 101 +++++++++++++++++++++++++++++++------------ arch/x86/xen/enlighten_hvm.c | 6 +-- arch/x86/xen/enlighten_pv.c | 47 +++++--------------- arch/x86/xen/smp_hvm.c | 3 +- arch/x86/xen/xen-ops.h | 1 + 5 files changed, 89 insertions(+), 69 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a5ffcbb20cc0..96b745e3f56c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,6 +106,35 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); + + if (other_cpu && is_up && + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) + BUG(); + + xen_setup_runstate_info(cpu); + + if (xen_have_vcpu_info_placement) + xen_vcpu_setup(cpu); + + if (other_cpu && is_up && + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) + BUG(); + } +} + static void clamp_max_cpus(void) { #ifdef CONFIG_SMP @@ -114,6 +143,17 @@ static void clamp_max_cpus(void) #endif } +void xen_vcpu_info_reset(int cpu) +{ + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + } else { + /* Set to NULL so that if somebody accesses it we get an OOPS */ + per_cpu(xen_vcpu, cpu) = NULL; + } +} + void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; @@ -137,40 +177,45 @@ void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) return; } - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + + xen_vcpu_info_reset(cpu); + + if (xen_have_vcpu_info_placement) { + vcpup = &per_cpu(xen_vcpu_info, cpu); + info.mfn = arbitrary_virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + /* + * Check to see if the hypervisor will put the vcpu_info + * structure where we want it, which allows direct access via + * a percpu-variable. + * N.B. This hypercall can _only_ be called once per CPU. + * Subsequent calls will error out with -EINVAL. This is due to + * the fact that hypervisor has no unregister variant and this + * hypercall does not allow to over-write info.mfn and + * info.offset. + */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, + xen_vcpu_nr(cpu), &info); + + if (err) { + pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n", + cpu, err); + xen_have_vcpu_info_placement = 0; + } else { + /* + * This cpu is using the registered vcpu info, even if + * later ones fail to. + */ + per_cpu(xen_vcpu, cpu) = vcpup; + } + } if (!xen_have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) clamp_max_cpus(); return; } - - vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. - N.B. This hypercall can _only_ be called once per CPU. Subsequent - calls will error out with -EINVAL. This is due to the fact that - hypervisor has no unregister variant and this hypercall does not - allow to over-write info.mfn and info.offset. - */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), - &info); - - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - xen_have_vcpu_info_placement = 0; - clamp_max_cpus(); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - } } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index a6d014f47e52..eb53da6547ee 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -45,11 +45,7 @@ void __ref xen_hvm_init_shared_info(void) * online but xen_hvm_init_shared_info is run at resume time too and * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { - /* Leave it to be NULL. */ - if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) - continue; - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + xen_vcpu_info_reset(cpu); } } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f33eef4ebd12..3ec1c4632e12 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -107,35 +107,6 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); -/* - * On restore, set the vcpu placement up again. - * If it fails, then we're in a bad state, since - * we can't back out from using it... - */ -void xen_vcpu_restore(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), - NULL); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) - BUG(); - - xen_setup_runstate_info(cpu); - - if (xen_have_vcpu_info_placement) - xen_vcpu_setup(cpu); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) - BUG(); - } -} - static void __init xen_banner(void) { unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); @@ -1332,9 +1303,17 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because start_kernel needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); @@ -1431,9 +1410,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - + /* We need this for printk timestamps */ xen_setup_runstate_info(0); xen_efi_init(); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f18561bbf5c9..9e0fb9a015d4 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -12,7 +12,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) native_smp_prepare_boot_cpu(); /* - * Setup vcpu_info for boot CPU. + * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info + * in xen_cpu_up_prepare_hvm(). */ xen_vcpu_setup(0); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a440a42c618..90828256248b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -79,6 +79,7 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; void xen_vcpu_setup(int cpu); +void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From 0b64ffb8db4e310f77a01079ca752d946a8526b5 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:05:59 -0700 Subject: xen/pvh*: Support > 32 VCPUs at domain restore When Xen restores a PVHVM or PVH guest, its shared_info only holds up to 32 CPUs. The hypercall VCPUOP_register_vcpu_info allows us to setup per-page areas for VCPUs. This means we can boot PVH* guests with more than 32 VCPUs. During restore the per-cpu structure is allocated freshly by the hypervisor (vcpu_info_mfn is set to INVALID_MFN) so that the newly restored guest can make a VCPUOP_register_vcpu_info hypercall. However, we end up triggering this condition in Xen: /* Run this command on yourself or on other offline VCPUS. */ if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) ) which means we are unable to setup the per-cpu VCPU structures for running VCPUS. The Linux PV code paths makes this work by iterating over cpu_possible in xen_vcpu_restore() with: 1) is target CPU up (VCPUOP_is_up hypercall?) 2) if yes, then VCPUOP_down to pause it 3) VCPUOP_register_vcpu_info 4) if it was down, then VCPUOP_up to bring it back up With Xen commit 192df6f9122d ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs") this is available for non-PV guests. As such first check if VCPUOP_is_up is actually possible before trying this dance. As most of this dance code is done already in xen_vcpu_restore() let's make it callable on PV, PVH and PVHVM. Based-on-patch-by: Konrad Wilk Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 45 +++++++++++++++++++++++++++++++------------- arch/x86/xen/enlighten_hvm.c | 20 +++++++------------- arch/x86/xen/smp_hvm.c | 10 ++++++++++ arch/x86/xen/suspend_hvm.c | 11 +++-------- include/xen/xen-ops.h | 2 ++ 5 files changed, 54 insertions(+), 34 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 96b745e3f56c..276cc21619ec 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,6 +106,21 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } +static void xen_vcpu_setup_restore(int cpu) +{ + /* Any per_cpu(xen_vcpu) is stale, so reset it */ + xen_vcpu_info_reset(cpu); + + /* + * For PVH and PVHVM, setup online VCPUs only. The rest will + * be handled by hotplug. + */ + if (xen_pv_domain() || + (xen_hvm_domain() && cpu_online(cpu))) { + xen_vcpu_setup(cpu); + } +} + /* * On restore, set the vcpu placement up again. * If it fails, then we're in a bad state, since @@ -117,17 +132,23 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), - NULL); + bool is_up; + + if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID) + continue; + + /* Only Xen 4.5 and higher support this. */ + is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL) > 0; if (other_cpu && is_up && HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); - xen_setup_runstate_info(cpu); + if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) + xen_setup_runstate_info(cpu); - if (xen_have_vcpu_info_placement) - xen_vcpu_setup(cpu); + xen_vcpu_setup_restore(cpu); if (other_cpu && is_up && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) @@ -163,11 +184,11 @@ void xen_vcpu_setup(int cpu) BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* - * This path is called twice on PVHVM - first during bootup via - * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being - * hotplugged: cpu_up -> xen_hvm_cpu_notify. - * As we can only do the VCPUOP_register_vcpu_info once lets - * not over-write its result. + * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu) + * and at restore (xen_vcpu_restore). Also called for hotplugged + * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm). + * However, the hypercall can only be done once (see below) so if a VCPU + * is offlined and comes back online then let's not redo the hypercall. * * For PV it is called during restore (xen_vcpu_restore) and bootup * (xen_setup_vcpu_info_placement). The hotplug mechanism does not @@ -178,8 +199,6 @@ void xen_vcpu_setup(int cpu) return; } - xen_vcpu_info_reset(cpu); - if (xen_have_vcpu_info_placement) { vcpup = &per_cpu(xen_vcpu_info, cpu); info.mfn = arbitrary_virt_to_mfn(vcpup); @@ -214,7 +233,7 @@ void xen_vcpu_setup(int cpu) if (!xen_have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) clamp_max_cpus(); - return; + xen_vcpu_info_reset(cpu); } } diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index eb53da6547ee..ba1afadb2512 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -20,7 +20,6 @@ void __ref xen_hvm_init_shared_info(void) { - int cpu; struct xen_add_to_physmap xatp; static struct shared_info *shared_info_page; @@ -35,18 +34,6 @@ void __ref xen_hvm_init_shared_info(void) BUG(); HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; - - /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info - * page, we use it in the event channel upcall and in some pvclock - * related functions. We don't need the vcpu_info placement - * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { - xen_vcpu_info_reset(cpu); - } } static void __init init_hvm_pv_info(void) @@ -150,6 +137,13 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_shared_info(); + /* + * xen_vcpu is a pointer to the vcpu_info struct in the shared_info + * page, we use it in the event channel upcall and in some pvclock + * related functions. + */ + xen_vcpu_info_reset(0); + xen_panic_handler_init(); if (xen_feature(XENFEAT_hvm_callback_vector)) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 9e0fb9a015d4..6c8a805819ff 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -28,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) { + int cpu; + native_smp_prepare_cpus(max_cpus); WARN_ON(xen_smp_intr_init(0)); xen_init_lock_cpu(0); + + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; + + /* Set default vcpu_id to make sure that we don't use cpu-0's */ + per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID; + } } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 01afcadde50a..484999416d8b 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -8,15 +8,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) { - int cpu; - - if (!suspend_cancelled) + if (!suspend_cancelled) { xen_hvm_init_shared_info(); + xen_vcpu_restore(); + } xen_callback_vector(); xen_unplug_emulated_devices(); - if (xen_feature(XENFEAT_hvm_safe_pvclock)) { - for_each_online_cpu(cpu) { - xen_setup_runstate_info(cpu); - } - } } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c44a2ee8c8f8..218e6aae5433 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -15,6 +15,8 @@ static inline uint32_t xen_vcpu_nr(int cpu) return per_cpu(xen_vcpu_id, cpu); } +#define XEN_VCPU_ID_INVALID U32_MAX + void xen_arch_pre_suspend(void); void xen_arch_post_suspend(int suspend_cancelled); -- cgit v1.2.3 From 0e4d583723487a975f66f0a8b346fdcb024fafa6 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:00 -0700 Subject: xen/pv: Fix OOPS on restore for a PV, !SMP domain If CONFIG_SMP is disabled, xen_setup_vcpu_info_placement() is called from xen_setup_shared_info(). This is fine as far as boot goes, but it means that we also call it in the restore path. This results in an OOPS because we assign to pv_mmu_ops.read_cr2 which is __ro_after_init. Also, though less problematically, this means we call xen_vcpu_setup() twice at restore -- once from the vcpu info placement call and the second time from xen_vcpu_restore(). Fix by calling xen_setup_vcpu_info_placement() at boot only. Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3ec1c4632e12..9a0714dbddfa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -931,23 +931,27 @@ void xen_setup_shared_info(void) HYPERVISOR_shared_info = (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); -#ifndef CONFIG_SMP - /* In UP this is as good a place as any to set up shared info */ - xen_setup_vcpu_info_placement(); -#endif - xen_setup_mfn_list_list(); - /* - * Now that shared info is set up we can start using routines that - * point to pvclock area. - */ - if (system_state == SYSTEM_BOOTING) + if (system_state == SYSTEM_BOOTING) { +#ifndef CONFIG_SMP + /* + * In UP this is as good a place as any to set up shared info. + * Limit this to boot only, at restore vcpu setup is done via + * xen_vcpu_restore(). + */ + xen_setup_vcpu_info_placement(); +#endif + /* + * Now that shared info is set up we can start using routines + * that point to pvclock area. + */ xen_init_time_ops(); + } } /* This is called once we have the cpu_possible_mask */ -void xen_setup_vcpu_info_placement(void) +void __ref xen_setup_vcpu_info_placement(void) { int cpu; -- cgit v1.2.3 From c9b5d98b25161a7ebee6ea59d6424dd9f33c1b99 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:01 -0700 Subject: xen/vcpu: Handle xen_vcpu_setup() failure in hotplug The hypercall VCPUOP_register_vcpu_info can fail. This failure is handled by making per_cpu(xen_vcpu, cpu) point to its shared_info slot and those without one (cpu >= MAX_VIRT_CPUS) be NULL. For PVH/PVHVM, this is not enough, because we also need to pull these VCPUs out of circulation. Fix for PVH/PVHVM: on registration failure in the cpuhp prepare callback (xen_cpu_up_prepare_hvm()), return an error to the cpuhp state-machine so it can fail the CPU init. Fix for PV: the registration happens before smp_init(), so, in the failure case we clamp setup_max_cpus and limit the number of VCPUs that smp_init() will bring-up to MAX_VIRT_CPUS. This is functionally correct but it makes the code a bit simpler if we get rid of this explicit clamping: for VCPUs that don't have valid xen_vcpu, fail the CPU init in the cpuhp prepare callback (xen_cpu_up_prepare_pv()). Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 46 +++++++++++++++++++++++++------------------- arch/x86/xen/enlighten_hvm.c | 9 +++++---- arch/x86/xen/enlighten_pv.c | 14 +++++++++++++- arch/x86/xen/xen-ops.h | 2 +- 4 files changed, 45 insertions(+), 26 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 276cc21619ec..0e7ef69e8531 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,8 +106,10 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static void xen_vcpu_setup_restore(int cpu) +static int xen_vcpu_setup_restore(int cpu) { + int rc = 0; + /* Any per_cpu(xen_vcpu) is stale, so reset it */ xen_vcpu_info_reset(cpu); @@ -117,8 +119,10 @@ static void xen_vcpu_setup_restore(int cpu) */ if (xen_pv_domain() || (xen_hvm_domain() && cpu_online(cpu))) { - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); } + + return rc; } /* @@ -128,7 +132,7 @@ static void xen_vcpu_setup_restore(int cpu) */ void xen_vcpu_restore(void) { - int cpu; + int cpu, rc; for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); @@ -148,22 +152,25 @@ void xen_vcpu_restore(void) if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_runstate_info(cpu); - xen_vcpu_setup_restore(cpu); - - if (other_cpu && is_up && + rc = xen_vcpu_setup_restore(cpu); + if (rc) + pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " + "System will hang.\n", cpu, rc); + /* + * In case xen_vcpu_setup_restore() fails, do not bring up the + * VCPU. This helps us avoid the resulting OOPS when the VCPU + * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.) + * Note that this does not improve the situation much -- now the + * VM hangs instead of OOPSing -- with the VCPUs that did not + * fail, spinning in stop_machine(), waiting for the failed + * VCPUs to come up. + */ + if (other_cpu && is_up && (rc == 0) && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } -static void clamp_max_cpus(void) -{ -#ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; -#endif -} - void xen_vcpu_info_reset(int cpu) { if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { @@ -175,7 +182,7 @@ void xen_vcpu_info_reset(int cpu) } } -void xen_vcpu_setup(int cpu) +int xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -196,7 +203,7 @@ void xen_vcpu_setup(int cpu) */ if (xen_hvm_domain()) { if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) - return; + return 0; } if (xen_have_vcpu_info_placement) { @@ -230,11 +237,10 @@ void xen_vcpu_setup(int cpu) } } - if (!xen_have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) - clamp_max_cpus(); + if (!xen_have_vcpu_info_placement) xen_vcpu_info_reset(cpu); - } + + return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ba1afadb2512..13b5fa1a211c 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -89,7 +89,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs) static int xen_cpu_up_prepare_hvm(unsigned int cpu) { - int rc; + int rc = 0; /* * This can happen if CPU was offlined earlier and @@ -104,7 +104,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); + if (rc) + return rc; if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -113,9 +115,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) if (rc) { WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n", cpu, rc); - return rc; } - return 0; + return rc; } static int xen_cpu_dead_hvm(unsigned int cpu) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9a0714dbddfa..bc44d2175459 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -958,7 +958,16 @@ void __ref xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) { /* Set up direct vCPU id mapping for PV guests. */ per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + + /* + * xen_vcpu_setup(cpu) can fail -- in which case it + * falls back to the shared_info version for cpus + * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. + * + * xen_cpu_up_prepare_pv() handles the rest by failing + * them in hotplug. + */ + (void) xen_vcpu_setup(cpu); } /* @@ -1432,6 +1441,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu) { int rc; + if (per_cpu(xen_vcpu, cpu) == NULL) + return -ENODEV; + xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 90828256248b..0d5004477db6 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -78,7 +78,7 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; -void xen_vcpu_setup(int cpu); +int xen_vcpu_setup(int cpu); void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); -- cgit v1.2.3 From ae039001054b34c4a624539b32a8b6ff3403aaf9 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:02 -0700 Subject: xen/vcpu: Handle xen_vcpu_setup() failure at boot On PVH, PVHVM, at failure in the VCPUOP_register_vcpu_info hypercall we limit the number of cpus to to MAX_VIRT_CPUS. However, if this failure had occurred for a cpu beyond MAX_VIRT_CPUS, we continue to function with > MAX_VIRT_CPUS. This leads to problems at the next save/restore cycle when there are > MAX_VIRT_CPUS threads going into stop_machine() but coming back up there's valid state for only the first MAX_VIRT_CPUS. This patch pulls the excess CPUs down via cpu_down(). Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/smp.c | 31 +++++++++++++++++++++++++++++++ arch/x86/xen/smp.h | 2 ++ arch/x86/xen/smp_hvm.c | 1 + arch/x86/xen/smp_pv.c | 6 +----- 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 82ac611f2fc1..e7f02eb73727 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -114,6 +115,36 @@ int xen_smp_intr_init(unsigned int cpu) return rc; } +void __init xen_smp_cpus_done(unsigned int max_cpus) +{ + int cpu, rc, count = 0; + + if (xen_hvm_domain()) + native_smp_cpus_done(max_cpus); + + if (xen_have_vcpu_info_placement) + return; + + for_each_online_cpu(cpu) { + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + continue; + + rc = cpu_down(cpu); + + if (rc == 0) { + /* + * Reset vcpu_info so this cpu cannot be onlined again. + */ + xen_vcpu_info_reset(cpu); + count++; + } else { + pr_warn("%s: failed to bring CPU %d down, error %d\n", + __func__, cpu, rc); + } + } + WARN(count, "%s: brought %d CPUs offline\n", __func__, count); +} + void xen_smp_send_reschedule(int cpu) { xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 8ebb6acca64a..87d3c76cba37 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -14,6 +14,8 @@ extern void xen_smp_intr_free(unsigned int cpu); int xen_smp_intr_init_pv(unsigned int cpu); void xen_smp_intr_free_pv(unsigned int cpu); +void xen_smp_cpus_done(unsigned int max_cpus); + void xen_smp_send_reschedule(int cpu); void xen_smp_send_call_function_ipi(const struct cpumask *mask); void xen_smp_send_call_function_single_ipi(int cpu); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6c8a805819ff..fd60abedf658 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -71,4 +71,5 @@ void __init xen_hvm_smp_init(void) smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_cpus_done = xen_smp_cpus_done; } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index aae32535f4ec..1ea598e5f030 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -371,10 +371,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) return 0; } -static void xen_pv_smp_cpus_done(unsigned int max_cpus) -{ -} - #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -469,7 +465,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_pv_smp_prepare_cpus, - .smp_cpus_done = xen_pv_smp_cpus_done, + .smp_cpus_done = xen_smp_cpus_done, .cpu_up = xen_pv_cpu_up, .cpu_die = xen_pv_cpu_die, -- cgit v1.2.3 From a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jun 2017 13:53:56 +0200 Subject: xen: allocate page for shared info page from low memory In a HVM guest the kernel allocates the page for mapping the shared info structure via extend_brk() today. This will lead to a drop of performance as the underlying EPT entry will have to be split up into 4kB entries as the single shared info page is located in hypervisor memory. The issue has been detected by using the libmicro munmap test: unmapping 8kB of memory was faster by nearly a factor of two when no pv interfaces were active in the HVM guest. So instead of taking a page from memory which might be mapped via large EPT entries use a page which is already mapped via a 4kB EPT entry: we can take a page from the first 1MB of memory as the video memory at 640kB disallows using larger EPT entries. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++------- arch/x86/xen/enlighten_pv.c | 2 -- 2 files changed, 24 insertions(+), 9 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 13b5fa1a211c..87d791356ea9 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -10,9 +11,11 @@ #include #include #include +#include #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -21,19 +24,33 @@ void __ref xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page; + u64 pa; + + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) { + /* + * Search for a free page starting at 4kB physical address. + * Low memory is preferred to avoid an EPT large page split up + * by the mapping. + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as + * the BIOS used for HVM guests is well behaved and won't + * clobber memory other than the first 4kB. + */ + for (pa = PAGE_SIZE; + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || + memblock_is_reserved(pa); + pa += PAGE_SIZE) + ; + + memblock_reserve(pa, PAGE_SIZE); + HYPERVISOR_shared_info = __va(pa); + } - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; } static void __init init_hvm_pv_info(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bc44d2175459..811e4ddb3f37 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -89,8 +89,6 @@ void *xen_initial_gdt; -RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); - static int xen_cpu_up_prepare_pv(unsigned int cpu); static int xen_cpu_dead_pv(unsigned int cpu); -- cgit v1.2.3 From bf1b9ddf181d29ec91f87d9c52bcb551ccf04157 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 23 Jun 2017 17:01:20 -0500 Subject: x86: xen: remove unnecessary variable in xen_foreach_remap_area() Remove unnecessary variable mfn in function xen_foreach_remap_area() and, refactor the code. Variable mfn at line 518:mfn = xen_remap_buf.mfns[i]; is only being used to store a value to be passed as an argument to the xen_update_mem_tables() function. This value can be passed directly, which makes variable mfn unnecessary. Also, value assigned to variable mfn at line 534:mfn = xen_remap_mfn; is never used. Addresses-Coverity-ID: 1260110 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a5bf7c451435..c81046323ebc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -499,7 +499,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, void __init xen_remap_memory(void) { unsigned long buf = (unsigned long)&xen_remap_buf; - unsigned long mfn_save, mfn, pfn; + unsigned long mfn_save, pfn; unsigned long remapped = 0; unsigned int i; unsigned long pfn_s = ~0UL; @@ -515,8 +515,7 @@ void __init xen_remap_memory(void) pfn = xen_remap_buf.target_pfn; for (i = 0; i < xen_remap_buf.size; i++) { - mfn = xen_remap_buf.mfns[i]; - xen_update_mem_tables(pfn, mfn); + xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]); remapped++; pfn++; } @@ -530,8 +529,6 @@ void __init xen_remap_memory(void) pfn_s = xen_remap_buf.target_pfn; len = xen_remap_buf.size; } - - mfn = xen_remap_mfn; xen_remap_mfn = xen_remap_buf.next_area_mfn; } -- cgit v1.2.3