summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/apic.c8
-rw-r--r--arch/x86/xen/enlighten.c81
-rw-r--r--arch/x86/xen/enlighten_hvm.c24
-rw-r--r--arch/x86/xen/enlighten_pv.c41
-rw-r--r--arch/x86/xen/enlighten_pvh.c9
-rw-r--r--arch/x86/xen/grant-table.c60
-rw-r--r--arch/x86/xen/mmu.c14
-rw-r--r--arch/x86/xen/mmu_pv.c24
-rw-r--r--arch/x86/xen/p2m.c2
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/spinlock.c6
-rw-r--r--arch/x86/xen/suspend.c4
-rw-r--r--arch/x86/xen/time.c99
-rw-r--r--arch/x86/xen/xen-asm_64.S14
-rw-r--r--arch/x86/xen/xen-ops.h4
15 files changed, 345 insertions, 51 deletions
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 30434b8708f2..de58533d3664 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
return 0xfd;
}
-static unsigned long xen_set_apic_id(unsigned int x)
+static u32 xen_set_apic_id(unsigned int x)
{
WARN_ON(1);
return x;
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
return 0;
if (reg == APIC_LVR)
- return 0x10;
+ return 0x14;
#ifdef CONFIG_X86_32
if (reg == APIC_LDR)
return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
@@ -161,12 +161,10 @@ static struct apic xen_pv_apic = {
/* .irq_delivery_mode - used in native_compose_msi_msg only */
/* .irq_dest_mode - used in native_compose_msi_msg only */
- .target_cpus = default_target_cpus,
.disable_esr = 0,
/* .dest_logical - default_send_IPI_ use it but we use our own. */
.check_apicid_used = default_check_apicid_used, /* Used on 32-bit */
- .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = xen_noop, /* setup_local_APIC calls it */
.ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */
@@ -179,7 +177,7 @@ static struct apic xen_pv_apic = {
.get_apic_id = xen_get_apic_id,
.set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_flat_calc_apicid,
#ifdef CONFIG_SMP
.send_IPI_mask = xen_send_IPI_mask,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/page.h>
+#include <xen/interface/memory.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+ struct xen_memory_map memmap;
+ int rc;
+ unsigned int i, last_guest_ram;
+ phys_addr_t max_addr = PFN_PHYS(max_pfn);
+ struct e820_table *xen_e820_table;
+ const struct e820_entry *entry;
+ struct resource *res;
+
+ if (!xen_initial_domain())
+ return;
+
+ xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+ if (!xen_e820_table)
+ return;
+
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+ set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+ rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+ if (rc) {
+ pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+ goto out;
+ }
+
+ last_guest_ram = 0;
+ for (i = 0; i < memmap.nr_entries; i++) {
+ if (xen_e820_table->entries[i].addr >= max_addr)
+ break;
+ if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+ last_guest_ram = i;
+ }
+
+ entry = &xen_e820_table->entries[last_guest_ram];
+ if (max_addr >= entry->addr + entry->size)
+ goto out; /* No unallocated host RAM. */
+
+ hostmem_resource->start = max_addr;
+ hostmem_resource->end = entry->addr + entry->size;
+
+ /*
+ * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+ * as unavailable. The rest of that region can be used for hotplug-based
+ * ballooning.
+ */
+ for (; i < memmap.nr_entries; i++) {
+ entry = &xen_e820_table->entries[i];
+
+ if (entry->type == E820_TYPE_RAM)
+ continue;
+
+ if (entry->addr >= hostmem_resource->end)
+ break;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ goto out;
+
+ res->name = "Unavailable host RAM";
+ res->start = entry->addr;
+ res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+ entry->addr + entry->size : hostmem_resource->end;
+ rc = insert_resource(hostmem_resource, res);
+ if (rc) {
+ pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+ __func__, res->start, res->end, rc);
+ kfree(res);
+ goto out;
+ }
+ }
+
+ out:
+ kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 754d5391d9fa..826898701045 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,3 +1,4 @@
+#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
@@ -188,8 +189,6 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
- if (xen_pvh_domain())
- machine_ops.emergency_restart = xen_emergency_restart;
#ifdef CONFIG_KEXEC_CORE
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
@@ -226,6 +225,26 @@ static uint32_t __init xen_platform_hvm(void)
return xen_cpuid_base();
}
+static __init void xen_hvm_guest_late_init(void)
+{
+#ifdef CONFIG_XEN_PVH
+ /* Test for PVH domain (PVH boot path taken overrides ACPI flags). */
+ if (!xen_pvh &&
+ (x86_platform.legacy.rtc || !x86_platform.legacy.no_vga))
+ return;
+
+ /* PVH detected. */
+ xen_pvh = true;
+
+ /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
+ if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC)
+ acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+
+ machine_ops.emergency_restart = xen_emergency_restart;
+ pv_info.name = "Xen PVH";
+#endif
+}
+
const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
.name = "Xen HVM",
.detect = xen_platform_hvm,
@@ -233,5 +252,6 @@ const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
.init.init_platform = xen_hvm_guest_init,
.init.x2apic_available = xen_x2apic_para_available,
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
+ .init.guest_late_init = xen_hvm_guest_late_init,
.runtime.pin_vcpu = xen_pin_vcpu,
};
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae3a071e1d0f..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
#include "multicalls.h"
#include "pmu.h"
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = {
{ simd_coprocessor_error, xen_simd_coprocessor_error, false },
};
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
@@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
}
}
+ if (nr == ARRAY_SIZE(trap_array) &&
+ *addr >= (void *)early_idt_handler_array[0] &&
+ *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+ nr = (*addr - (void *)early_idt_handler_array[0]) /
+ EARLY_IDT_HANDLER_SIZE;
+ *addr = (void *)xen_early_idt_handler_array[nr];
+ }
+
if (WARN_ON(ist != 0 && !ist_okay))
return false;
@@ -1230,6 +1240,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_platform.get_nmi_reason = xen_get_nmi_reason;
x86_init.resources.memory_setup = xen_memory_setup;
+ x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
@@ -1249,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
/* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
x86_configure_nx();
/* Get mfn list */
@@ -1261,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_gdt(0);
xen_init_irq_ops();
+
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because idt_setup_early_handler needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
+
+ idt_setup_early_handler();
+
xen_init_capabilities();
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1294,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
- /*
- * Setup xen_vcpu early because start_kernel needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 7bd3ee08393e..436c4f003e17 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -25,13 +25,6 @@ struct boot_params pvh_bootparams __attribute__((section(".data")));
struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
-static void xen_pvh_arch_setup(void)
-{
- /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
- if (nr_ioapics == 0)
- acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
-}
-
static void __init init_pvh_bootparams(void)
{
struct xen_memory_map memmap;
@@ -102,6 +95,4 @@ void __init xen_prepare_pvh(void)
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
init_pvh_bootparams();
-
- x86_init.oem.arch_setup = xen_pvh_arch_setup;
}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 809b6c812654..92ccc718152d 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -49,7 +49,7 @@
static struct gnttab_vm_area {
struct vm_struct *area;
pte_t **ptes;
-} gnttab_shared_vm_area;
+} gnttab_shared_vm_area, gnttab_status_vm_area;
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
@@ -73,16 +73,43 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
return 0;
}
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared)
+{
+ grant_status_t *shared = *__shared;
+ unsigned long addr;
+ unsigned long i;
+
+ if (shared == NULL)
+ *__shared = shared = gnttab_status_vm_area.area->addr;
+
+ addr = (unsigned long)shared;
+
+ for (i = 0; i < nr_gframes; i++) {
+ set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
+ mfn_pte(frames[i], PAGE_KERNEL));
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
+ pte_t **ptes;
unsigned long addr;
unsigned long i;
+ if (shared == gnttab_status_vm_area.area->addr)
+ ptes = gnttab_status_vm_area.ptes;
+ else
+ ptes = gnttab_shared_vm_area.ptes;
+
addr = (unsigned long)shared;
for (i = 0; i < nr_gframes; i++) {
- set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
- __pte(0));
+ set_pte_at(&init_mm, addr, ptes[i], __pte(0));
addr += PAGE_SIZE;
}
}
@@ -102,12 +129,35 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
return 0;
}
-int arch_gnttab_init(unsigned long nr_shared)
+static void arch_gnttab_vfree(struct gnttab_vm_area *area)
{
+ free_vm_area(area->area);
+ kfree(area->ptes);
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
+{
+ int ret;
+
if (!xen_pv_domain())
return 0;
- return arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+ ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Always allocate the space for the status frames in case
+ * we're migrated to a host with V2 support.
+ */
+ ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+err:
+ arch_gnttab_vfree(&gnttab_shared_vm_area);
+ return -ENOMEM;
}
#ifdef CONFIG_XEN_PVH
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3e15345abfe7..d33e7dbe3129 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -172,6 +172,9 @@ int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
pgprot_t prot, unsigned domid,
struct page **pages)
{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -EOPNOTSUPP;
+
return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
@@ -182,6 +185,10 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
int *err_ptr, pgprot_t prot,
unsigned domid, struct page **pages)
{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
+ prot, domid, pages);
+
/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
* and the consequences later is quite hard to detect what the actual
* cause of "wrong memory was mapped in".
@@ -193,9 +200,12 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
/* Returns: 0 success */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
- int numpgs, struct page **pages)
+ int nr, struct page **pages)
{
- if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return xen_xlate_unmap_gfn_range(vma, nr, pages);
+
+ if (!pages)
return 0;
return -EINVAL;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a0e2b8c6e5c7..d85076223a69 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -315,7 +315,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
if (val & _PAGE_PRESENT) {
- unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
unsigned long pfn = mfn_to_pfn(mfn);
pteval_t flags = val & PTE_FLAGS_MASK;
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
{
struct {
struct mmuext_op op;
-#ifdef CONFIG_SMP
- DECLARE_BITMAP(mask, num_processors);
-#else
DECLARE_BITMAP(mask, NR_CPUS);
-#endif
} *args;
struct multicall_space mcs;
+ const size_t mc_entry_size = sizeof(args->op) +
+ sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus))
return; /* nothing to do */
- mcs = xen_mc_entry(sizeof(*args));
+ mcs = xen_mc_entry(mc_entry_size);
args = mcs.args;
args->op.arg2.vcpumask = to_cpumask(args->mask);
@@ -1721,7 +1719,7 @@ static unsigned long __init m2p(phys_addr_t maddr)
{
phys_addr_t paddr;
- maddr &= PTE_PFN_MASK;
+ maddr &= XEN_PTE_MFN_MASK;
paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
return paddr;
@@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Graft it onto L4[511][510] */
copy_page(level2_kernel_pgt, l2);
+ /*
+ * Zap execute permission from the ident map. Due to the sharing of
+ * L1 entries we need to do this in the L2.
+ */
+ if (__supported_pte_mask & _PAGE_NX) {
+ for (i = 0; i < PTRS_PER_PMD; ++i) {
+ if (pmd_none(level2_ident_pgt[i]))
+ continue;
+ level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+ }
+ }
+
/* Copy the initial P->M table mappings if necessary. */
i = pgd_index(xen_start_info->mfn_list);
if (i && i < pgd_index(__START_KERNEL_map))
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 6083ba462f35..13b4f19b9131 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -547,7 +547,7 @@ int xen_alloc_p2m_entry(unsigned long pfn)
if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
topidx = p2m_top_index(pfn);
top_mfn_p = &p2m_top_mfn[topidx];
- mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
+ mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]);
BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
addr = xen_e820_table.entries[0].addr;
size = xen_e820_table.entries[0].size;
while (i < xen_e820_table.nr_entries) {
- bool discard = false;
chunk_size = size;
type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(pfn_s, n_pfns);
xen_max_p2m_pfn = pfn_s + n_pfns;
} else
- discard = true;
+ type = E820_TYPE_UNUSABLE;
}
- if (!discard)
- xen_align_and_add_e820_region(addr, chunk_size, type);
+ xen_align_and_add_e820_region(addr, chunk_size, type);
addr += chunk_size;
size -= chunk_size;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 08324c64005d..02f3445a2b5f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <asm/paravirt.h>
+#include <asm/qspinlock.h>
#include <xen/interface/xen.h>
#include <xen/events.h>
@@ -81,8 +82,11 @@ void xen_init_lock_cpu(int cpu)
int irq;
char *name;
- if (!xen_pvspin)
+ if (!xen_pvspin) {
+ if (cpu == 0)
+ static_branch_disable(&virt_spin_lock_key);
return;
+ }
WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
cpu, per_cpu(lock_kicker_irq, cpu));
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 92bf5ecb6baf..d9f96cc5d743 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -17,6 +17,8 @@
void xen_arch_pre_suspend(void)
{
+ xen_save_time_memory_area();
+
if (xen_pv_domain())
xen_pv_pre_suspend();
}
@@ -27,6 +29,8 @@ void xen_arch_post_suspend(int cancelled)
xen_pv_post_suspend(cancelled);
else
xen_hvm_post_suspend(cancelled);
+
+ xen_restore_time_memory_area();
}
static void xen_vcpu_notify_restore(void *data)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 80c2a4bdf230..29163c43ebbd 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -75,7 +75,7 @@ static void xen_get_wallclock(struct timespec *now)
static int xen_set_wallclock(const struct timespec *now)
{
- return -1;
+ return -ENODEV;
}
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
@@ -371,8 +371,95 @@ static const struct pv_time_ops xen_time_ops __initconst = {
.steal_clock = xen_steal_clock,
};
+static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+
+void xen_save_time_memory_area(void)
+{
+ struct vcpu_register_time_memory_area t;
+ int ret;
+
+ if (!xen_clock)
+ return;
+
+ t.addr.v = NULL;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+ if (ret != 0)
+ pr_notice("Cannot save secondary vcpu_time_info (err %d)",
+ ret);
+ else
+ clear_page(xen_clock);
+}
+
+void xen_restore_time_memory_area(void)
+{
+ struct vcpu_register_time_memory_area t;
+ int ret;
+
+ if (!xen_clock)
+ return;
+
+ t.addr.v = &xen_clock->pvti;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+
+ /*
+ * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the
+ * secondary time info with Xen or if we migrated to a host without the
+ * necessary flags. On both of these cases what happens is either
+ * process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT
+ * bit set. Userspace checks the latter and if 0, it discards the data
+ * in pvti and fallbacks to a system call for a reliable timestamp.
+ */
+ if (ret != 0)
+ pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
+ ret);
+}
+
+static void xen_setup_vsyscall_time_info(void)
+{
+ struct vcpu_register_time_memory_area t;
+ struct pvclock_vsyscall_time_info *ti;
+ int ret;
+
+ ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
+ if (!ti)
+ return;
+
+ t.addr.v = &ti->pvti;
+
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
+ if (ret) {
+ pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
+ free_page((unsigned long)ti);
+ return;
+ }
+
+ /*
+ * If primary time info had this bit set, secondary should too since
+ * it's the same data on both just different memory regions. But we
+ * still check it in case hypervisor is buggy.
+ */
+ if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
+ t.addr.v = NULL;
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
+ 0, &t);
+ if (!ret)
+ free_page((unsigned long)ti);
+
+ pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n");
+ return;
+ }
+
+ xen_clock = ti;
+ pvclock_set_pvti_cpu0_va(xen_clock);
+
+ xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
+}
+
static void __init xen_time_init(void)
{
+ struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
struct timespec tp;
@@ -396,6 +483,16 @@ static void __init xen_time_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC);
+ /*
+ * We check ahead on the primary time info if this
+ * bit is supported hence speeding up Xen clocksource.
+ */
+ pvti = &__this_cpu_read(xen_vcpu)->time;
+ if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
+ pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+ xen_setup_vsyscall_time_info();
+ }
+
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
#include <xen/interface/xen.h>
+#include <linux/init.h>
#include <linux/linkage.h>
.macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap hypervisor_callback
+ __INIT
+ENTRY(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+ i = i + 1
+ .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+ .endr
+END(xen_early_idt_handler_array)
+ __FINIT
+
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f377e1820c6c..3b34745d0a52 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -70,7 +70,9 @@ void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
u64 xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
-void __init xen_init_time_ops(void);
+void xen_save_time_memory_area(void);
+void xen_restore_time_memory_area(void);
+void __ref xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);