diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/balloon.c | 62 | ||||
-rw-r--r-- | drivers/xen/events.c | 87 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 39 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 6 | ||||
-rw-r--r-- | drivers/xen/pci.c | 105 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 70 | ||||
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 67 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 4 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 101 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 121 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 17 |
12 files changed, 533 insertions, 148 deletions
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 5dfd8f8ff07f..5876e1ae6c2d 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target); * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned + * @highmem: highmem or lowmem pages * @return 0 on success, error otherwise */ -int alloc_xenballooned_pages(int nr_pages, struct page** pages) +int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) { int pgno = 0; struct page* page; mutex_lock(&balloon_mutex); while (pgno < nr_pages) { - page = balloon_retrieve(true); - if (page) { + page = balloon_retrieve(highmem); + if (page && PageHighMem(page) == highmem) { pages[pgno++] = page; } else { enum bp_state st; - st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); + if (page) + balloon_append(page); + st = decrease_reservation(nr_pages - pgno, + highmem ? GFP_HIGHUSER : GFP_USER); if (st != BP_DONE) goto out_undo; } @@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages) } EXPORT_SYMBOL(free_xenballooned_pages); -static int __init balloon_init(void) +static void __init balloon_add_region(unsigned long start_pfn, + unsigned long pages) { unsigned long pfn, extra_pfn_end; struct page *page; + /* + * If the amount of usable memory has been limited (e.g., with + * the 'mem' command line parameter), don't add pages beyond + * this limit. + */ + extra_pfn_end = min(max_pfn, start_pfn + pages); + + for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { + page = pfn_to_page(pfn); + /* totalram_pages and totalhigh_pages do not + include the boot-time balloon extension, so + don't subtract from it. */ + __balloon_append(page); + } +} + +static int __init balloon_init(void) +{ + int i; + if (!xen_domain()) return -ENODEV; pr_info("xen/balloon: Initialising balloon driver.\n"); - balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn; + balloon_stats.current_pages = xen_pv_domain() + ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn) + : max_pfn; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -584,24 +611,13 @@ static int __init balloon_init(void) #endif /* - * Initialise the balloon with excess memory space. We need - * to make sure we don't add memory which doesn't exist or - * logically exist. The E820 map can be trimmed to be smaller - * than the amount of physical memory due to the mem= command - * line parameter. And if this is a 32-bit non-HIGHMEM kernel - * on a system with memory which requires highmem to access, - * don't try to use it. + * Initialize the balloon with pages from the extra memory + * regions (see arch/x86/xen/setup.c). */ - extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()), - (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); - for (pfn = PFN_UP(xen_extra_mem_start); - pfn < extra_pfn_end; - pfn++) { - page = pfn_to_page(pfn); - /* totalram_pages and totalhigh_pages do not include the boot-time - balloon extension, so don't subtract from it. */ - __balloon_append(page); - } + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) + if (xen_extra_mem[i].size) + balloon_add_region(PFN_UP(xen_extra_mem[i].start), + PFN_DOWN(xen_extra_mem[i].size)); return 0; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index da70f5c32eb9..7a55b292bf39 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -54,7 +54,7 @@ * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. */ -static DEFINE_SPINLOCK(irq_mapping_update_lock); +static DEFINE_MUTEX(irq_mapping_update_lock); static LIST_HEAD(xen_irq_list_head); @@ -432,7 +432,8 @@ static int __must_check xen_allocate_irq_dynamic(void) irq = irq_alloc_desc_from(first, -1); - xen_irq_init(irq); + if (irq >= 0) + xen_irq_init(irq); return irq; } @@ -631,7 +632,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int irq = -1; struct physdev_irq irq_op; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = find_irq_by_gsi(gsi); if (irq != -1) { @@ -684,7 +685,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, handle_edge_irq, name); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -710,10 +711,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, { int irq, ret; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = xen_allocate_irq_dynamic(); - if (irq == -1) + if (irq < 0) goto out; irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, @@ -724,12 +725,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (ret < 0) goto error_irq; out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; error_irq: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); xen_free_irq(irq); - return -1; + return ret; } #endif @@ -740,7 +741,7 @@ int xen_destroy_irq(int irq) struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); desc = irq_to_desc(irq); if (!desc) @@ -766,7 +767,7 @@ int xen_destroy_irq(int irq) xen_free_irq(irq); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return rc; } @@ -776,10 +777,10 @@ int xen_irq_from_pirq(unsigned pirq) struct irq_info *info; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); list_for_each_entry(info, &xen_irq_list_head, list) { - if (info == NULL || info->type != IRQT_PIRQ) + if (info->type != IRQT_PIRQ) continue; irq = info->irq; if (info->u.pirq.pirq == pirq) @@ -787,7 +788,7 @@ int xen_irq_from_pirq(unsigned pirq) } irq = -1; out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -802,7 +803,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) { int irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = evtchn_to_irq[evtchn]; @@ -818,7 +819,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -829,7 +830,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) struct evtchn_bind_ipi bind_ipi; int evtchn, irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(ipi_to_irq, cpu)[ipi]; @@ -853,7 +854,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -872,13 +873,34 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); } +static int find_virq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_status status; + int port, rc = -ENOENT; + + memset(&status, 0, sizeof(status)); + for (port = 0; port <= NR_EVENT_CHANNELS; port++) { + status.dom = DOMID_SELF; + status.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); + if (rc < 0) + continue; + if (status.status != EVTCHNSTAT_virq) + continue; + if (status.u.virq == virq && status.vcpu == cpu) { + rc = port; + break; + } + } + return rc; +} int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; - int evtchn, irq; + int evtchn, irq, ret; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(virq_to_irq, cpu)[virq]; @@ -892,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) bind_virq.virq = virq; bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (ret == 0) + evtchn = bind_virq.port; + else { + if (ret == -EEXIST) + ret = find_virq(virq, cpu); + BUG_ON(ret < 0); + evtchn = ret; + } xen_irq_info_virq_init(cpu, irq, evtchn, virq); @@ -903,7 +931,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -913,7 +941,7 @@ static void unbind_from_irq(unsigned int irq) struct evtchn_close close; int evtchn = evtchn_from_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); if (VALID_EVTCHN(evtchn)) { close.port = evtchn; @@ -943,7 +971,7 @@ static void unbind_from_irq(unsigned int irq) xen_free_irq(irq); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); } int bind_evtchn_to_irqhandler(unsigned int evtchn, @@ -1279,7 +1307,7 @@ void rebind_evtchn_irq(int evtchn, int irq) will also be masked. */ disable_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ BUG_ON(evtchn_to_irq[evtchn] != -1); @@ -1289,7 +1317,7 @@ void rebind_evtchn_irq(int evtchn, int irq) xen_irq_info_evtchn_init(irq, evtchn); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ irq_set_affinity(irq, cpumask_of(0)); @@ -1670,6 +1698,7 @@ void __init xen_init_IRQ(void) evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), GFP_KERNEL); + BUG_ON(!evtchn_to_irq); for (i = 0; i < NR_EVENT_CHANNELS; i++) evtchn_to_irq[i] = -1; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f914b26cf0c2..880798aae2f2 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -83,6 +83,7 @@ struct grant_map { struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; + struct gnttab_map_grant_ref *kmap_ops; struct page **pages; }; @@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); + add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL); add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || + NULL == add->kmap_ops || NULL == add->pages) goto err; - if (alloc_xenballooned_pages(count, add->pages)) + if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) goto err; for (i = 0; i < count; i++) { add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; + add->kmap_ops[i].handle = -1; } add->index = 0; @@ -142,6 +146,7 @@ err: kfree(add->grants); kfree(add->map_ops); kfree(add->unmap_ops); + kfree(add->kmap_ops); kfree(add); return NULL; } @@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map) gnttab_set_unmap_op(&map->unmap_ops[i], addr, map->flags, -1 /* handle */); } + } else { + /* + * Setup the map_ops corresponding to the pte entries pointing + * to the kernel linear addresses of the struct pages. + * These ptes are completely different from the user ptes dealt + * with find_grant_ptes. + */ + for (i = 0; i < map->count; i++) { + unsigned level; + unsigned long address = (unsigned long) + pfn_to_kaddr(page_to_pfn(map->pages[i])); + pte_t *ptep; + u64 pte_maddr = 0; + BUG_ON(PageHighMem(map->pages[i])); + + ptep = lookup_address(address, &level); + pte_maddr = arbitrary_virt_to_machine(ptep).maddr; + gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, + map->flags | + GNTMAP_host_map | + GNTMAP_contains_pte, + map->grants[i].ref, + map->grants[i].domid); + } } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs(map->map_ops, map->pages, map->count); + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, + map->pages, map->count); if (err) return err; @@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip) pr_debug("priv %p\n", priv); - spin_lock(&priv->lock); while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); list_del(&map->next); gntdev_put_map(map); } - spin_unlock(&priv->lock); if (use_ptemod) mmu_notifier_unregister(&priv->mn, priv->mm); @@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); - gntdev_put_map(map); err = 0; } spin_unlock(&priv->lock); + if (map) + gntdev_put_map(map); return err; } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 4f44b347b24a..8c71ab801756 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -448,7 +448,8 @@ unsigned int gnttab_max_grant_frames(void) EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct page **pages, unsigned int count) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; pte_t *pte; @@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, */ return -EOPNOTSUPP; } - ret = m2p_add_override(mfn, pages[i], - map_ops[i].flags & GNTMAP_contains_pte); + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); if (ret) return ret; } diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index cef4bafc07dc..66057075d6e2 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -18,6 +18,7 @@ */ #include <linux/pci.h> +#include <linux/acpi.h> #include <xen/xen.h> #include <xen/interface/physdev.h> #include <xen/interface/xen.h> @@ -26,26 +27,85 @@ #include <asm/xen/hypercall.h> #include "../pci/pci.h" +static bool __read_mostly pci_seg_supported = true; + static int xen_add_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); +#ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; +#endif + + if (pci_seg_supported) { + struct physdev_pci_device_add add = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; +#ifdef CONFIG_ACPI + acpi_handle handle; +#endif + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + add.flags = XEN_PCI_DEV_VIRTFN; + add.physfn.bus = physfn->bus->number; + add.physfn.devfn = physfn->devfn; + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) + add.flags = XEN_PCI_DEV_EXTFN; + +#ifdef CONFIG_ACPI + handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); + if (!handle) + handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); +#ifdef CONFIG_PCI_IOV + if (!handle && pci_dev->is_virtfn) + handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge); +#endif + if (handle) { + acpi_status status; + + do { + unsigned long long pxm; + + status = acpi_evaluate_integer(handle, "_PXM", + NULL, &pxm); + if (ACPI_SUCCESS(status)) { + add.optarr[0] = pxm; + add.flags |= XEN_PCI_DEV_PXM; + break; + } + status = acpi_get_parent(handle, &handle); + } while (ACPI_SUCCESS(status)); + } +#endif /* CONFIG_ACPI */ + + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add); + if (r != -ENOSYS) + return r; + pci_seg_supported = false; + } + if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; #ifdef CONFIG_PCI_IOV - if (pci_dev->is_virtfn) { + else if (pci_dev->is_virtfn) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, .is_virtfn = 1, - .physfn.bus = pci_dev->physfn->bus->number, - .physfn.devfn = pci_dev->physfn->devfn, + .physfn.bus = physfn->bus->number, + .physfn.devfn = physfn->devfn, }; r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, &manage_pci_ext); - } else + } #endif - if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, @@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); - struct physdev_manage_pci manage_pci; - manage_pci.bus = pci_dev->bus->number; - manage_pci.devfn = pci_dev->devfn; + if (pci_seg_supported) { + struct physdev_pci_device device = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; - r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, - &manage_pci); + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove, + &device); + } else if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; + else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + } return r; } @@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb, r = xen_remove_device(dev); break; default: - break; + return NOTIFY_DONE; } - - return r; + if (r) + dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "add" : + (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?")); + return NOTIFY_OK; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = xen_pci_notifier, }; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 6e8c15a23201..c984768d98ca 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -38,6 +38,7 @@ #include <xen/swiotlb-xen.h> #include <xen/page.h> #include <xen/xen-ops.h> +#include <xen/hvc-console.h> /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this @@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) void __init xen_swiotlb_init(int verbose) { unsigned long bytes; - int rc; + int rc = -ENOMEM; unsigned long nr_tbl; + char *m = NULL; + unsigned int repeat = 3; nr_tbl = swioltb_nr_tbl(); if (nr_tbl) @@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose) xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); } - +retry: bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; /* * Get IO TLB memory from any location. */ xen_io_tlb_start = alloc_bootmem(bytes); - if (!xen_io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - + if (!xen_io_tlb_start) { + m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + goto error; + } xen_io_tlb_end = xen_io_tlb_start + bytes; /* * And replace that memory with pages under 4GB. @@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose) rc = xen_swiotlb_fixup(xen_io_tlb_start, bytes, xen_io_tlb_nslabs); - if (rc) + if (rc) { + free_bootmem(__pa(xen_io_tlb_start), bytes); + m = "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + "is too fragmented!"; goto error; - + } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); return; error: - panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ - "We either don't have the permission or you do not have enough"\ - "free memory under 4GB!\n", rc); + if (repeat--) { + xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ + (xen_io_tlb_nslabs >> 1)); + printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n", + (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); + goto retry; + } + xen_raw_printk("%s (rc:%d)", m, rc); + panic("%s (rc:%d)", m, rc); } void * @@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); unsigned long vstart; + phys_addr_t phys; + dma_addr_t dev_addr; /* * Ignore region specifiers - the kernel's ideas of @@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, vstart = __get_free_pages(flags, order); ret = (void *)vstart; + if (!ret) + return ret; + if (hwdev && hwdev->coherent_dma_mask) - dma_mask = dma_alloc_coherent_mask(hwdev, flags); + dma_mask = hwdev->coherent_dma_mask; - if (ret) { + phys = virt_to_phys(ret); + dev_addr = xen_phys_to_bus(phys); + if (((dev_addr + size - 1 <= dma_mask)) && + !range_straddles_page_boundary(phys, size)) + *dma_handle = dev_addr; + else { if (xen_create_contiguous_region(vstart, order, fls64(dma_mask)) != 0) { free_pages(vstart, order); return NULL; } - memset(ret, 0, size); *dma_handle = virt_to_machine(ret).maddr; } + memset(ret, 0, size); return ret; } EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); @@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { int order = get_order(size); + phys_addr_t phys; + u64 dma_mask = DMA_BIT_MASK(32); if (dma_release_from_coherent(hwdev, order, vaddr)) return; - xen_destroy_contiguous_region((unsigned long)vaddr, order); + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + phys = virt_to_phys(vaddr); + + if (((dev_addr + size - 1 > dma_mask)) || + range_straddles_page_boundary(phys, size)) + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); @@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (!dma_capable(dev, dev_addr, size)) - panic("map_single: bounce buffer is not DMA'ble"); - + if (!dma_capable(dev, dev_addr, size)) { + swiotlb_tbl_unmap_single(dev, map, size, dir); + dev_addr = 0; + } return dev_addr; } EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 6ea852e25162..d93c70857e03 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -68,6 +68,8 @@ */ #include <linux/kernel.h> +#include <linux/bootmem.h> +#include <linux/swap.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/module.h> @@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1; /* In HZ, controls frequency of worker invocation. */ static unsigned int selfballoon_interval __read_mostly = 5; +/* + * Minimum usable RAM in MB for selfballooning target for balloon. + * If non-zero, it is added to totalreserve_pages and self-ballooning + * will not balloon below the sum. If zero, a piecewise linear function + * is calculated as a minimum and added to totalreserve_pages. Note that + * setting this value indiscriminately may cause OOMs and crashes. + */ +static unsigned int selfballoon_min_usable_mb; + static void selfballoon_process(struct work_struct *work); static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); @@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s) __setup("selfballooning", xen_selfballooning_setup); #endif /* CONFIG_FRONTSWAP */ +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + /* * Use current balloon size, the goal (vm_committed_as), and hysteresis * parameters to set a new target balloon size */ static void selfballoon_process(struct work_struct *work) { - unsigned long cur_pages, goal_pages, tgt_pages; + unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; + unsigned long useful_pages; bool reset_timer = false; if (xen_selfballooning_enabled) { - cur_pages = balloon_stats.current_pages; + cur_pages = totalram_pages; tgt_pages = cur_pages; /* default is no change */ goal_pages = percpu_counter_read_positive(&vm_committed_as) + - balloon_stats.current_pages - totalram_pages; + totalreserve_pages; #ifdef CONFIG_FRONTSWAP /* allow space for frontswap pages to be repatriated */ if (frontswap_selfshrinking && frontswap_enabled) @@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work) ((goal_pages - cur_pages) / selfballoon_uphysteresis); /* else if cur_pages == goal_pages, no change */ - balloon_set_new_target(tgt_pages); + useful_pages = max_pfn - totalreserve_pages; + if (selfballoon_min_usable_mb != 0) + floor_pages = totalreserve_pages + + MB2PAGES(selfballoon_min_usable_mb); + /* piecewise linear function ending in ~3% slope */ + else if (useful_pages < MB2PAGES(16)) + floor_pages = max_pfn; /* not worth ballooning */ + else if (useful_pages < MB2PAGES(64)) + floor_pages = totalreserve_pages + MB2PAGES(16) + + ((useful_pages - MB2PAGES(16)) >> 1); + else if (useful_pages < MB2PAGES(512)) + floor_pages = totalreserve_pages + MB2PAGES(40) + + ((useful_pages - MB2PAGES(40)) >> 3); + else /* useful_pages >= MB2PAGES(512) */ + floor_pages = totalreserve_pages + MB2PAGES(99) + + ((useful_pages - MB2PAGES(99)) >> 5); + if (tgt_pages < floor_pages) + tgt_pages = floor_pages; + balloon_set_new_target(tgt_pages + + balloon_stats.current_pages - totalram_pages); reset_timer = true; } #ifdef CONFIG_FRONTSWAP @@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev, static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, show_selfballoon_uphys, store_selfballoon_uphys); +SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", + selfballoon_min_usable_mb); + +static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_min_usable_mb = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, + show_selfballoon_min_usable_mb, + store_selfballoon_min_usable_mb); + + #ifdef CONFIG_FRONTSWAP SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); @@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = { &attr_selfballoon_interval.attr, &attr_selfballoon_downhysteresis.attr, &attr_selfballoon_uphysteresis.attr, + &attr_selfballoon_min_usable_mb.attr, #ifdef CONFIG_FRONTSWAP &attr_frontswap_selfshrinking.attr, &attr_frontswap_hysteresis.attr, diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61ee8fd0..2eff7a6aaa20 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index bd2f90c9ac8b..cef9b0bf63d5 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - err = -ENODEV; - if (!xen_domain()) - return err; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - goto out_error; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; + return 0; - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) - goto out_error; + out_err: + if (page != 0) + free_page(page); + return err; +} - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; +static int __init xenbus_init(void) +{ + int err = 0; - xen_store_interface = mfn_to_virt(xen_store_mfn); + if (!xen_domain()) + return -ENODEV; + + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) + xenstored_ready = 1; + else { + err = xenstored_local_init(); if (err) goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); - xenstored_ready = 1; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. */ @@ -760,12 +770,7 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif - return 0; - - out_error: - if (page != 0) - free_page(page); - + out_error: return err; } diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 60adf919d78d..32417b5064fd 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev, xdev = to_xenbus_device(dev); bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); - if (xdev == NULL) - return -ENODEV; if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) return -ENOMEM; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index ed2ba474a560..540587e18a94 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, } EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + printk(KERN_INFO "XENBUS: backend %s timed out.\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + printk(KERN_INFO "XENBUS: reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data) { + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 5534690075af..b3b8f2f3ad10 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -45,6 +45,7 @@ #include <linux/module.h> #include <linux/mutex.h> #include <xen/xenbus.h> +#include <xen/xen.h> #include "xenbus_comms.h" struct xs_stored_msg { @@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +static void xs_reset_watches(void) +{ + int err; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); +} + /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. */ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); @@ -897,5 +906,9 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + if (xen_hvm_domain()) + xs_reset_watches(); + return 0; } |