summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-10 18:35:02 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-10 18:35:02 +0100
commit: ab1831b0b87851c874a75e4b3a8538e3d76b37d7 (patch)
tree: e2855518eb570ad09e2bdf5b3805cfa758e0abc3
parent: Merge branch 'xen/fbdev' of git://git.kernel.org/pub/scm/linux/kernel/git/jer... (diff)
parent: xen: try harder to balloon up under memory pressure. (diff)
download: linux-ab1831b0b87851c874a75e4b3a8538e3d76b37d7.tar.xz
download: linux-ab1831b0b87851c874a75e4b3a8538e3d76b37d7.zip
Merge branch 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen: try harder to balloon up under memory pressure.
  Xen balloon: fix totalram_pages counting.
  xen: explicitly create/destroy stop_machine workqueues outside suspend/resume region.
  xen: improve error handling in do_suspend.
  xen: don't leak IRQs over suspend/resume.
  xen: call clock resume notifier on all CPUs
  xen: use iret for return from 64b kernel to 32b usermode
  xen: don't call dpm_resume_noirq() with interrupts disabled.
  xen: register runstate info for boot CPU early
  xen: register runstate on secondary CPUs
  xen: register timer interrupt with IRQF_TIMER
  xen: correctly restore pfn_to_mfn_list_list after resume
  xen: restore runstate_info even if !have_vcpu_info_placement
  xen: re-register runstate area earlier on resume.
  xen: wait up to 5 minutes for device connetion
  xen: improvement to wait_for_devices()
  xen: fix is_disconnected_device/exists_disconnected_device
  xen/xenbus: make DEVICE_ATTR()s static
-rw-r--r--  arch/x86/xen/enlighten.c           27
-rw-r--r--  arch/x86/xen/mmu.c                  2
-rw-r--r--  arch/x86/xen/smp.c                  1
-rw-r--r--  arch/x86/xen/suspend.c             17
-rw-r--r--  arch/x86/xen/time.c                 7
-rw-r--r--  arch/x86/xen/xen-asm_64.S           4
-rw-r--r--  arch/x86/xen/xen-ops.h              2
-rw-r--r--  drivers/xen/balloon.c              38
-rw-r--r--  drivers/xen/events.c                3
-rw-r--r--  drivers/xen/manage.c               37
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c  48
11 files changed, 107 insertions, 79 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c462cea8ef09..b8e45f164e2a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -138,24 +138,23 @@ static void xen_vcpu_setup(int cpu)
*/
void xen_vcpu_restore(void)
{
- if (have_vcpu_info_placement) {
- int cpu;
+ int cpu;
- for_each_online_cpu(cpu) {
- bool other_cpu = (cpu != smp_processor_id());
+ for_each_online_cpu(cpu) {
+ bool other_cpu = (cpu != smp_processor_id());
- if (other_cpu &&
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
- BUG();
+ if (other_cpu &&
+ HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+ BUG();
- xen_vcpu_setup(cpu);
+ xen_setup_runstate_info(cpu);
- if (other_cpu &&
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
- BUG();
- }
+ if (have_vcpu_info_placement)
+ xen_vcpu_setup(cpu);
- BUG_ON(!have_vcpu_info_placement);
+ if (other_cpu &&
+ HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+ BUG();
}
}
@@ -1180,6 +1179,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("about to get started...\n");
+ xen_setup_runstate_info(0);
+
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3bf7b1d250ce..bf4cd6bfe959 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn)
}
/* Build the parallel p2m_top_mfn structures */
-static void __init xen_build_mfn_list_list(void)
+void xen_build_mfn_list_list(void)
{
unsigned pfn, idx;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 738da0cb0d8b..64757c0ba5fc 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
+ xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_init_lock_cpu(cpu);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 95be7b434724..987267f79bf5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,4 +1,5 @@
#include <linux/types.h>
+#include <linux/clockchips.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
@@ -27,6 +28,8 @@ void xen_pre_suspend(void)
void xen_post_suspend(int suspend_cancelled)
{
+ xen_build_mfn_list_list();
+
xen_setup_shared_info();
if (suspend_cancelled) {
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled)
}
+static void xen_vcpu_notify_restore(void *data)
+{
+ unsigned long reason = (unsigned long)data;
+
+ /* Boot processor notified via generic timekeeping_resume() */
+ if ( smp_processor_id() == 0)
+ return;
+
+ clockevents_notify(reason, NULL);
+}
+
void xen_arch_resume(void)
{
- /* nothing */
+ smp_call_function(xen_vcpu_notify_restore,
+ (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0a5aa44299a5..9d1f853120d8 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu)
return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
}
-static void setup_runstate_info(int cpu)
+void xen_setup_runstate_info(int cpu)
{
struct vcpu_register_runstate_memory_area area;
@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu)
evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
-
- setup_runstate_info(cpu);
}
void xen_teardown_timer(int cpu)
@@ -494,6 +492,7 @@ __init void xen_time_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC);
+ xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
}
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 02f496a8dbaa..53adefda4275 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32)
pushq $__USER32_CS
pushq %rcx
- pushq $VGCF_in_syscall
+ pushq $0
1: jmp hypercall_iret
ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
- pushq $VGCF_in_syscall
+ pushq $0
jmp hypercall_iret
ENDPROC(xen_syscall32_target)
ENDPROC(xen_sysenter_target)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 355fa6b99c9c..f9153a300bce 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
+void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_ident_map_ISA(void);
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
+void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
cycle_t xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d31505b6f7a4..420433613584 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -66,8 +66,6 @@ struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
unsigned long current_pages;
unsigned long target_pages;
- /* We may hit the hard limit in Xen. If we do then we remember it. */
- unsigned long hard_limit;
/*
* Drivers may alter the memory reservation independently, but they
* must inform the balloon driver so we avoid hitting the hard limit.
@@ -136,6 +134,8 @@ static void balloon_append(struct page *page)
list_add(&page->lru, &ballooned_pages);
balloon_stats.balloon_low++;
}
+
+ totalram_pages--;
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -156,6 +156,8 @@ static struct page *balloon_retrieve(void)
else
balloon_stats.balloon_low--;
+ totalram_pages++;
+
return page;
}
@@ -181,7 +183,7 @@ static void balloon_alarm(unsigned long unused)
static unsigned long current_target(void)
{
- unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+ unsigned long target = balloon_stats.target_pages;
target = min(target,
balloon_stats.current_pages +
@@ -217,23 +219,10 @@ static int increase_reservation(unsigned long nr_pages)
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
- if (rc < nr_pages) {
- if (rc > 0) {
- int ret;
-
- /* We hit the Xen hard limit: reprobe. */
- reservation.nr_extents = rc;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- BUG_ON(ret != rc);
- }
- if (rc >= 0)
- balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
- balloon_stats.driver_pages);
+ if (rc < 0)
goto out;
- }
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < rc; i++) {
page = balloon_retrieve();
BUG_ON(page == NULL);
@@ -259,13 +248,12 @@ static int increase_reservation(unsigned long nr_pages)
__free_page(page);
}
- balloon_stats.current_pages += nr_pages;
- totalram_pages = balloon_stats.current_pages;
+ balloon_stats.current_pages += rc;
out:
spin_unlock_irqrestore(&balloon_lock, flags);
- return 0;
+ return rc < 0 ? rc : rc != nr_pages;
}
static int decrease_reservation(unsigned long nr_pages)
@@ -323,7 +311,6 @@ static int decrease_reservation(unsigned long nr_pages)
BUG_ON(ret != nr_pages);
balloon_stats.current_pages -= nr_pages;
- totalram_pages = balloon_stats.current_pages;
spin_unlock_irqrestore(&balloon_lock, flags);
@@ -367,7 +354,6 @@ static void balloon_process(struct work_struct *work)
static void balloon_set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
- balloon_stats.hard_limit = ~0UL;
balloon_stats.target_pages = target;
schedule_work(&balloon_worker);
}
@@ -422,12 +408,10 @@ static int __init balloon_init(void)
pr_info("xen_balloon: Initialising balloon driver.\n");
balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
- totalram_pages = balloon_stats.current_pages;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
balloon_stats.driver_pages = 0UL;
- balloon_stats.hard_limit = ~0UL;
init_timer(&balloon_timer);
balloon_timer.data = 0;
@@ -472,9 +456,6 @@ module_exit(balloon_exit);
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
-BALLOON_SHOW(hard_limit_kb,
- (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
- (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
@@ -544,7 +525,6 @@ static struct attribute *balloon_info_attrs[] = {
&attr_current_kb.attr,
&attr_low_kb.attr,
&attr_high_kb.attr,
- &attr_hard_limit_kb.attr,
&attr_driver_kb.attr,
NULL
};
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 2f57276e87a2..ce602dd09bc1 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -474,6 +474,9 @@ static void unbind_from_irq(unsigned int irq)
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
+ }
+
+ if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
dynamic_irq_cleanup(irq);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 10d03d7931c4..c4997930afc7 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,6 @@ static int xen_suspend(void *data)
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
err);
- dpm_resume_noirq(PMSG_RESUME);
return err;
}
@@ -69,7 +68,6 @@ static int xen_suspend(void *data)
}
sysdev_resume();
- dpm_resume_noirq(PMSG_RESUME);
return 0;
}
@@ -81,6 +79,12 @@ static void do_suspend(void)
shutting_down = SHUTDOWN_SUSPEND;
+ err = stop_machine_create();
+ if (err) {
+ printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
+ goto out;
+ }
+
#ifdef CONFIG_PREEMPT
/* If the kernel is preemptible, we need to freeze all the processes
to prevent them from being in the middle of a pagetable update
@@ -88,29 +92,32 @@ static void do_suspend(void)
err = freeze_processes();
if (err) {
printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
- return;
+ goto out_destroy_sm;
}
#endif
err = dpm_suspend_start(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
- goto out;
+ goto out_thaw;
}
- printk(KERN_DEBUG "suspending xenstore...\n");
- xs_suspend();
-
err = dpm_suspend_noirq(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
- goto resume_devices;
+ goto out_resume;
}
+ printk(KERN_DEBUG "suspending xenstore...\n");
+ xs_suspend();
+
err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+
+ dpm_resume_noirq(PMSG_RESUME);
+
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
- goto out;
+ cancelled = 1;
}
if (!cancelled) {
@@ -119,17 +126,21 @@ static void do_suspend(void)
} else
xs_suspend_cancel();
- dpm_resume_noirq(PMSG_RESUME);
-
-resume_devices:
+out_resume:
dpm_resume_end(PMSG_RESUME);
/* Make sure timer events get retriggered on all CPUs */
clock_was_set();
-out:
+
+out_thaw:
#ifdef CONFIG_PREEMPT
thaw_processes();
+
+out_destroy_sm:
#endif
+ stop_machine_destroy();
+
+out:
shutting_down = SHUTDOWN_INVALID;
}
#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index d42e25d5968d..649fcdf114b7 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -454,21 +454,21 @@ static ssize_t xendev_show_nodename(struct device *dev,
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
-DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
static ssize_t xendev_show_devtype(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
-DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
static ssize_t xendev_show_modalias(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
}
-DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
@@ -843,7 +843,7 @@ postcore_initcall(xenbus_probe_init);
MODULE_LICENSE("GPL");
-static int is_disconnected_device(struct device *dev, void *data)
+static int is_device_connecting(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct device_driver *drv = data;
@@ -861,14 +861,15 @@ static int is_disconnected_device(struct device *dev, void *data)
return 0;
xendrv = to_xenbus_driver(dev->driver);
- return (xendev->state != XenbusStateConnected ||
- (xendrv->is_ready && !xendrv->is_ready(xendev)));
+ return (xendev->state < XenbusStateConnected ||
+ (xendev->state == XenbusStateConnected &&
+ xendrv->is_ready && !xendrv->is_ready(xendev)));
}
-static int exists_disconnected_device(struct device_driver *drv)
+static int exists_connecting_device(struct device_driver *drv)
{
return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
- is_disconnected_device);
+ is_device_connecting);
}
static int print_device_status(struct device *dev, void *data)
@@ -884,10 +885,13 @@ static int print_device_status(struct device *dev, void *data)
/* Information only: is this too noisy? */
printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
xendev->nodename);
- } else if (xendev->state != XenbusStateConnected) {
+ } else if (xendev->state < XenbusStateConnected) {
+ enum xenbus_state rstate = XenbusStateUnknown;
+ if (xendev->otherend)
+ rstate = xenbus_read_driver_state(xendev->otherend);
printk(KERN_WARNING "XENBUS: Timeout connecting "
- "to device: %s (state %d)\n",
- xendev->nodename, xendev->state);
+ "to device: %s (local state %d, remote state %d)\n",
+ xendev->nodename, xendev->state, rstate);
}
return 0;
@@ -897,7 +901,7 @@ static int print_device_status(struct device *dev, void *data)
static int ready_to_wait_for_devices;
/*
- * On a 10 second timeout, wait for all devices currently configured. We need
+ * On a 5-minute timeout, wait for all devices currently configured. We need
* to do this to guarantee that the filesystems and / or network devices
* needed for boot are available, before we can allow the boot to proceed.
*
@@ -912,18 +916,30 @@ static int ready_to_wait_for_devices;
*/
static void wait_for_devices(struct xenbus_driver *xendrv)
{
- unsigned long timeout = jiffies + 10*HZ;
+ unsigned long start = jiffies;
struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+ unsigned int seconds_waited = 0;
if (!ready_to_wait_for_devices || !xen_domain())
return;
- while (exists_disconnected_device(drv)) {
- if (time_after(jiffies, timeout))
- break;
+ while (exists_connecting_device(drv)) {
+ if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
+ if (!seconds_waited)
+ printk(KERN_WARNING "XENBUS: Waiting for "
+ "devices to initialise: ");
+ seconds_waited += 5;
+ printk("%us...", 300 - seconds_waited);
+ if (seconds_waited == 300)
+ break;
+ }
+
schedule_timeout_interruptible(HZ/10);
}
+ if (seconds_waited)
+ printk("\n");
+
bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
print_device_status);
}