summaryrefslogtreecommitdiffstats
path: root/kernel/power
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-17 00:20:36 +0200
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-17 00:20:36 +0200
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /kernel/power
downloadlinux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.xz
linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'kernel/power')
-rw-r--r--kernel/power/Kconfig74
-rw-r--r--kernel/power/Makefile11
-rw-r--r--kernel/power/console.c58
-rw-r--r--kernel/power/disk.c431
-rw-r--r--kernel/power/main.c269
-rw-r--r--kernel/power/pm.c265
-rw-r--r--kernel/power/power.h52
-rw-r--r--kernel/power/poweroff.c45
-rw-r--r--kernel/power/process.c121
-rw-r--r--kernel/power/smp.c85
-rw-r--r--kernel/power/swsusp.c1433
11 files changed, 2844 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
new file mode 100644
index 000000000000..696387ffe49c
--- /dev/null
+++ b/kernel/power/Kconfig
@@ -0,0 +1,74 @@
+config PM
+ bool "Power Management support"
+ ---help---
+ "Power Management" means that parts of your computer are shut
+ off or put into a power conserving "sleep" mode if they are not
+ being used. There are two competing standards for doing this: APM
+ and ACPI. If you want to use either one, say Y here and then also
+ to the requisite support below.
+
+ Power Management is most important for battery powered laptop
+ computers; if you have a laptop, check out the Linux Laptop home
+ page on the WWW at <http://www.linux-on-laptops.com/> or
+ Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
+ and the Battery Powered Linux mini-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>.
+
+ Note that, even if you say N here, Linux on the x86 architecture
+ will issue the hlt instruction if nothing is to be done, thereby
+ sending the processor to sleep and saving power.
+
+config PM_DEBUG
+ bool "Power Management Debug Support"
+ depends on PM
+ ---help---
+ This option enables verbose debugging support in the Power Management
+ code. This is helpful when debugging and reporting various PM bugs,
+ like suspend support.
+
+config SOFTWARE_SUSPEND
+ bool "Software Suspend (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && PM && SWAP
+ ---help---
+ Enable the possibility of suspending the machine.
+ It doesn't need APM.
+ You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
+ (patch for sysvinit needed).
+
+ It creates an image which is saved in your active swap. Upon next
+ boot, pass the 'resume=/dev/swappartition' argument to the kernel to
+ have it detect the saved image, restore memory state from it, and
+ continue to run as before. If you do not want the previous state to
+ be reloaded, then use the 'noresume' kernel argument. However, note
+ that your partitions will be fsck'd and you must re-mkswap your swap
+ partitions. It does not work with swap files.
+
+ Right now you may boot without resuming and then later resume but
+ in the meantime you cannot use those swap partitions/files which were
+ involved in suspending. Also in this case there is a risk that buffers
+ on disk won't match with saved ones.
+
+ For more information take a look at <file:Documentation/power/swsusp.txt>.
+
+config PM_STD_PARTITION
+ string "Default resume partition"
+ depends on SOFTWARE_SUSPEND
+ default ""
+ ---help---
+ The default resume partition is the partition that the suspend-
+ to-disk implementation will search for a suspended disk image.
+
+ The partition specified here will be different for almost every user.
+ It should be a valid swap partition (at least for now) that is turned
+ on before suspending.
+
+ The partition specified can be overridden by specifying:
+
+ resume=/dev/<other device>
+
+ which will set the resume partition to the device specified.
+
+ Note that there is currently no way to specify which device to save the
+ suspended image to. It will simply pick the first available swap
+ device.
+
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
new file mode 100644
index 000000000000..fbdc634135a7
--- /dev/null
+++ b/kernel/power/Makefile
@@ -0,0 +1,11 @@
+
+ifeq ($(CONFIG_PM_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+swsusp-smp-$(CONFIG_SMP) += smp.o
+
+obj-y := main.o process.o console.o pm.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
+
+obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
new file mode 100644
index 000000000000..7ff375e7c95f
--- /dev/null
+++ b/kernel/power/console.c
@@ -0,0 +1,58 @@
+/*
+ * kernel/power/console.c - Functions for saving/restoring console.
+ *
+ * Originally from swsusp.
+ */
+
+#include <linux/vt_kern.h>
+#include <linux/kbd_kern.h>
+#include <linux/console.h>
+#include "power.h"
+
+static int new_loglevel = 10;
+static int orig_loglevel;
+#ifdef SUSPEND_CONSOLE
+static int orig_fgconsole, orig_kmsg;
+#endif
+
+/*
+ * pm_prepare_console - make the console verbose for a suspend transition.
+ *
+ * Saves console_loglevel in orig_loglevel and replaces it with new_loglevel.
+ * When SUSPEND_CONSOLE is defined it also allocates that virtual console,
+ * switches to it and redirects kernel messages there, remembering the
+ * previous foreground console and kmsg_redirect value so that
+ * pm_restore_console() can put them back.
+ *
+ * Returns 0 on success, 1 if the suspend console could not be allocated
+ * or did not become active.
+ */
+int pm_prepare_console(void)
+{
+	orig_loglevel = console_loglevel;
+	console_loglevel = new_loglevel;
+
+#ifdef SUSPEND_CONSOLE
+	acquire_console_sem();
+
+	orig_fgconsole = fg_console;
+
+	if (vc_allocate(SUSPEND_CONSOLE)) {
+		/* we can't have a free VC for now. Too bad,
+		 * we don't want to mess the screen for now. */
+		release_console_sem();
+		return 1;
+	}
+
+	set_console(SUSPEND_CONSOLE);
+	release_console_sem();
+
+	if (vt_waitactive(SUSPEND_CONSOLE)) {
+		/* Newline-terminate so the next log line is not appended. */
+		pr_debug("Suspend: Can't switch VCs.\n");
+		return 1;
+	}
+	orig_kmsg = kmsg_redirect;
+	kmsg_redirect = SUSPEND_CONSOLE;
+#endif
+	return 0;
+}
+
+/*
+ * pm_restore_console - put back what pm_prepare_console() changed.
+ *
+ * Restores console_loglevel from orig_loglevel. When SUSPEND_CONSOLE is
+ * defined it also switches back to the saved foreground console (while
+ * holding the console semaphore) and restores kmsg_redirect.
+ */
+void pm_restore_console(void)
+{
+	console_loglevel = orig_loglevel;
+
+#ifdef SUSPEND_CONSOLE
+	acquire_console_sem();
+	set_console(orig_fgconsole);
+	release_console_sem();
+
+	kmsg_redirect = orig_kmsg;
+#endif
+}
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
new file mode 100644
index 000000000000..02b6764034dc
--- /dev/null
+++ b/kernel/power/disk.c
@@ -0,0 +1,431 @@
+/*
+ * kernel/power/disk.c - Suspend-to-disk support.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include "power.h"
+
+
+extern suspend_disk_method_t pm_disk_mode;
+extern struct pm_ops * pm_ops;
+
+extern int swsusp_suspend(void);
+extern int swsusp_write(void);
+extern int swsusp_check(void);
+extern int swsusp_read(void);
+extern void swsusp_close(void);
+extern int swsusp_resume(void);
+extern int swsusp_free(void);
+
+
+static int noresume = 0;
+char resume_file[256] = CONFIG_PM_STD_PARTITION;
+dev_t swsusp_resume_device;
+
+/**
+ * power_down - Shut machine down for hibernate.
+ * @mode: Suspend-to-disk mode
+ *
+ * After local_irq_save(), shuts devices down and then, depending on @mode,
+ * enters the platform sleep state, powers the machine off or restarts it.
+ *
+ * This function does not return to its caller: whichever case is taken (and
+ * also when @mode matches none of them) control falls through to
+ * machine_halt(), a "power me down manually" message is printed and the
+ * CPU spins forever.
+ *
+ * NOTE(review): the result of pm_ops->enter() is stored in 'error' but never
+ * examined, so a platform failure is not reported back to the caller.
+ */
+
+static void power_down(suspend_disk_method_t mode)
+{
+ unsigned long flags;
+ int error = 0;
+
+ local_irq_save(flags);
+ switch(mode) {
+ case PM_DISK_PLATFORM:
+ device_shutdown();
+ error = pm_ops->enter(PM_SUSPEND_DISK);
+ break;
+ case PM_DISK_SHUTDOWN:
+ printk("Powering off system\n");
+ device_shutdown();
+ machine_power_off();
+ break;
+ case PM_DISK_REBOOT:
+ device_shutdown();
+ machine_restart(NULL);
+ break;
+ }
+ /* Reached only if the selected method came back instead of stopping us. */
+ machine_halt();
+ /* Valid image is on the disk, if we continue we risk serious data corruption
+ after resume. */
+ printk(KERN_CRIT "Please power me down manually\n");
+ while(1);
+}
+
+
+static int in_suspend __nosavedata = 0;
+
+
+/**
+ * free_some_memory - Try to free as much memory as possible
+ *
+ * ... but do not OOM-kill anyone
+ *
+ * Calls shrink_all_memory() repeatedly until it reports that nothing more
+ * was freed, drawing a spinner on the console while it works, and finally
+ * prints the total number of pages freed.
+ *
+ * Notice: all userland should be stopped at this point, or
+ * livelock is possible.
+ */
+
+static void free_some_memory(void)
+{
+	static const char spinner[] = "-\\|/";
+	unsigned int i = 0;
+	unsigned int tmp;
+	unsigned long pages = 0;
+
+	printk("Freeing memory... ");
+	while ((tmp = shrink_all_memory(10000))) {
+		pages += tmp;
+		/* Backspace over the previous spinner glyph and draw the next. */
+		printk("\b%c", spinner[i]);
+		i = (i + 1) % (sizeof(spinner) - 1);
+	}
+	/* 'pages' is unsigned long, so it must be printed with %lu. */
+	printk("\bdone (%lu pages freed)\n", pages);
+}
+
+
+/*
+ * Call the platform driver's ->finish() hook for suspend-to-disk, but only
+ * when the platform method is the selected disk mode and such a hook exists.
+ */
+static inline void platform_finish(void)
+{
+	if (pm_disk_mode != PM_DISK_PLATFORM)
+		return;
+
+	if (pm_ops && pm_ops->finish)
+		pm_ops->finish(PM_SUSPEND_DISK);
+}
+
+/*
+ * Common tail of the suspend-to-disk and resume paths: resume devices, let
+ * the platform finish, re-enable the non-boot CPUs, thaw processes and
+ * restore the console, in that order.
+ */
+static void finish(void)
+{
+ device_resume();
+ platform_finish();
+ enable_nonboot_cpus();
+ thaw_processes();
+ pm_restore_console();
+}
+
+
+/*
+ * Get user space out of the way before a snapshot or a restore: switch the
+ * console, sync filesystems, freeze processes, let the platform prepare
+ * (platform mode only) and finally release as much memory as possible.
+ *
+ * Returns 0 on success, -EBUSY if processes could not be frozen, or the
+ * error reported by the platform ->prepare() hook. Nothing is undone here
+ * on failure; callers use unprepare_processes() for that.
+ */
+static int prepare_processes(void)
+{
+	int ret;
+
+	pm_prepare_console();
+	sys_sync();
+
+	if (freeze_processes())
+		return -EBUSY;
+
+	if (pm_disk_mode == PM_DISK_PLATFORM && pm_ops && pm_ops->prepare) {
+		ret = pm_ops->prepare(PM_SUSPEND_DISK);
+		if (ret)
+			return ret;
+	}
+
+	/* Free memory before shutting down devices. */
+	free_some_memory();
+	return 0;
+}
+
+/*
+ * Error-path counterpart of prepare_processes(): re-enable the non-boot
+ * CPUs, thaw processes and restore the console.
+ */
+static void unprepare_processes(void)
+{
+ enable_nonboot_cpus();
+ thaw_processes();
+ pm_restore_console();
+}
+
+/*
+ * Take the non-boot CPUs offline and freeze all devices.
+ *
+ * Returns 0 on success. If device_suspend() fails, the platform is told to
+ * finish, the non-boot CPUs are re-enabled and the error is returned.
+ */
+static int prepare_devices(void)
+{
+	int ret;
+
+	disable_nonboot_cpus();
+
+	ret = device_suspend(PMSG_FREEZE);
+	if (!ret)
+		return 0;
+
+	printk("Some devices failed to suspend\n");
+	platform_finish();
+	enable_nonboot_cpus();
+	return ret;
+}
+
+/**
+ * pm_suspend_disk - The granddaddy of power management.
+ *
+ * If we're going through the firmware, then get it over with quickly.
+ *
+ * If not, then call swsusp to do its thing, then figure out how
+ * to power down the system.
+ *
+ * Returns 0 or a negative error code from the preparation, snapshot or
+ * image-writing steps.
+ */
+
+int pm_suspend_disk(void)
+{
+ int error;
+
+ error = prepare_processes();
+ if (!error) {
+ error = prepare_devices();
+ }
+
+ if (error) {
+ unprepare_processes();
+ return error;
+ }
+
+ pr_debug("PM: Attempting to suspend to disk.\n");
+ /* Firmware mode: hand over to the platform; finish() is not run here. */
+ if (pm_disk_mode == PM_DISK_FIRMWARE)
+ return pm_ops->enter(PM_SUSPEND_DISK);
+
+ pr_debug("PM: snapshotting memory.\n");
+ /*
+ * in_suspend is declared __nosavedata. NOTE(review): presumably it is
+ * therefore still 1 on the suspend path but reads as 0 once a saved
+ * image has been restored, which is what the test below relies on.
+ */
+ in_suspend = 1;
+ if ((error = swsusp_suspend()))
+ goto Done;
+
+ if (in_suspend) {
+ pr_debug("PM: writing image.\n");
+ error = swsusp_write();
+ if (!error)
+ power_down(pm_disk_mode);
+ } else
+ pr_debug("PM: Image restored successfully.\n");
+ swsusp_free();
+ Done:
+ finish();
+ return error;
+}
+
+
+/**
+ * software_resume - Resume from a saved image.
+ *
+ * Called as a late_initcall (so all devices are discovered and
+ * initialized), we call swsusp to see if we have a saved image or not.
+ * If so, we quiesce devices, then restore the saved image. We will
+ * return above (in pm_suspend_disk() ) if everything goes well.
+ * Otherwise, we fail gracefully and return to the normally
+ * scheduled program.
+ *
+ * Always returns 0, whether or not an image was found.
+ */
+
+static int software_resume(void)
+{
+	int error;
+
+	if (noresume) {
+		/**
+		 * FIXME: If noresume is specified, we need to find the partition
+		 * and reset it back to normal swap space.
+		 */
+		return 0;
+	}
+
+	pr_debug("PM: Checking swsusp image.\n");
+
+	if ((error = swsusp_check()))
+		goto Done;
+
+	pr_debug("PM: Preparing processes for restore.\n");
+
+	if ((error = prepare_processes())) {
+		swsusp_close();
+		goto Cleanup;
+	}
+
+	pr_debug("PM: Reading swsusp image.\n");
+
+	if ((error = swsusp_read()))
+		goto Cleanup;
+
+	pr_debug("PM: Preparing devices for restore.\n");
+
+	if ((error = prepare_devices()))
+		goto Free;
+
+	mb();
+
+	pr_debug("PM: Restoring saved image.\n");
+	swsusp_resume();
+	/* Only reached if the restore did not take over. */
+	pr_debug("PM: Restore failed, recovering.\n");
+	finish();
+ Free:
+	swsusp_free();
+ Cleanup:
+	unprepare_processes();
+ Done:
+	pr_debug("PM: Resume from disk failed.\n");
+	return 0;
+}
+
+late_initcall(software_resume);
+
+
+/*
+ * Names shown and accepted by the 'disk' attribute, indexed by
+ * suspend_disk_method_t value.
+ */
+static char * pm_disk_modes[] = {
+ [PM_DISK_FIRMWARE] = "firmware",
+ [PM_DISK_PLATFORM] = "platform",
+ [PM_DISK_SHUTDOWN] = "shutdown",
+ [PM_DISK_REBOOT] = "reboot",
+};
+
+/**
+ * disk - Control suspend-to-disk mode
+ *
+ * Suspend-to-disk can be handled in several ways. The greatest
+ * distinction is who writes memory to disk - the firmware or the OS.
+ * If the firmware does it, we assume that it also handles suspending
+ * the system.
+ * If the OS does it, then we have three options for putting the system
+ * to sleep - using the platform driver (e.g. ACPI or other PM registers),
+ * powering off the system or rebooting the system (for testing).
+ *
+ * The system will support either 'firmware' or 'platform', and that is
+ * known a priori (and encoded in pm_ops). But, the user may choose
+ * 'shutdown' or 'reboot' as alternatives.
+ *
+ * show() will display what the mode is currently set to.
+ * store() will accept one of
+ *
+ * 'firmware'
+ * 'platform'
+ * 'shutdown'
+ * 'reboot'
+ *
+ * It will only change to 'firmware' or 'platform' if the system
+ * supports it (as determined from pm_ops->pm_disk_mode).
+ */
+
+/* Write the name of the current suspend-to-disk mode, plus a newline, to @buf. */
+static ssize_t disk_show(struct subsystem * subsys, char * buf)
+{
+ return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
+}
+
+
+/*
+ * disk_store - select the suspend-to-disk mode by name.
+ * @s:   subsystem the attribute belongs to (unused).
+ * @buf: mode name, optionally terminated by a newline.
+ * @n:   number of bytes in @buf.
+ *
+ * The name must match one of pm_disk_modes[] exactly; a bare prefix (or an
+ * empty write) is rejected. 'shutdown' and 'reboot' are always accepted;
+ * 'firmware' and 'platform' only when pm_ops provides ->enter() and
+ * advertises that mode in pm_ops->pm_disk_mode.
+ *
+ * Returns @n on success or -EINVAL if the mode is unknown or unsupported.
+ */
+static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
+{
+	int error = 0;
+	int i;
+	int len;
+	char *p;
+	suspend_disk_method_t mode = 0;
+
+	p = memchr(buf, '\n', n);
+	len = p ? p - buf : n;
+
+	down(&pm_sem);
+	for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
+		/* Compare lengths too, so that "s" does not select "shutdown". */
+		if (len == (int) strlen(pm_disk_modes[i]) &&
+		    !strncmp(buf, pm_disk_modes[i], len)) {
+			mode = i;
+			break;
+		}
+	}
+	if (mode) {
+		if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
+			pm_disk_mode = mode;
+		else {
+			if (pm_ops && pm_ops->enter &&
+			    (mode == pm_ops->pm_disk_mode))
+				pm_disk_mode = mode;
+			else
+				error = -EINVAL;
+		}
+	} else
+		error = -EINVAL;
+
+	/* Only report a change that actually happened (and a valid name). */
+	if (!error)
+		pr_debug("PM: suspend-to-disk mode set to '%s'\n",
+			 pm_disk_modes[mode]);
+	up(&pm_sem);
+	return error ? error : n;
+}
+
+power_attr(disk);
+
+/* Report the current resume device as "major:minor" followed by a newline. */
+static ssize_t resume_show(struct subsystem * subsys, char *buf)
+{
+ return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
+ MINOR(swsusp_resume_device));
+}
+
+/*
+ * resume_store - set the resume device and attempt a manual resume.
+ * @subsys: subsystem the attribute belongs to (unused).
+ * @buf:    device number written by the user, as "major:minor".
+ * @n:      number of bytes in @buf.
+ *
+ * Parses the device number, stores it in swsusp_resume_device, clears
+ * 'noresume' and calls software_resume(). If that call comes back (no
+ * image was restored) the write is still reported as consumed.
+ *
+ * Returns @n when the input was a valid device number, -EINVAL otherwise.
+ */
+static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+	unsigned int maj, min;
+	dev_t res;
+
+	if (sscanf(buf, "%u:%u", &maj, &min) != 2)
+		return -EINVAL;
+
+	/* Reject numbers that do not survive the round trip through dev_t. */
+	res = MKDEV(maj,min);
+	if (maj != MAJOR(res) || min != MINOR(res))
+		return -EINVAL;
+
+	swsusp_resume_device = res;
+	printk("Attempting manual resume\n");
+	noresume = 0;
+	software_resume();
+
+	return n;
+}
+
+power_attr(resume);
+
+/* NULL-terminated list of the attributes defined in this file. */
+static struct attribute * g[] = {
+ &disk_attr.attr,
+ &resume_attr.attr,
+ NULL,
+};
+
+
+/* Group wrapping the list above; registered by pm_disk_init(). */
+static struct attribute_group attr_group = {
+ .attrs = g,
+};
+
+
+/* Create the 'disk' and 'resume' attribute files under the power subsystem's kobject. */
+static int __init pm_disk_init(void)
+{
+ return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
+}
+
+core_initcall(pm_disk_init);
+
+
+/*
+ * resume_setup - handle the "resume=" kernel command line option.
+ * @str: text following "resume=".
+ *
+ * Copies @str into resume_file, truncating it to fit, unless 'noresume'
+ * has already been set. Always returns 1.
+ */
+static int __init resume_setup(char *str)
+{
+	if (noresume)
+		return 1;
+
+	/*
+	 * Size the copy from the buffer itself and terminate it explicitly
+	 * rather than relying on the last byte still being zero.
+	 */
+	strncpy(resume_file, str, sizeof(resume_file) - 1);
+	resume_file[sizeof(resume_file) - 1] = '\0';
+	return 1;
+}
+
+/* Handle the "noresume" kernel command line option by setting the noresume flag. */
+static int __init noresume_setup(char *str)
+{
+ noresume = 1;
+ return 1;
+}
+
+__setup("noresume", noresume_setup);
+__setup("resume=", resume_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
new file mode 100644
index 000000000000..7960ddf04a57
--- /dev/null
+++ b/kernel/power/main.c
@@ -0,0 +1,269 @@
+/*
+ * kernel/power/main.c - PM subsystem core functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ *
+ * This file is released under the GPLv2
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+
+
+#include "power.h"
+
+DECLARE_MUTEX(pm_sem);
+
+struct pm_ops * pm_ops = NULL;
+suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
+
+/**
+ * pm_set_ops - Set the global power method table.
+ * @ops: Pointer to ops structure.
+ */
+
+void pm_set_ops(struct pm_ops * ops)
+{
+ /* Replace the global pointer while holding pm_sem. */
+ down(&pm_sem);
+ pm_ops = ops;
+ up(&pm_sem);
+}
+
+
+/**
+ * suspend_prepare - Do prep work before entering low-power state.
+ * @state: State we're entering.
+ *
+ * This is common code that is called for each state that we're
+ * entering. Allocate a console, stop all processes, then make sure
+ * the platform can enter the requested state.
+ */
+
+static int suspend_prepare(suspend_state_t state)
+{
+ int error = 0;
+
+ /* Nothing can be entered without a platform ->enter() method. */
+ if (!pm_ops || !pm_ops->enter)
+ return -EPERM;
+
+ pm_prepare_console();
+
+ if (freeze_processes()) {
+ error = -EAGAIN;
+ goto Thaw;
+ }
+
+ if (pm_ops->prepare) {
+ if ((error = pm_ops->prepare(state)))
+ goto Thaw;
+ }
+
+ if ((error = device_suspend(PMSG_SUSPEND))) {
+ printk(KERN_ERR "Some devices failed to suspend\n");
+ goto Finish;
+ }
+ return 0;
+ /* Error unwinding: undo the steps above in reverse order. */
+ Finish:
+ if (pm_ops->finish)
+ pm_ops->finish(state);
+ Thaw:
+ thaw_processes();
+ pm_restore_console();
+ return error;
+}
+
+
+/*
+ * suspend_enter - power devices down and enter the requested state.
+ * @state: state to enter.
+ *
+ * Runs between local_irq_save() and local_irq_restore(). Returns the error
+ * from device_power_down() if that fails, otherwise the result of
+ * pm_ops->enter().
+ */
+static int suspend_enter(suspend_state_t state)
+{
+ int error = 0;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if ((error = device_power_down(PMSG_SUSPEND))) {
+ printk(KERN_ERR "Some devices failed to power down\n");
+ goto Done;
+ }
+ error = pm_ops->enter(state);
+ /* Power devices back up whatever ->enter() returned. */
+ device_power_up();
+ Done:
+ local_irq_restore(flags);
+ return error;
+}
+
+
+/**
+ * suspend_finish - Do final work before exiting suspend sequence.
+ * @state: State we're coming out of.
+ *
+ * Call platform code to clean up, restart processes, and free the
+ * console that we've allocated. This is not called for suspend-to-disk.
+ */
+
+static void suspend_finish(suspend_state_t state)
+{
+ /* Mirror of suspend_prepare(): devices, platform, processes, console. */
+ device_resume();
+ if (pm_ops && pm_ops->finish)
+ pm_ops->finish(state);
+ thaw_processes();
+ pm_restore_console();
+}
+
+
+
+
+/*
+ * Names shown and accepted by the 'state' attribute, indexed by
+ * suspend_state_t value, followed by a NULL entry.
+ */
+static char * pm_states[] = {
+ [PM_SUSPEND_STANDBY] = "standby",
+ [PM_SUSPEND_MEM] = "mem",
+ [PM_SUSPEND_DISK] = "disk",
+ NULL,
+};
+
+
+/**
+ * enter_state - Do common work of entering low-power state.
+ * @state: pm_state structure for state we're entering.
+ *
+ * Make sure we're the only ones trying to enter a sleep state. Fail
+ * if someone has beat us to it, since we don't want anything weird to
+ * happen when we wake up.
+ * Then, do the setup for suspend, enter the state, and clean up (after
+ * we've woken up).
+ *
+ * Returns -EBUSY if pm_sem is already held, -EPERM if more than one CPU
+ * is online for a non-disk state, or the result of the suspend steps.
+ */
+
+static int enter_state(suspend_state_t state)
+{
+ int error;
+
+ if (down_trylock(&pm_sem))
+ return -EBUSY;
+
+ /* Suspend-to-disk has its own sequence in pm_suspend_disk(). */
+ if (state == PM_SUSPEND_DISK) {
+ error = pm_suspend_disk();
+ goto Unlock;
+ }
+
+ /* Suspend is hard to get right on SMP. */
+ if (num_online_cpus() != 1) {
+ error = -EPERM;
+ goto Unlock;
+ }
+
+ pr_debug("PM: Preparing system for suspend\n");
+ if ((error = suspend_prepare(state)))
+ goto Unlock;
+
+ pr_debug("PM: Entering state.\n");
+ error = suspend_enter(state);
+
+ pr_debug("PM: Finishing up.\n");
+ suspend_finish(state);
+ Unlock:
+ up(&pm_sem);
+ return error;
+}
+
+/*
+ * This is the main interface to the outside world. It needs to be
+ * called from process context. It simply enters PM_SUSPEND_DISK and
+ * returns the result of enter_state().
+ */
+int software_suspend(void)
+{
+ return enter_state(PM_SUSPEND_DISK);
+}
+
+
+/**
+ * pm_suspend - Externally visible function for suspending system.
+ * @state: Enumerated value of state to enter.
+ *
+ * Reject values outside the open range (PM_SUSPEND_ON, PM_SUSPEND_MAX)
+ * with -EINVAL; otherwise hand the state to enter_state() (above) and
+ * return its result.
+ */
+
+int pm_suspend(suspend_state_t state)
+{
+	if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
+		return -EINVAL;
+
+	return enter_state(state);
+}
+
+
+
+decl_subsys(power,NULL,NULL);
+
+
+/**
+ * state - control system power state.
+ *
+ * show() returns what states are supported, which is hard-coded to
+ * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
+ * 'disk' (Suspend-to-Disk).
+ *
+ * store() accepts one of those strings, translates it into the
+ * proper enumerated value, and initiates a suspend transition.
+ */
+
+/*
+ * List every named entry of pm_states[] in @buf, each followed by a space,
+ * then a newline. Returns the number of bytes written.
+ */
+static ssize_t state_show(struct subsystem * subsys, char * buf)
+{
+	char *out = buf;
+	int idx = 0;
+
+	while (idx < PM_SUSPEND_MAX) {
+		const char *name = pm_states[idx++];
+
+		/* Skip the unnamed holes in the table. */
+		if (name)
+			out += sprintf(out,"%s ",name);
+	}
+	out += sprintf(out,"\n");
+	return out - buf;
+}
+
+/*
+ * state_store - enter the sleep state named in @buf.
+ * @subsys: subsystem the attribute belongs to (unused).
+ * @buf:    state name, optionally terminated by a newline.
+ * @n:      number of bytes in @buf.
+ *
+ * The name must match an entry of pm_states[] exactly; a bare prefix (or
+ * an empty write) is rejected instead of silently selecting a state.
+ *
+ * Returns @n on success, -EINVAL for an unknown name, or the error from
+ * enter_state().
+ */
+static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+	suspend_state_t state;
+	char *p;
+	int error;
+	int len;
+
+	p = memchr(buf, '\n', n);
+	len = p ? p - buf : n;
+
+	for (state = PM_SUSPEND_STANDBY; state < PM_SUSPEND_MAX; state++) {
+		const char *name = pm_states[state];
+
+		/* Compare lengths too, so that "d" does not select "disk". */
+		if (name && len == (int) strlen(name) &&
+		    !strncmp(buf, name, len))
+			break;
+	}
+	/* The loop runs to PM_SUSPEND_MAX only when nothing matched. */
+	if (state < PM_SUSPEND_MAX)
+		error = enter_state(state);
+	else
+		error = -EINVAL;
+	return error ? error : n;
+}
+
+power_attr(state);
+
+/* NULL-terminated list of the attributes defined in this file. */
+static struct attribute * g[] = {
+ &state_attr.attr,
+ NULL,
+};
+
+/* Group wrapping the list above; registered by pm_init(). */
+static struct attribute_group attr_group = {
+ .attrs = g,
+};
+
+
+/*
+ * Register the power subsystem and, if that succeeds, create its attribute
+ * group. Returns 0 or the first error encountered.
+ */
+static int __init pm_init(void)
+{
+	int ret;
+
+	ret = subsystem_register(&power_subsys);
+	if (ret)
+		return ret;
+
+	return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
+}
+
+core_initcall(pm_init);
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
new file mode 100644
index 000000000000..61deda04e39e
--- /dev/null
+++ b/kernel/power/pm.c
@@ -0,0 +1,265 @@
+/*
+ * pm.c - Power management interface
+ *
+ * Copyright (C) 2000 Andrew Henroid
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+#include <linux/interrupt.h>
+
+int pm_active;
+
+/*
+ * Locking notes:
+ * pm_devs_lock can be a semaphore providing pm ops are not called
+ * from an interrupt handler (already a bad idea so no change here). Each
+ * change must be protected so that an unlink of an entry doesn't clash
+ * with a pm send - which is permitted to sleep in the current architecture
+ *
+ * Module unloads clashing with pm events now work out safely, the module
+ * unload path will block until the event has been sent. It may well block
+ * until a resume but that will be fine.
+ */
+
+static DECLARE_MUTEX(pm_devs_lock);
+static LIST_HEAD(pm_devs);
+
+/**
+ * pm_register - register a device with power management
+ * @type: device type
+ * @id: device ID
+ * @callback: callback function
+ *
+ * Add a device to the list of devices that wish to be notified about
+ * power management events. A &pm_dev structure is returned on success,
+ * on failure the return is %NULL.
+ *
+ * The callback function will be called in process context and
+ * it may sleep.
+ */
+
+struct pm_dev *pm_register(pm_dev_t type,
+			   unsigned long id,
+			   pm_callback callback)
+{
+	struct pm_dev *new_dev;
+
+	new_dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
+	if (!new_dev)
+		return NULL;
+
+	/* Start from a zeroed structure, then fill in the caller's values. */
+	memset(new_dev, 0, sizeof(*new_dev));
+	new_dev->type = type;
+	new_dev->id = id;
+	new_dev->callback = callback;
+
+	/* Publish the device at the head of the list, under the lock. */
+	down(&pm_devs_lock);
+	list_add(&new_dev->entry, &pm_devs);
+	up(&pm_devs_lock);
+
+	return new_dev;
+}
+
+/**
+ * pm_unregister - unregister a device with power management
+ * @dev: device to unregister
+ *
+ * Remove a device from the power management notification lists. The
+ * dev passed must be a handle previously returned by pm_register.
+ */
+
+void pm_unregister(struct pm_dev *dev)
+{
+	/* A NULL handle is silently ignored. */
+	if (!dev)
+		return;
+
+	down(&pm_devs_lock);
+	list_del(&dev->entry);
+	up(&pm_devs_lock);
+
+	kfree(dev);
+}
+
+/*
+ * Unlink and free @dev without taking pm_devs_lock; the only caller here,
+ * pm_unregister_all(), already holds it. A NULL @dev is ignored.
+ */
+static void __pm_unregister(struct pm_dev *dev)
+{
+	if (!dev)
+		return;
+
+	list_del(&dev->entry);
+	kfree(dev);
+}
+
+/**
+ * pm_unregister_all - unregister all devices with matching callback
+ * @callback: callback function pointer
+ *
+ * Unregister every device that would call the callback passed. This
+ * is primarily meant as a helper function for loadable modules. It
+ * enables a module to give up all its managed devices without keeping
+ * its own private list.
+ */
+
+void pm_unregister_all(pm_callback callback)
+{
+	struct list_head *pos, *next;
+
+	if (!callback)
+		return;
+
+	down(&pm_devs_lock);
+	for (pos = pm_devs.next; pos != &pm_devs; pos = next) {
+		struct pm_dev *dev = list_entry(pos, struct pm_dev, entry);
+
+		/* Fetch the successor first: dev may be freed just below. */
+		next = pos->next;
+		if (dev->callback == callback)
+			__pm_unregister(dev);
+	}
+	up(&pm_devs_lock);
+}
+
+/**
+ * pm_send - send request to a single device
+ * @dev: device to send to
+ * @rqst: power management request
+ * @data: data for the callback
+ *
+ * Issue a power management request to a given device. The
+ * %PM_SUSPEND and %PM_RESUME events are handled specially. The
+ * data field must hold the intended next state. No call is made
+ * if the state matches.
+ *
+ * BUGS: what stops two power management requests occurring in parallel
+ * and conflicting.
+ *
+ * WARNING: Calling pm_send directly is not generally recommended, in
+ * particular there is no locking against the pm_dev going away. The
+ * caller must maintain all needed locking or have 'inside knowledge'
+ * on the safety. Also remember that this function is not locked against
+ * pm_unregister. This means that you must handle SMP races on callback
+ * execution and unload yourself.
+ */
+
+static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+ int status = 0;
+ unsigned long prev_state, next_state;
+
+ /* Must not be called from interrupt context. */
+ if (in_interrupt())
+ BUG();
+
+ switch (rqst) {
+ case PM_SUSPEND:
+ case PM_RESUME:
+ /* @data carries the target state; skip the callback if already there. */
+ prev_state = dev->state;
+ next_state = (unsigned long) data;
+ if (prev_state != next_state) {
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ /* Record the transition only if the callback accepted it. */
+ if (!status) {
+ dev->state = next_state;
+ dev->prev_state = prev_state;
+ }
+ }
+ else {
+ dev->prev_state = prev_state;
+ }
+ break;
+ default:
+ /* Every other request is passed straight to the callback. */
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ break;
+ }
+ return status;
+}
+
+/*
+ * Undo incomplete request: walk the list backwards from the entry before
+ * @last and send every device whose state changed back to its previous
+ * state.
+ */
+static void pm_undo_all(struct pm_dev *last)
+{
+	struct list_head *pos;
+
+	for (pos = last->entry.prev; pos != &pm_devs; pos = pos->prev) {
+		struct pm_dev *dev = list_entry(pos, struct pm_dev, entry);
+		pm_request_t undo;
+
+		if (dev->state == dev->prev_state)
+			continue;
+
+		/* previous state was zero (running) resume or
+		 * previous state was non-zero (suspended) suspend
+		 */
+		undo = dev->prev_state ? PM_SUSPEND : PM_RESUME;
+		pm_send(dev, undo, (void*) dev->prev_state);
+	}
+}
+
+/**
+ * pm_send_all - send request to all managed devices
+ * @rqst: power management request
+ * @data: data for the callback
+ *
+ * Issue a power management request to all devices. The
+ * %PM_SUSPEND events are handled specially. Any device is
+ * permitted to fail a suspend by returning a non zero (error)
+ * value from its callback function. If any device vetoes a
+ * suspend request then all other devices that have suspended
+ * during the processing of this request are restored to their
+ * previous state.
+ *
+ * WARNING: This function takes the pm_devs_lock. The lock is not dropped until
+ * the callbacks have completed. This prevents races against pm locking
+ * functions, races against module unload pm_unregister code. It does
+ * mean however that you must not issue pm_ functions within the callback
+ * or you will deadlock and users will hate you.
+ *
+ * Zero is returned on success. If a suspend fails then the status
+ * from the device that vetoes the suspend is returned.
+ *
+ * BUGS: what stops two power management requests occurring in parallel
+ * and conflicting.
+ */
+
+int pm_send_all(pm_request_t rqst, void *data)
+{
+ struct list_head *entry;
+
+ /* The lock is held across every callback invoked below. */
+ down(&pm_devs_lock);
+ entry = pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (dev->callback) {
+ int status = pm_send(dev, rqst, data);
+ if (status) {
+ /* return devices to previous state on
+ * failed suspend request
+ */
+ if (rqst == PM_SUSPEND)
+ pm_undo_all(dev);
+ /* Stop at the first failure and report it. */
+ up(&pm_devs_lock);
+ return status;
+ }
+ }
+ entry = entry->next;
+ }
+ up(&pm_devs_lock);
+ return 0;
+}
+
+EXPORT_SYMBOL(pm_register);
+EXPORT_SYMBOL(pm_unregister);
+EXPORT_SYMBOL(pm_unregister_all);
+EXPORT_SYMBOL(pm_send_all);
+EXPORT_SYMBOL(pm_active);
+
+
diff --git a/kernel/power/power.h b/kernel/power/power.h
new file mode 100644
index 000000000000..cd6a3493cc0d
--- /dev/null
+++ b/kernel/power/power.h
@@ -0,0 +1,52 @@
+#include <linux/suspend.h>
+#include <linux/utsname.h>
+
+/* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but
+ we probably do not take enough locks for switching consoles, etc,
+ so bad things might happen.
+*/
+#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
+#endif
+
+
+/*
+ * Header describing a suspend image; aligned to PAGE_SIZE.
+ * NOTE(review): field meanings below are inferred from their names only;
+ * the code that fills them in is not part of this header - confirm there.
+ */
+struct swsusp_info {
+ struct new_utsname uts;
+ u32 version_code;
+ unsigned long num_physpages;
+ int cpus;
+ unsigned long image_pages;
+ unsigned long pagedir_pages;
+ suspend_pagedir_t * suspend_pagedir;
+ swp_entry_t pagedir[768];
+} __attribute__((aligned(PAGE_SIZE)));
+
+
+
+#ifdef CONFIG_SOFTWARE_SUSPEND
+extern int pm_suspend_disk(void);
+
+#else
+/* Stub used when CONFIG_SOFTWARE_SUSPEND is not set: always fails with -EPERM. */
+static inline int pm_suspend_disk(void)
+{
+ return -EPERM;
+}
+#endif
+extern struct semaphore pm_sem;
+/*
+ * power_attr(name) defines a static subsys_attribute called name_attr with
+ * file name "name" and mode 0644, wired to name_show() and name_store().
+ * Both functions must be declared before the macro is used.
+ */
+#define power_attr(_name) \
+static struct subsys_attribute _name##_attr = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = 0644, \
+ }, \
+ .show = _name##_show, \
+ .store = _name##_store, \
+}
+
+extern struct subsystem power_subsys;
+
+extern int freeze_processes(void);
+extern void thaw_processes(void);
+
+extern int pm_prepare_console(void);
+extern void pm_restore_console(void);
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
new file mode 100644
index 000000000000..715081b2d829
--- /dev/null
+++ b/kernel/power/poweroff.c
@@ -0,0 +1,45 @@
+/*
+ * poweroff.c - sysrq handler to gracefully power down machine.
+ *
+ * This file is released under the GPL v2
+ */
+
+#include <linux/kernel.h>
+#include <linux/sysrq.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <linux/workqueue.h>
+
+/*
+ * When the user hits Sys-Rq o to power down the machine this is the
+ * callback we use.
+ */
+
+/* Work function: call pm_power_off() if a power-off handler is installed. */
+static void do_poweroff(void *dummy)
+{
+ if (pm_power_off)
+ pm_power_off();
+}
+
+static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
+
+/*
+ * SysRq handler: does not power off directly but queues poweroff_work, so
+ * do_poweroff() runs later from the work queue. All arguments are unused.
+ */
+static void handle_poweroff(int key, struct pt_regs *pt_regs,
+ struct tty_struct *tty)
+{
+ schedule_work(&poweroff_work);
+}
+
+/* SysRq operation descriptor; registered for the 'o' key by pm_sysrq_init(). */
+static struct sysrq_key_op sysrq_poweroff_op = {
+ .handler = handle_poweroff,
+ .help_msg = "powerOff",
+ .action_msg = "Power Off",
+ .enable_mask = SYSRQ_ENABLE_BOOT,
+};
+
+/*
+ * Register the power-off handler for SysRq 'o'. Run once as a
+ * subsys_initcall, hence marked __init like the other initcalls in this
+ * directory. Always returns 0.
+ */
+static int __init pm_sysrq_init(void)
+{
+	register_sysrq_key('o', &sysrq_poweroff_op);
+	return 0;
+}
+
+subsys_initcall(pm_sysrq_init);
diff --git a/kernel/power/process.c b/kernel/power/process.c
new file mode 100644
index 000000000000..78d92dc6a1ed
--- /dev/null
+++ b/kernel/power/process.c
@@ -0,0 +1,121 @@
+/*
+ * kernel/power/process.c - Functions for starting/stopping processes on
+ * suspend transitions.
+ *
+ * Originally from swsusp.
+ */
+
+
+#undef DEBUG
+
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+
+/*
+ * Timeout for stopping processes: freeze_processes() gives up once this
+ * many jiffies (six seconds' worth) have passed since it started.
+ */
+#define TIMEOUT (6 * HZ)
+
+
+/*
+ * Decide whether the freezer should act on task @p.
+ *
+ * Returns 0 for the calling task itself, for tasks flagged PF_NOFREEZE,
+ * for zombie or dead tasks, and for stopped or traced tasks; returns 1
+ * for everything else.
+ */
+static inline int freezeable(struct task_struct * p)
+{
+	if (p == current)
+		return 0;
+	if (p->flags & PF_NOFREEZE)
+		return 0;
+	if (p->exit_state == EXIT_ZOMBIE || p->exit_state == EXIT_DEAD)
+		return 0;
+	if (p->state == TASK_STOPPED || p->state == TASK_TRACED)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * The refrigerator is the place where frozen processes are stored :-).
+ *
+ * Called in the context of the task being frozen.  The task swaps its
+ * PF_FREEZE request flag for PF_FROZEN and then keeps calling schedule()
+ * until somebody else (thaw_processes()) clears PF_FROZEN.  The task state
+ * found on entry is put back before returning.  @flag is not used.
+ */
+void refrigerator(unsigned long flag)
+{
+ /* Hmm, should we be allowed to suspend when there are realtime
+ processes around? */
+ long save;
+ save = current->state;
+ current->state = TASK_UNINTERRUPTIBLE;
+ pr_debug("%s entered refrigerator\n", current->comm);
+ printk("=");
+ /* The freeze request has been seen; drop it. */
+ current->flags &= ~PF_FREEZE;
+
+ spin_lock_irq(&current->sighand->siglock);
+ recalc_sigpending(); /* We sent fake signal, clean it up */
+ spin_unlock_irq(&current->sighand->siglock);
+
+ /* Stay here until PF_FROZEN is cleared from outside. */
+ current->flags |= PF_FROZEN;
+ while (current->flags & PF_FROZEN)
+ schedule();
+ pr_debug("%s left refrigerator\n", current->comm);
+ current->state = save;
+}
+
+/*
+ * freeze_processes - ask every freezeable task to enter the refrigerator.
+ *
+ * Repeatedly walks all threads, sets PF_FREEZE on each freezeable task
+ * that is not yet PF_FROZEN and wakes it with signal_wake_up(), then
+ * yields.  The walk is repeated until a pass finds nothing left to do.
+ *
+ * Returns 0 on success, otherwise the number of tasks that were still
+ * not frozen when TIMEOUT expired.  The timeout is only treated as a
+ * failure when there really are tasks left: a pass that finds nothing to
+ * do is a success even if it completes after the deadline.
+ */
+int freeze_processes(void)
+{
+	int todo;
+	unsigned long start_time;
+	struct task_struct *g, *p;
+
+	printk( "Stopping tasks: " );
+	start_time = jiffies;
+	do {
+		todo = 0;
+		read_lock(&tasklist_lock);
+		do_each_thread(g, p) {
+			unsigned long flags;
+			if (!freezeable(p))
+				continue;
+			if ((p->flags & PF_FROZEN) ||
+			    (p->state == TASK_TRACED) ||
+			    (p->state == TASK_STOPPED))
+				continue;
+
+			/* FIXME: smp problem here: we may not access other process' flags
+			   without locking */
+			p->flags |= PF_FREEZE;
+			spin_lock_irqsave(&p->sighand->siglock, flags);
+			signal_wake_up(p, 0);
+			spin_unlock_irqrestore(&p->sighand->siglock, flags);
+			todo++;
+		} while_each_thread(g, p);
+		read_unlock(&tasklist_lock);
+		yield();			/* Yield is okay here */
+		/* Only a pass that left tasks behind can time out. */
+		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
+			printk( "\n" );
+			printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
+			return todo;
+		}
+	} while(todo);
+
+	printk( "|\n" );
+	BUG_ON(in_atomic());
+	return 0;
+}
+
+/*
+ * thaw_processes - let frozen tasks run again.
+ *
+ * For every freezeable task: if it is PF_FROZEN, clear the flag and wake
+ * it; if it is not, log that it was unexpectedly not stopped.  Finishes
+ * with one schedule() call.
+ */
+void thaw_processes(void)
+{
+	struct task_struct *g, *p;
+
+	printk( "Restarting tasks..." );
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (freezeable(p)) {
+			if (!(p->flags & PF_FROZEN)) {
+				printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+			} else {
+				p->flags &= ~PF_FROZEN;
+				wake_up_process(p);
+			}
+		}
+	} while_each_thread(g, p);
+
+	read_unlock(&tasklist_lock);
+	schedule();
+	printk( " done\n" );
+}
+
+EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
new file mode 100644
index 000000000000..7fa7f6e2b7fb
--- /dev/null
+++ b/kernel/power/smp.c
@@ -0,0 +1,85 @@
+/*
+ * kernel/power/smp.c - Functions for stopping other CPUs.
+ *
+ * Copyright 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#undef DEBUG
+
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+
+/*
+ * cpu_counter: number of CPUs currently spinning inside smp_pause().
+ * freeze:      non-zero while those CPUs have to keep spinning.
+ */
+static atomic_t cpu_counter, freeze;
+
+
+/*
+ * Function run on the other CPUs through smp_call_function().  It saves
+ * the processor state, announces itself by incrementing cpu_counter, spins
+ * until 'freeze' drops to zero, then decrements cpu_counter and restores
+ * the processor state.  @data is unused.
+ */
+static void smp_pause(void * data)
+{
+ struct saved_context ctxt;
+ __save_processor_state(&ctxt);
+ printk("Sleeping in:\n");
+ dump_stack();
+ atomic_inc(&cpu_counter);
+ while (atomic_read(&freeze)) {
+ /* FIXME: restore takes place at random piece inside this.
+ This should probably be written in assembly, and
+ preserve general-purpose registers, too
+
+ What about stack? We may need to move to new stack here.
+
+ This had better be run with interrupts disabled.
+ */
+ cpu_relax();
+ barrier();
+ }
+ atomic_dec(&cpu_counter);
+ __restore_processor_state(&ctxt);
+}
+
+/* CPU affinity of the suspending task, saved by disable_nonboot_cpus()
+ * and put back by enable_nonboot_cpus(). */
+static cpumask_t oldmask;
+
+/*
+ * Park every CPU except CPU 0.  The caller is first bound to CPU 0 (its
+ * previous affinity is remembered in oldmask), then all other CPUs are
+ * sent into smp_pause() and this function busy-waits until each of them
+ * has checked in through cpu_counter.
+ */
+void disable_nonboot_cpus(void)
+{
+ printk("Freezing CPUs (at %d)", smp_processor_id());
+ oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(0));
+ /* Sleep for up to HZ ticks before checking which CPU we are on. */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ);
+ printk("...");
+ BUG_ON(smp_processor_id() != 0);
+
+ /* FIXME: for this to work, all the CPUs must be running
+ * "idle" thread (or we deadlock). Is that guaranteed? */
+
+ atomic_set(&cpu_counter, 0);
+ atomic_set(&freeze, 1);
+ smp_call_function(smp_pause, NULL, 0, 0);
+ /* Wait until every other online CPU is spinning in smp_pause(). */
+ while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
+ cpu_relax();
+ barrier();
+ }
+ printk("ok\n");
+}
+
+/*
+ * Release the CPUs parked by disable_nonboot_cpus(): clear 'freeze',
+ * busy-wait until cpu_counter has dropped back to zero, then restore the
+ * caller's saved CPU affinity and call schedule() once.
+ */
+void enable_nonboot_cpus(void)
+{
+	printk("Restarting CPUs");
+	atomic_set(&freeze, 0);
+	for (;;) {
+		if (!atomic_read(&cpu_counter))
+			break;
+		cpu_relax();
+		barrier();
+	}
+	printk("...");
+	set_cpus_allowed(current, oldmask);
+	schedule();
+	printk("ok\n");
+
+}
+
+
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
new file mode 100644
index 000000000000..ae5bebc3b18f
--- /dev/null
+++ b/kernel/power/swsusp.c
@@ -0,0 +1,1433 @@
+/*
+ * linux/kernel/power/swsusp.c
+ *
+ * This file is to realize architecture-independent
+ * machine suspend feature using pretty near only high-level routines
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
+ *
+ * This file is released under the GPLv2.
+ *
+ * I'd like to thank the following people for their work:
+ *
+ * Pavel Machek <pavel@ucw.cz>:
+ * Modifications, defectiveness pointing, being with me at the very beginning,
+ * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
+ *
+ * Steve Doddi <dirk@loth.demon.co.uk>:
+ * Support the possibility of hardware state restoring.
+ *
+ * Raph <grey.havens@earthling.net>:
+ * Support for preserving states of network devices and virtual console
+ * (including X and svgatextmode)
+ *
+ * Kurt Garloff <garloff@suse.de>:
+ * Straightened the critical function in order to prevent compilers from
+ * playing tricks with local variables.
+ *
+ * Andreas Mohr <a.mohr@mailto.de>
+ *
+ * Alex Badea <vampire@go.ro>:
+ * Fixed runaway init
+ *
+ * More state savers are welcome. Especially for the scsi layer...
+ *
+ * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/bitops.h>
+#include <linux/vt_kern.h>
+#include <linux/kbd_kern.h>
+#include <linux/keyboard.h>
+#include <linux/spinlock.h>
+#include <linux/genhd.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/swap.h>
+#include <linux/pm.h>
+#include <linux/device.h>
+#include <linux/buffer_head.h>
+#include <linux/swapops.h>
+#include <linux/bootmem.h>
+#include <linux/syscalls.h>
+#include <linux/console.h>
+#include <linux/highmem.h>
+#include <linux/bio.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#include "power.h"
+
+/* References to section boundaries: start and end of the "nosave" data
+ * section; pfn_is_nosave() turns them into a page frame range. */
+extern const void __nosave_begin, __nosave_end;
+
+/* Variables to be preserved over suspend.  swsusp_alloc() stores the final
+ * nr_copy_pages here and swsusp_suspend() BUG()s if the two differ. */
+static int nr_copy_pages_check;
+
+/* Name of the resume device; only its length is examined in this chunk
+ * (swsusp_swap_check()). */
+extern char resume_file[];
+
+/* Local variables that should not be affected by save: the number of
+ * pages in the image, placed in the nosave section. */
+unsigned int nr_copy_pages __nosavedata = 0;
+
+/* The suspend pagedir is allocated before the final copy, therefore it
+ must be freed after resume.
+
+ Warning: this is evil. There are actually two pagedirs at the time of
+ resume. One is "pagedir_save", which is an empty frame allocated at
+ the time of suspend, that must be freed. The second is "pagedir_nosave",
+ allocated at the time of resume, that travels through memory so as not
+ to collide with anything.
+
+ Warning: this is even more evil than it seems. The pagedirs this file
+ talks about are completely different from the page directories used by
+ the MMU hardware.
+ */
+suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
+static suspend_pagedir_t *pagedir_save;
+
+/* Signature written over the swap signature while an image is stored. */
+#define SWSUSP_SIG "S1SUSPEND"
+
+/*
+ * In-memory copy of the first page of the suspend swap device (it is read
+ * from and written to swap offset 0 by mark_swapfiles()).  The struct is
+ * packed and exactly one page long: padding, then the swap entry of the
+ * swsusp_info page, then the saved original signature, then the signature
+ * currently in force in the last 10 bytes.
+ */
+static struct swsusp_header {
+ char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
+ swp_entry_t swsusp_info;
+ char orig_sig[10];
+ char sig[10];
+} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
+
+/* Image header; filled by init_header() and write_pagedir(), written to
+ * swap by close_swap(). */
+static struct swsusp_info swsusp_info;
+
+/*
+ * XXX: We try to keep some more pages free so that I/O operations succeed
+ * without paging. Should this be more?
+ *
+ * enough_free_mem() and enough_swap() both add this margin to the image
+ * size before comparing.
+ */
+#define PAGES_FOR_IO 512
+
+/*
+ * Saving part...
+ */
+
+/* We memorize in swapfile_used what swap devices are used for suspension */
+#define SWAPFILE_UNUSED 0
+#define SWAPFILE_SUSPEND 1 /* This is the suspending device */
+#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
+
+/* One SWAPFILE_* state per swap slot, filled in by swsusp_swap_check(). */
+static unsigned short swapfile_used[MAX_SWAPFILES];
+/* Index of the swap device that receives the image; 0xFFFF means none. */
+static unsigned short root_swap;
+
+/*
+ * mark_swapfiles - stamp the suspend device with the swsusp signature.
+ * @prev: swap entry of the swsusp_info page, stored in the header.
+ *
+ * Reads page 0 of the root swap device into swsusp_header.  If it carries
+ * one of the two swap signatures, the signature is saved in orig_sig,
+ * replaced by SWSUSP_SIG, @prev is recorded and the page is written back;
+ * the result of that write is returned.  Otherwise -ENODEV is returned.
+ * The result of the initial read is not examined.
+ */
+static int mark_swapfiles(swp_entry_t prev)
+{
+	struct page *hdr_page = virt_to_page((unsigned long)&swsusp_header);
+	swp_entry_t first = swp_entry(root_swap, 0);
+
+	rw_swap_page_sync(READ, first, hdr_page);
+
+	if (memcmp("SWAP-SPACE", swsusp_header.sig, 10) &&
+	    memcmp("SWAPSPACE2", swsusp_header.sig, 10)) {
+		pr_debug("swsusp: Partition is not swap space.\n");
+		return -ENODEV;
+	}
+
+	memcpy(swsusp_header.orig_sig, swsusp_header.sig, 10);
+	memcpy(swsusp_header.sig, SWSUSP_SIG, 10);
+	swsusp_header.swsusp_info = prev;
+	return rw_swap_page_sync(WRITE, first, hdr_page);
+}
+
+/*
+ * Tell whether a swap device is the configured resume device.
+ *
+ * The comparison is made on device numbers, not on names, so aliases of
+ * the same block device compare equal: /dev/hda4 and
+ * /dev/ide/host0/bus0/target0/lun0/part4 (devfs) are the same device.
+ * This is *necessary* for devfs, since the resume code can only recognize
+ * the short form while the suspend code would see the long name.
+ *
+ * Returns non-zero only if the swap file is a block device whose
+ * major/minor pair equals swsusp_resume_device.
+ */
+static int is_resume_device(const struct swap_info_struct *swap_info)
+{
+	struct inode *inode = swap_info->swap_file->f_dentry->d_inode;
+
+	if (!S_ISBLK(inode->i_mode))
+		return 0;
+
+	return swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
+}
+
+/*
+ * swsusp_swap_check - classify the swap devices before saving the image.
+ *
+ * Every swap slot is marked SWAPFILE_UNUSED (flags == 0),
+ * SWAPFILE_SUSPEND (the device that will hold the image) or
+ * SWAPFILE_IGNORED.  With a resume= device configured, the suspend device
+ * is the one matching is_resume_device(); without one, the first active
+ * swap device is used and a warning is logged.  The warning is printed at
+ * most once per call and is newline-terminated.
+ *
+ * Returns 0 if a suspend device was found, -ENODEV otherwise.
+ */
+static int swsusp_swap_check(void) /* This is called before saving image */
+{
+	int i, len;
+	int warned = 0;
+
+	len=strlen(resume_file);
+	root_swap = 0xFFFF;
+
+	swap_list_lock();
+	for(i=0; i<MAX_SWAPFILES; i++) {
+		if (swap_info[i].flags == 0) {
+			swapfile_used[i]=SWAPFILE_UNUSED;
+		} else if (!len) {
+			if (!warned) {
+				printk(KERN_WARNING "resume= option should be used to set suspend device\n");
+				warned = 1;
+			}
+			if (root_swap == 0xFFFF) {
+				swapfile_used[i] = SWAPFILE_SUSPEND;
+				root_swap = i;
+			} else
+				swapfile_used[i] = SWAPFILE_IGNORED;
+		} else {
+			/* we ignore all swap devices that are not the resume_file */
+			if (is_resume_device(&swap_info[i])) {
+				swapfile_used[i] = SWAPFILE_SUSPEND;
+				root_swap = i;
+			} else {
+				swapfile_used[i] = SWAPFILE_IGNORED;
+			}
+		}
+	}
+	swap_list_unlock();
+	return (root_swap != 0xffff) ? 0 : -ENODEV;
+}
+
+/**
+ *	lock_swapdevices - toggle the usability of the ignored swap devices.
+ *
+ *	Flips the low eight flag bits of every swap device marked
+ *	SWAPFILE_IGNORED.  This is called after the image has been saved, so
+ *	the modification is lost after resume... and that's what we want.
+ *	Because the operation is an XOR, a second call undoes the first and
+ *	makes the devices usable again.
+ */
+static void lock_swapdevices(void)
+{
+	int i;
+
+	swap_list_lock();
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		if (swapfile_used[i] != SWAPFILE_IGNORED)
+			continue;
+		swap_info[i].flags ^= 0xFF;
+	}
+	swap_list_unlock();
+}
+
+/**
+ *	write_page - Write one page to a fresh swap location.
+ *	@addr:	Address we're writing.
+ *	@loc:	Place to store the entry we used.
+ *
+ *	Allocate a new swap entry and 'sync' it. Note we discard -EIO
+ *	errors. That is an artifact left over from swsusp. It did not
+ *	check the return of rw_swap_page_sync() at all, since most pages
+ *	written back to swap would return -EIO.
+ *	This is a partial improvement, since we will at least return other
+ *	errors, though we need to eventually fix the damn code.
+ *
+ *	Returns 0 and stores the entry in @loc on success.  Returns -ENOSPC
+ *	if no swap entry could be allocated or the entry is not on the
+ *	suspend device, or the write error otherwise.  On every failure the
+ *	freshly allocated entry is given back with swap_free() and @loc is
+ *	left untouched, so no swap slot is leaked.
+ */
+static int write_page(unsigned long addr, swp_entry_t * loc)
+{
+	swp_entry_t entry;
+	int error;
+
+	entry = get_swap_page();
+	if (!swp_offset(entry))
+		return -ENOSPC;		/* nothing was allocated */
+
+	if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) {
+		/* Landed on a device we must not write to: release it. */
+		swap_free(entry);
+		return -ENOSPC;
+	}
+
+	error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
+	if (error == -EIO)
+		error = 0;
+	if (error) {
+		/* Nobody will ever learn about this entry; release it. */
+		swap_free(entry);
+		return error;
+	}
+
+	*loc = entry;
+	return 0;
+}
+
+/**
+ *	data_free - Free the swap entries used by the saved image.
+ *
+ *	Walk the list of PBEs and free the swap entry recorded in each one,
+ *	stopping at the first PBE that has none.
+ *	This is only used for cleanup when suspend fails.
+ *
+ *	The pagedir is a chain of separately allocated pages (see
+ *	alloc_pagedir()), so it has to be walked through the ->next links
+ *	with for_each_pbe(); indexing it like one flat array would run off
+ *	the end of the first page.
+ */
+static void data_free(void)
+{
+	swp_entry_t entry;
+	struct pbe *p;
+
+	for_each_pbe(p, pagedir_nosave) {
+		entry = p->swap_address;
+		if (!entry.val)
+			break;
+		swap_free(entry);
+		p->swap_address = (swp_entry_t){0};
+	}
+}
+
+/**
+ *	data_write - Write saved image to swap.
+ *
+ *	Walk the list of pages in the image and sync each one to swap with
+ *	write_page(), printing a progress figure every nr_copy_pages / 100
+ *	pages (every page if that quotient is zero).  Stops at, and returns,
+ *	the first write_page() error; returns 0 when every page was written.
+ */
+static int data_write(void)
+{
+	int error = 0, i = 0;
+	unsigned int mod = nr_copy_pages / 100;
+	struct pbe *p;
+
+	if (mod == 0)
+		mod = 1;
+
+	printk( "Writing data to swap (%d pages)...     ", nr_copy_pages );
+	for_each_pbe(p, pagedir_nosave) {
+		if ((i % mod) == 0)
+			printk( "\b\b\b\b%3d%%", i / mod );
+		error = write_page(p->address, &(p->swap_address));
+		if (error)
+			return error;
+		i++;
+	}
+	printk("\b\b\b\bdone\n");
+	return error;
+}
+
+/*
+ * Log every field of the image header (swsusp_info) through pr_debug().
+ * Nothing is modified.
+ */
+static void dump_info(void)
+{
+ pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
+ pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
+ pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
+ pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
+ pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
+ pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
+ pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
+ pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
+ pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
+ pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
+ pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
+}
+
+/*
+ * Reset the image header and fill in what is known before any page is
+ * written: kernel version, physical page count, utsname, CPU count, image
+ * size and the address of the pagedir.  All other fields are left zeroed.
+ */
+static void init_header(void)
+{
+	memset(&swsusp_info, 0, sizeof(swsusp_info));
+
+	/* description of the running system */
+	swsusp_info.version_code = LINUX_VERSION_CODE;
+	swsusp_info.num_physpages = num_physpages;
+	swsusp_info.cpus = num_online_cpus();
+	memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
+
+	/* description of the image */
+	swsusp_info.image_pages = nr_copy_pages;
+	swsusp_info.suspend_pagedir = pagedir_nosave;
+}
+
+/*
+ * Finish the image: write the header page (swsusp_info) to swap and, if
+ * that worked, record its swap entry in the device signature page with
+ * mark_swapfiles().  Returns the first error met, or 0.
+ */
+static int close_swap(void)
+{
+	swp_entry_t entry;
+	int error;
+
+	dump_info();
+	error = write_page((unsigned long)&swsusp_info, &entry);
+	if (error)
+		return error;
+
+	printk( "S" );
+	error = mark_swapfiles(entry);
+	printk( "|\n" );
+	return error;
+}
+
+/**
+ *	free_pagedir_entries - Free the swap entries of the page directory.
+ *
+ *	Releases the first swsusp_info.pagedir_pages entries recorded in
+ *	swsusp_info.pagedir[].  This is used during suspend for error
+ *	recovery.
+ */
+
+static void free_pagedir_entries(void)
+{
+	int i = 0;
+
+	while (i < swsusp_info.pagedir_pages) {
+		swap_free(swsusp_info.pagedir[i]);
+		i++;
+	}
+}
+
+
+/**
+ *	write_pagedir - Write the array of pages holding the page directory.
+ *
+ *	Each pagedir page is written to swap and its swap entry is stored in
+ *	the next free slot of swsusp_info.pagedir[].
+ *	swsusp_info.pagedir_pages is kept equal to the number of entries
+ *	stored so far, so that free_pagedir_entries() releases exactly those
+ *	entries if this function, or a later step, fails.
+ *
+ *	Returns 0 on success, the write_page() error on a failed write, or
+ *	-ENOSPC if the pagedir has more pages than the header can describe.
+ */
+
+static int write_pagedir(void)
+{
+	int error = 0;
+	unsigned n = 0;
+	struct pbe * pbe;
+
+	swsusp_info.pagedir_pages = 0;
+	printk( "Writing pagedir...");
+	for_each_pb_page(pbe, pagedir_nosave) {
+		/* Never write past the fixed-size entry table. */
+		if (n >= ARRAY_SIZE(swsusp_info.pagedir)) {
+			printk(KERN_ERR "swsusp: pagedir too large for image header\n");
+			return -ENOSPC;
+		}
+		if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n])))
+			return error;
+		swsusp_info.pagedir_pages = ++n;
+	}
+
+	printk("done (%u pages)\n", n);
+	return error;
+}
+
+/**
+ *	write_suspend_image - Write entire image and metadata.
+ *
+ *	Order: header initialisation, image data, pagedir, header page and
+ *	signature.  If the data cannot be written, its swap entries are
+ *	freed.  If the pagedir or the final step fails, the pagedir entries
+ *	are freed first and then the data entries.  Returns 0 or the error
+ *	of the step that failed.
+ */
+
+static int write_suspend_image(void)
+{
+	int error;
+
+	init_header();
+
+	error = data_write();
+	if (!error) {
+		error = write_pagedir();
+		if (!error)
+			error = close_swap();
+		if (error)
+			free_pagedir_entries();
+	}
+	if (error)
+		data_free();
+
+	return error;
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * One saved highmem page: @data is the lowmem page holding the copy,
+ * @page is the highmem page it was copied from, @next links the records.
+ */
+struct highmem_page {
+ char *data;
+ struct page *page;
+ struct highmem_page *next;
+};
+
+/* Head of the list of saved highmem pages; new records are pushed at the
+ * front by save_highmem_zone() and popped by restore_highmem(). */
+static struct highmem_page *highmem_copy;
+
+/*
+ * Copy every in-use page of a highmem zone into freshly allocated lowmem
+ * pages and push a record for each onto highmem_copy.
+ *
+ * Page frames that are invalid, reserved, or marked free by
+ * mark_free_pages() are skipped.  Returns 0 on success or -ENOMEM as soon
+ * as an allocation fails; records pushed before the failure stay on the
+ * list.
+ */
+static int save_highmem_zone(struct zone *zone)
+{
+	unsigned long zone_pfn;
+
+	mark_free_pages(zone);
+	for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
+		unsigned long pfn = zone_pfn + zone->zone_start_pfn;
+		struct highmem_page *save;
+		struct page *page;
+		void *kaddr;
+
+		if ((pfn % 1000) == 0)
+			printk(".");
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		/*
+		 * This condition results from rvmalloc() sans vmalloc_32()
+		 * and architectural memory reservations. This should be
+		 * corrected eventually when the cases giving rise to this
+		 * are better understood.
+		 */
+		if (PageReserved(page)) {
+			printk("highmem reserved page?!\n");
+			continue;
+		}
+		BUG_ON(PageNosave(page));
+		if (PageNosaveFree(page))
+			continue;
+
+		save = kmalloc(sizeof(*save), GFP_ATOMIC);
+		if (!save)
+			return -ENOMEM;
+		save->next = highmem_copy;
+		save->page = page;
+		save->data = (void *) get_zeroed_page(GFP_ATOMIC);
+		if (!save->data) {
+			kfree(save);
+			return -ENOMEM;
+		}
+
+		kaddr = kmap_atomic(page, KM_USER0);
+		memcpy(save->data, kaddr, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		highmem_copy = save;
+	}
+	return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+
+/*
+ * Save the contents of all highmem zones (CONFIG_HIGHMEM only; otherwise
+ * this does nothing).  Returns 0 on success or the first error reported
+ * by save_highmem_zone().
+ */
+static int save_highmem(void)
+{
+#ifdef CONFIG_HIGHMEM
+	struct zone *zone;
+
+	pr_debug("swsusp: Saving Highmem\n");
+	for_each_zone(zone) {
+		int res;
+
+		if (!is_highmem(zone))
+			continue;
+		res = save_highmem_zone(zone);
+		if (res)
+			return res;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Copy every page saved by save_highmem() back to its highmem page,
+ * freeing each copy and its record on the way.  The list is empty
+ * afterwards.  Always returns 0.
+ */
+static int restore_highmem(void)
+{
+#ifdef CONFIG_HIGHMEM
+	struct highmem_page *save;
+
+	printk("swsusp: Restoring Highmem\n");
+	while ((save = highmem_copy) != NULL) {
+		void *kaddr;
+
+		highmem_copy = save->next;
+
+		kaddr = kmap_atomic(save->page, KM_USER0);
+		memcpy(kaddr, save->data, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		free_page((long) save->data);
+		kfree(save);
+	}
+#endif
+	return 0;
+}
+
+
+/*
+ * Return 1 if @pfn lies inside the "nosave" section, i.e. between the
+ * page frame holding __nosave_begin (inclusive) and the page-aligned end
+ * of __nosave_end (exclusive); 0 otherwise.
+ */
+static int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long first = __pa(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long limit = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
+
+	if (pfn < first)
+		return 0;
+	return pfn < limit;
+}
+
+/**
+ *	saveable - Determine whether a page should be cloned or not.
+ *	@zone:		Zone the page belongs to.
+ *	@zone_pfn:	Pointer to the page's index within @zone.
+ *
+ *	A page is not saved if its page frame is invalid, if it is flagged
+ *	Nosave, if it is Reserved and inside the statically defined nosave
+ *	range, or if it is flagged NosaveFree (part of a free chunk of
+ *	pages).  Every other page is saved.  A page that is both Reserved
+ *	and Nosave triggers a BUG.
+ *
+ *	Returns 1 if the page has to be saved, 0 otherwise.
+ */
+
+static int saveable(struct zone * zone, unsigned long * zone_pfn)
+{
+	unsigned long pfn = zone->zone_start_pfn + *zone_pfn;
+	struct page * page;
+
+	if (!pfn_valid(pfn))
+		return 0;
+	page = pfn_to_page(pfn);
+
+	BUG_ON(PageReserved(page) && PageNosave(page));
+	if (PageNosave(page))
+		return 0;
+
+	if (PageReserved(page) && pfn_is_nosave(pfn)) {
+		pr_debug("[nosave pfn 0x%lx]", pfn);
+		return 0;
+	}
+
+	return !PageNosaveFree(page);
+}
+
+/*
+ * Recompute nr_copy_pages: the number of saveable pages in all
+ * non-highmem zones.  Free pages are flagged with mark_free_pages()
+ * before each zone is scanned.
+ */
+static void count_data_pages(void)
+{
+	struct zone *zone;
+	unsigned long zone_pfn;
+
+	nr_copy_pages = 0;
+
+	for_each_zone(zone) {
+		if (!is_highmem(zone)) {
+			mark_free_pages(zone);
+			zone_pfn = 0;
+			while (zone_pfn < zone->spanned_pages) {
+				if (saveable(zone, &zone_pfn))
+					nr_copy_pages++;
+				++zone_pfn;
+			}
+		}
+	}
+}
+
+
+/*
+ * Copy every saveable page of the non-highmem zones into the image.
+ *
+ * The PBE list starting at pagedir_nosave is consumed in step with the
+ * scan: each PBE gets the original page's address in ->orig_address and
+ * the page contents are copied to the page at ->address.  The list must
+ * hold exactly as many PBEs as there are saveable pages; running out of
+ * PBEs, or having some left over, is a BUG.
+ */
+static void copy_data_pages(void)
+{
+	struct zone *zone;
+	unsigned long zone_pfn;
+	struct pbe * pbe = pagedir_nosave;
+
+	pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
+	for_each_zone(zone) {
+		if (is_highmem(zone))
+			continue;
+		mark_free_pages(zone);
+		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
+			struct page * page;
+
+			if (!saveable(zone, &zone_pfn))
+				continue;
+
+			page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+			BUG_ON(!pbe);
+			pbe->orig_address = (long) page_address(page);
+			/* copy_page is not usable for copying task structs. */
+			memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
+			pbe = pbe->next;
+		}
+	}
+	BUG_ON(pbe);
+}
+
+
+/**
+ * calc_nr - Determine the number of pages needed for a pbe list.
+ * @nr_copy: Number of pages counted so far.
+ *
+ * Returns @nr_copy plus 'extra', where 'extra' is grown until it is at
+ * least the number of PBES_PER_PAGE-sized groups needed for the grown
+ * total (rounded up).  In other words the PBE pages are themselves
+ * included in the total they have to describe.
+ */
+
+static int calc_nr(int nr_copy)
+{
+ int extra = 0;
+ /* mod is 1 when a partially filled PBE page is needed */
+ int mod = !!(nr_copy % PBES_PER_PAGE);
+ int diff = (nr_copy / PBES_PER_PAGE) + mod;
+
+ do {
+ /* add the PBE pages found missing, then see how many more that needs */
+ extra += diff;
+ nr_copy += diff;
+ mod = !!(nr_copy % PBES_PER_PAGE);
+ diff = (nr_copy / PBES_PER_PAGE) + mod - extra;
+ } while (diff > 0);
+
+ return nr_copy;
+}
+
+/**
+ *	free_pagedir - free pages allocated with alloc_pagedir()
+ *	@pblist:	First page of the chain; may be NULL.
+ *
+ *	The link to the following page is read from the entry at index
+ *	PB_PAGE_SKIP before the current page is freed.
+ */
+
+static inline void free_pagedir(struct pbe *pblist)
+{
+	struct pbe *next_page;
+
+	for (; pblist; pblist = next_page) {
+		next_page = (pblist + PB_PAGE_SKIP)->next;
+		free_page((unsigned long)pblist);
+	}
+}
+
+/**
+ *	fill_pb_page - Create a list of PBEs on a given memory page
+ *	@pbpage:	First PBE of the page.
+ *
+ *	Points the ->next of each PBE below index PB_PAGE_SKIP at the PBE
+ *	that follows it.  The entry at index PB_PAGE_SKIP is not written.
+ */
+
+static inline void fill_pb_page(struct pbe *pbpage)
+{
+	struct pbe *last = pbpage + PB_PAGE_SKIP;
+	struct pbe *p = pbpage;
+
+	do {
+		p->next = p + 1;
+		p++;
+	} while (p < last);
+}
+
+/**
+ * create_pbe_list - Create a list of PBEs on top of a given chain
+ * of memory pages allocated with alloc_pagedir()
+ * @pblist: First page of the chain.
+ * @nr_pages: Number of PBEs the finished list has to contain.
+ *
+ * Pages that are used completely are linked with fill_pb_page().  On the
+ * page where the list ends, the PBEs are linked one by one and the ->next
+ * of the final PBE is set to NULL.
+ */
+
+static void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
+{
+ struct pbe *pbpage, *p;
+ /* num: PBEs available once the current page has been filled */
+ unsigned num = PBES_PER_PAGE;
+
+ for_each_pb_page (pbpage, pblist) {
+ /* stop on the page where the list ends */
+ if (num >= nr_pages)
+ break;
+
+ fill_pb_page(pbpage);
+ num += PBES_PER_PAGE;
+ }
+ if (pbpage) {
+ /* link the PBEs of the last page and terminate the list */
+ for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
+}
+
+/**
+ * alloc_pagedir - Allocate the page directory.
+ * @nr_pages: Number of PBEs the directory has to hold.
+ *
+ * First, determine exactly how many pages we need and
+ * allocate them.
+ *
+ * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
+ * struct pbe elements (pbes) and the last element in the page points
+ * to the next page.
+ *
+ * On each page we set up a list of struct_pbe elements.
+ *
+ * Returns the first page of the chain, or NULL if @nr_pages is zero or
+ * an allocation failed (pages allocated so far are freed in that case).
+ */
+
+static struct pbe * alloc_pagedir(unsigned nr_pages)
+{
+ unsigned num;
+ struct pbe *pblist, *pbe;
+
+ if (!nr_pages)
+ return NULL;
+
+ pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
+ pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ /* keep appending pages until 'num' PBEs cover nr_pages */
+ for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
+ pbe = pbe->next, num += PBES_PER_PAGE) {
+ /* the entry at index PB_PAGE_SKIP links to the next page */
+ pbe += PB_PAGE_SKIP;
+ pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+ }
+ if (!pbe) { /* get_zeroed_page() failed */
+ free_pagedir(pblist);
+ pblist = NULL;
+ }
+ return pblist;
+}
+
+/**
+ *	free_image_pages - Free pages allocated for snapshot
+ *
+ *	For every PBE on pagedir_save that owns a page: clear the page's
+ *	Nosave flag, free the page and reset ->address to 0.
+ */
+
+static void free_image_pages(void)
+{
+	struct pbe * p;
+
+	for_each_pbe(p, pagedir_save) {
+		unsigned long addr = p->address;
+
+		if (!addr)
+			continue;
+		ClearPageNosave(virt_to_page(addr));
+		free_page(addr);
+		p->address = 0;
+	}
+}
+
+/**
+ *	alloc_image_pages - Allocate pages for the snapshot.
+ *
+ *	Gives every PBE on pagedir_save a zeroed page and flags that page
+ *	Nosave.  Returns 0 on success, or -ENOMEM at the first failed
+ *	allocation; pages handed out before the failure are left in place.
+ */
+
+static int alloc_image_pages(void)
+{
+	struct pbe * p;
+
+	for_each_pbe(p, pagedir_save) {
+		unsigned long addr = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
+
+		p->address = addr;
+		if (!addr)
+			return -ENOMEM;
+		SetPageNosave(virt_to_page(addr));
+	}
+	return 0;
+}
+
+/*
+ * swsusp_free - release the snapshot pages and the pagedir describing them.
+ *
+ * Safe to call when nothing is allocated and safe to call more than once:
+ * pagedir_save is checked on entry and reset to NULL once its pages are
+ * gone.  Both cases can happen.  swsusp_suspend() calls this function when
+ * device_power_down() fails, before any pagedir exists.  It also calls it
+ * when swsusp_arch_suspend() reports an error, and swsusp_alloc() has
+ * already called it itself when alloc_image_pages() failed.
+ * NOTE(review): the second case assumes swsusp_arch_suspend() passes the
+ * result of swsusp_save() on - confirm against the arch code.
+ */
+void swsusp_free(void)
+{
+	if (!pagedir_save)
+		return;
+
+	BUG_ON(PageNosave(virt_to_page(pagedir_save)));
+	BUG_ON(PageNosaveFree(virt_to_page(pagedir_save)));
+	free_image_pages();
+	free_pagedir(pagedir_save);
+	pagedir_save = NULL;
+}
+
+
+/**
+ *	enough_free_mem - Make sure we have enough free memory to snapshot.
+ *
+ *	Returns TRUE if the number of free pages is at least
+ *	nr_copy_pages + PAGES_FOR_IO, FALSE otherwise.
+ */
+
+static int enough_free_mem(void)
+{
+	if (nr_free_pages() >= (nr_copy_pages + PAGES_FOR_IO))
+		return 1;
+
+	pr_debug("swsusp: Not enough free pages: Have %d\n",
+		 nr_free_pages());
+	return 0;
+}
+
+
+/**
+ *	enough_swap - Make sure we have enough swap to save the image.
+ *
+ *	Returns TRUE if the free swap reported by si_swapinfo() is at least
+ *	nr_copy_pages + PAGES_FOR_IO, FALSE otherwise.  The debug message
+ *	reports both the amount available and the amount needed.
+ *
+ *	FIXME: si_swapinfo(&i) returns all swap devices information.
+ *	We should only consider resume_device.
+ */
+
+static int enough_swap(void)
+{
+	struct sysinfo i;
+
+	si_swapinfo(&i);
+	if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO))  {
+		pr_debug("swsusp: Not enough swap. Have %lu, need %u\n",
+			 i.freeswap, nr_copy_pages + PAGES_FOR_IO);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * swsusp_alloc - allocate everything the snapshot needs.
+ *
+ * Checks free memory and free swap, enlarges nr_copy_pages with calc_nr()
+ * so that it also covers the PBE pages, allocates the pagedir, builds the
+ * PBE list on it and allocates one image page per PBE.  On success
+ * pagedir_nosave and pagedir_save both point at the new pagedir and
+ * nr_copy_pages_check records the final page count.
+ *
+ * Returns 0, -ENOMEM, -ENOSPC, or the error from alloc_image_pages(); in
+ * the last case swsusp_free() has already been called.
+ */
+static int swsusp_alloc(void)
+{
+	int error;
+
+	pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
+		 nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
+
+	pagedir_nosave = NULL;
+
+	if (!enough_free_mem())
+		return -ENOMEM;
+	if (!enough_swap())
+		return -ENOSPC;
+
+	nr_copy_pages = calc_nr(nr_copy_pages);
+
+	pagedir_save = alloc_pagedir(nr_copy_pages);
+	if (!pagedir_save) {
+		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
+		return -ENOMEM;
+	}
+	create_pbe_list(pagedir_save, nr_copy_pages);
+	pagedir_nosave = pagedir_save;
+
+	error = alloc_image_pages();
+	if (error) {
+		printk(KERN_ERR "suspend: Allocating image pages failed.\n");
+		swsusp_free();
+		return error;
+	}
+
+	nr_copy_pages_check = nr_copy_pages;
+	return 0;
+}
+
+/*
+ * suspend_prepare_image - build the in-memory snapshot.
+ *
+ * Saves highmem, counts the pages to copy, allocates the pagedir and the
+ * image pages and finally copies the data pages.  Returns 0 on success,
+ * -ENOMEM if highmem could not be saved (whatever had been saved is
+ * restored first), or the error from swsusp_alloc().
+ */
+static int suspend_prepare_image(void)
+{
+ int error;
+
+ pr_debug("swsusp: critical section: \n");
+ if (save_highmem()) {
+ printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n");
+ restore_highmem();
+ return -ENOMEM;
+ }
+
+ drain_local_pages();
+ count_data_pages();
+ printk("swsusp: Need to copy %u pages\n", nr_copy_pages);
+
+ /* NOTE(review): saved highmem is not restored here on failure;
+ * swsusp_suspend() calls restore_highmem() later - confirm that every
+ * failure path reaches it. */
+ error = swsusp_alloc();
+ if (error)
+ return error;
+
+ /* During allocating of suspend pagedir, new cold pages may appear.
+ * Kill them.
+ */
+ drain_local_pages();
+ copy_data_pages();
+
+ /*
+ * End of critical section. From now on, we can write to memory,
+ * but we should not touch disk. This specially means we must _not_
+ * touch swap space! Except we must write out our image of course.
+ */
+
+ printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages );
+ return 0;
+}
+
+
+/* It is important _NOT_ to umount filesystems at this point. We want
+ * them synced (in case something goes wrong) but we DO not want to mark
+ * filesystem clean: it is not. (And it does not matter, if we resume
+ * correctly, we'll mark system clean, anyway.)
+ *
+ * swsusp_write - write the snapshot to swap.  Devices are resumed first,
+ * the ignored swap devices are made unusable for the duration of the
+ * write, and the result of write_suspend_image() is returned.
+ */
+int swsusp_write(void)
+{
+ int error;
+ device_resume();
+ /* First call: make the ignored swap devices unusable. */
+ lock_swapdevices();
+ error = write_suspend_image();
+ /* This will unlock ignored swap devices since writing is finished */
+ lock_swapdevices();
+ return error;
+
+}
+
+
+/* Architecture-specific entry points, implemented outside this file. */
+extern asmlinkage int swsusp_arch_suspend(void);
+extern asmlinkage int swsusp_arch_resume(void);
+
+
+/*
+ * swsusp_save - pick the suspend swap device and build the snapshot.
+ *
+ * Returns the error from swsusp_swap_check() if no usable swap device
+ * was found, otherwise the result of suspend_prepare_image().
+ */
+asmlinkage int swsusp_save(void)
+{
+	int error = swsusp_swap_check();
+
+	if (!error)
+		return suspend_prepare_image();
+
+	printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try "
+	       "swapon -a!\n");
+	return error;
+}
+
+/*
+ * swsusp_suspend - take the snapshot with interrupts off and devices
+ * powered down.
+ *
+ * Returns the error from arch_prepare_suspend(), device_power_down() or
+ * swsusp_arch_suspend(), or 0.  The code after swsusp_arch_suspend() runs
+ * both right after the snapshot was taken and after a successful resume.
+ */
+int swsusp_suspend(void)
+{
+ int error;
+ if ((error = arch_prepare_suspend()))
+ return error;
+ local_irq_disable();
+ /* At this point, device_suspend() has been called, but *not*
+ * device_power_down(). We *must* device_power_down() now.
+ * Otherwise, drivers for some devices (e.g. interrupt controllers)
+ * become desynchronized with the actual state of the hardware
+ * at resume time, and evil weirdness ensues.
+ */
+ if ((error = device_power_down(PMSG_FREEZE))) {
+ printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
+ local_irq_enable();
+ /* NOTE(review): nothing has been allocated by this call yet;
+ * swsusp_free() acts on whatever pagedir_save currently holds. */
+ swsusp_free();
+ return error;
+ }
+ save_processor_state();
+ if ((error = swsusp_arch_suspend()))
+ swsusp_free();
+ /* Restore control flow magically appears here */
+ restore_processor_state();
+ BUG_ON (nr_copy_pages_check != nr_copy_pages);
+ restore_highmem();
+ device_power_up();
+ local_irq_enable();
+ return error;
+}
+
+/*
+ * swsusp_resume - hand control to the loaded image.
+ *
+ * Powers devices down with interrupts disabled and calls
+ * swsusp_arch_resume().  This function only continues past that call if
+ * the resume failed, in which case it undoes its preparations and returns
+ * the error.
+ */
+int swsusp_resume(void)
+{
+ int error;
+ local_irq_disable();
+ /* A failure here is only reported; the resume is attempted anyway. */
+ if (device_power_down(PMSG_FREEZE))
+ printk(KERN_ERR "Some devices failed to power down, very bad\n");
+ /* We'll ignore saved state, but this gets preempt count (etc) right */
+ save_processor_state();
+ error = swsusp_arch_resume();
+ /* Code below is only ever reached in case of failure. Otherwise
+ * execution continues at place where swsusp_arch_suspend was called
+ */
+ BUG_ON(!error);
+ restore_processor_state();
+ restore_highmem();
+ device_power_up();
+ local_irq_enable();
+ return error;
+}
+
+/* More restore stuff */
+
+/*
+ * Returns true if any of the (1 << order) pages starting at @addr is not
+ * flagged NosaveFree.  swsusp_pagedir_relocate() clears that flag on
+ * every page that appears as an orig_address, so such pages "collide".
+ */
+static int does_collide_order(unsigned long addr, int order)
+{
+	int nr = 1 << order;
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE)))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ *	On resume, for storing the PBE list and the image,
+ *	we can only use memory pages that do not conflict with the pages
+ *	which had been used before suspend.
+ *
+ *	We don't know which pages are usable until we allocate them.
+ *
+ *	Allocated but unusable (ie eaten) memory pages are linked together
+ *	to create a list, so that we can free them easily.  The link is
+ *	stored in the first word of each eaten page.
+ *
+ *	We could have used a type other than (void *)
+ *	for this purpose, but ...
+ */
+static void **eaten_memory = NULL;
+
+/* Push @page onto the front of the eaten_memory list. */
+static inline void eat_page(void *page)
+{
+	void **node = page;
+
+	*node = eaten_memory;
+	eaten_memory = node;
+}
+
+/*
+ * get_usable_page - allocate a zeroed page that does not collide.
+ * @gfp_mask:	Allocation flags passed to get_zeroed_page().
+ *
+ * Pages that collide (see does_collide_order()) are put on the
+ * eaten_memory list and another page is tried.  Returns the address of a
+ * usable page, or 0 if an allocation failed.  A failed allocation is
+ * never handed to does_collide_order() or eat_page(), including the very
+ * first one.
+ */
+static unsigned long get_usable_page(unsigned gfp_mask)
+{
+	unsigned long m;
+
+	m = get_zeroed_page(gfp_mask);
+	while (m && does_collide_order(m, 0)) {
+		eat_page((void *)m);
+		m = get_zeroed_page(gfp_mask);
+	}
+	return m;
+}
+
+/*
+ * Free every page on the eaten_memory list and leave the list empty.
+ * The link to the next page is read before the current page is freed.
+ */
+static void free_eaten_memory(void)
+{
+	void **node = eaten_memory;
+	int freed = 0;
+
+	while (node) {
+		void **next = *node;
+
+		free_page((unsigned long)node);
+		node = next;
+		freed++;
+	}
+	eaten_memory = NULL;
+	pr_debug("swsusp: %d unused pages freed\n", freed);
+}
+
+/**
+ *	check_pagedir - We ensure here that pages that the PBEs point to
+ *	won't collide with pages where we're going to restore from the loaded
+ *	pages later
+ *	@pblist:	The PBE list to provide with pages.
+ *
+ *	Every ->address is first reset to 0 and then set to a page obtained
+ *	from get_usable_page().  Returns 0, or -ENOMEM at the first failed
+ *	allocation.
+ */
+
+static int check_pagedir(struct pbe *pblist)
+{
+	struct pbe *p;
+
+	/* This is necessary, so that we can free allocated pages
+	 * in case of failure
+	 */
+	for_each_pbe (p, pblist)
+		p->address = 0UL;
+
+	for_each_pbe (p, pblist) {
+		unsigned long addr = get_usable_page(GFP_ATOMIC);
+
+		p->address = addr;
+		if (!addr)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ *	swsusp_pagedir_relocate - It is possible, that some memory pages
+ *	occupied by the list of PBEs collide with pages where we're going to
+ *	restore from the loaded pages later.  We relocate them here.
+ *	@pblist:	First page of the loaded pagedir chain.
+ *
+ *	All valid page frames are first flagged NosaveFree, then the flag is
+ *	cleared on every page that is the orig_address of a PBE.  Pagedir
+ *	pages without the flag are copied to usable pages; the old pages go
+ *	on the eaten_memory list and the PBE links inside the copy are
+ *	rebuilt.  Page frames for which pfn_valid() fails are skipped when
+ *	the flags are set, as saveable() and save_highmem_zone() do.
+ *
+ *	Returns the (possibly new) head of the pagedir, or NULL if @pblist
+ *	is NULL or memory ran out; in the latter case the pagedir and the
+ *	eaten pages have been freed.
+ */
+
+static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
+{
+	struct zone *zone;
+	unsigned long zone_pfn;
+	struct pbe *pbpage, *tail, *p;
+	void *m;
+	int rel = 0, error = 0;
+
+	if (!pblist) /* a sanity check */
+		return NULL;
+
+	pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
+			swsusp_info.pagedir_pages);
+
+	/* Set page flags */
+
+	for_each_zone(zone) {
+		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
+			unsigned long pfn = zone_pfn + zone->zone_start_pfn;
+
+			/* a zone's span may contain holes */
+			if (!pfn_valid(pfn))
+				continue;
+			SetPageNosaveFree(pfn_to_page(pfn));
+		}
+	}
+
+	/* Clear orig addresses */
+
+	for_each_pbe (p, pblist)
+		ClearPageNosaveFree(virt_to_page(p->orig_address));
+
+	tail = pblist + PB_PAGE_SKIP;
+
+	/* Relocate colliding pages */
+
+	for_each_pb_page (pbpage, pblist) {
+		if (does_collide_order((unsigned long)pbpage, 0)) {
+			m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
+			if (!m) {
+				error = -ENOMEM;
+				break;
+			}
+			memcpy(m, (void *)pbpage, PAGE_SIZE);
+			/* hook the copy in where the old page was */
+			if (pbpage == pblist)
+				pblist = (struct pbe *)m;
+			else
+				tail->next = (struct pbe *)m;
+
+			eat_page((void *)pbpage);
+			pbpage = (struct pbe *)m;
+
+			/* We have to link the PBEs again */
+
+			for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++)
+				if (p->next) /* needed to save the end */
+					p->next = p + 1;
+
+			rel++;
+		}
+		tail = pbpage + PB_PAGE_SKIP;
+	}
+
+	if (error) {
+		printk("\nswsusp: Out of memory\n\n");
+		free_pagedir(pblist);
+		free_eaten_memory();
+		pblist = NULL;
+	}
+	else
+		printk("swsusp: Relocated %d pages\n", rel);
+
+	return pblist;
+}
+
+/**
+ * Using bio to read from swap.
+ * This code requires a bit more work than just using buffer heads
+ * but, it is the recommended way for 2.5/2.6.
+ * The following are to signal the beginning and end of I/O. Bios
+ * finish asynchronously, while we want them to happen synchronously.
+ * A simple atomic_t, and a wait loop take care of this problem.
+ */
+
+/* Set to 1 by submit() before the bio is issued, reset to 0 by end_io(). */
+static atomic_t io_done = ATOMIC_INIT(0);
+
+/*
+ * Completion callback of the bios issued by submit().  Panics if the bio
+ * is not up to date, otherwise clears io_done so that the waiter in
+ * submit() can go on.  Always returns 0; @num and @err are unused.
+ * NOTE(review): submit() also installs this callback for writes, although
+ * the panic message speaks of reading - confirm the wording is intended.
+ */
+static int end_io(struct bio * bio, unsigned int num, int err)
+{
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ panic("I/O error reading memory image");
+ atomic_set(&io_done, 0);
+ return 0;
+}
+
+static struct block_device * resume_bdev;
+
+/**
+ *	submit - submit BIO request and wait for its completion.
+ *	@rw:	READ or WRITE.
+ *	@page_off:	offset of the page on resume_bdev, in units of PAGE_SIZE.
+ *	@page:	address of the page we're reading or writing
+ *		(passed through virt_to_page()).
+ *
+ *	Straight from the textbook - allocate and initialize the bio.
+ *	If we're writing, make sure the page is marked as dirty.
+ *	Then submit it and wait.
+ *
+ *	Returns 0 on success, -ENOMEM if no bio could be allocated and
+ *	-EFAULT if the page could not be added to the bio. I/O errors are
+ *	not returned: end_io() panics on them.
+ */
+
+static int submit(int rw, pgoff_t page_off, void * page)
+{
+	int error = 0;
+	struct bio * bio;
+
+	bio = bio_alloc(GFP_ATOMIC, 1);
+	if (!bio)
+		return -ENOMEM;
+
+	/*
+	 * No bio_get() here. bio_alloc() already gives us one reference,
+	 * end_io() does not drop any, and the single bio_put() at Done
+	 * releases it. Taking a second reference meant the bio was never
+	 * freed, on the error path as well as on the success path.
+	 */
+	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
+	bio->bi_bdev = resume_bdev;
+	bio->bi_end_io = end_io;
+
+	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
+		error = -EFAULT;
+		goto Done;
+	}
+
+	/*
+	 * NOTE(review): a page is modified by a READ, not by a WRITE, so
+	 * dirtying on WRITE looks inverted - confirm before changing.
+	 */
+	if (rw == WRITE)
+		bio_set_pages_dirty(bio);
+
+	/* end_io() resets io_done to 0 when the transfer has completed. */
+	atomic_set(&io_done, 1);
+	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+	while (atomic_read(&io_done))
+		yield();
+
+ Done:
+	bio_put(bio);
+	return error;
+}
+
+/* Read one page at page offset @page_off of resume_bdev into @page. */
+static int bio_read_page(pgoff_t page_off, void * page)
+{
+	int error = submit(READ, page_off, page);
+
+	return error;
+}
+
+/* Write @page to page offset @page_off of resume_bdev. */
+static int bio_write_page(pgoff_t page_off, void * page)
+{
+	int error = submit(WRITE, page_off, page);
+
+	return error;
+}
+
+/*
+ * Compare the header loaded into swsusp_info with the running kernel.
+ * Returns NULL when everything matches, otherwise a short string naming
+ * the first field that differs. This is a plausibility check only; it
+ * is not claimed to be foolproof.
+ */
+
+static const char * sanity_check(void)
+{
+	const char *mismatch = NULL;
+
+	dump_info();
+
+	if (swsusp_info.version_code != LINUX_VERSION_CODE)
+		mismatch = "kernel version";
+	else if (swsusp_info.num_physpages != num_physpages)
+		mismatch = "memory size";
+	else if (strcmp(swsusp_info.uts.sysname, system_utsname.sysname) != 0)
+		mismatch = "system type";
+	else if (strcmp(swsusp_info.uts.release, system_utsname.release) != 0)
+		mismatch = "kernel release";
+	else if (strcmp(swsusp_info.uts.version, system_utsname.version) != 0)
+		mismatch = "version";
+	else if (strcmp(swsusp_info.uts.machine, system_utsname.machine) != 0)
+		mismatch = "machine";
+	else if (swsusp_info.cpus != num_online_cpus())
+		mismatch = "number of cpus";
+
+	return mismatch;
+}
+
+
+/*
+ * Load swsusp_info from the swap location recorded in swsusp_header and
+ * verify it with sanity_check(). On success nr_copy_pages is set from
+ * the header and 0 is returned. Returns the read error, or -EPERM when
+ * the header does not match this kernel.
+ */
+static int check_header(void)
+{
+	const char *why;
+	int error;
+
+	error = bio_read_page(swp_offset(swsusp_header.swsusp_info),
+			      &swsusp_info);
+	if (error)
+		return error;
+
+	/* Is this same machine? */
+	why = sanity_check();
+	if (why) {
+		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", why);
+		return -EPERM;
+	}
+
+	nr_copy_pages = swsusp_info.image_pages;
+	return 0;
+}
+
+/*
+ * Read page 0 of the resume device into swsusp_header and look for the
+ * suspend signature. If it is there, the saved original signature is
+ * put back and the page is written out again, so the image is not
+ * picked up a second time. Returns 0 on success, -EINVAL when the
+ * signature is missing, or the error from the read or write.
+ */
+static int check_sig(void)
+{
+	int error;
+
+	memset(&swsusp_header, 0, sizeof(swsusp_header));
+
+	error = bio_read_page(0, &swsusp_header);
+	if (error)
+		return error;
+
+	if (memcmp(SWSUSP_SIG, swsusp_header.sig, 10) != 0) {
+		printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
+		return -EINVAL;
+	}
+
+	/* Reset swap signature now. */
+	memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
+	error = bio_write_page(0, &swsusp_header);
+	if (error)
+		return error;
+
+	pr_debug("swsusp: Signature found, resuming\n");
+	return 0;
+}
+
+/**
+ *	data_read - Read image pages from swap.
+ *	@pblist:	list of PBEs; for each one the page stored at
+ *			->swap_address is read into ->address.
+ *
+ *	Overlaps need not be checked here, check_pagedir() already did
+ *	that. A progress percentage is printed while reading. Returns 0,
+ *	or the first error reported by bio_read_page().
+ */
+
+static int data_read(struct pbe *pblist)
+{
+	struct pbe *entry;
+	int pages_read = 0;
+	/* Number of pages that make up one percent, but at least one. */
+	int step = swsusp_info.image_pages / 100;
+
+	if (step == 0)
+		step = 1;
+
+	printk("swsusp: Reading image data (%lu pages): ",
+	       swsusp_info.image_pages);
+
+	for_each_pbe (entry, pblist) {
+		int error;
+
+		if (pages_read % step == 0)
+			printk("\b\b\b\b%3d%%", pages_read / step);
+
+		error = bio_read_page(swp_offset(entry->swap_address),
+				      (void *)entry->address);
+		if (error)
+			return error;
+
+		pages_read++;
+	}
+
+	printk("\b\b\b\bdone\n");
+	return 0;
+}
+
+extern dev_t name_to_dev_t(const char *line);
+
+/**
+ *	read_pagedir - Read page backup list pages from swap
+ *	@pblist:	head of an already allocated page directory.
+ *
+ *	Each page of the directory is filled from the swap offset recorded
+ *	in swsusp_info.pagedir[]. The ->next pointer stored in the last
+ *	slot of every page (the link to the following directory page) is
+ *	saved before the read and restored afterwards, because the data
+ *	coming from swap would otherwise overwrite it.
+ *
+ *	Returns 0 on success. Returns -EFAULT if @pblist is NULL or a swap
+ *	offset is 0, or the error from bio_read_page(). On any failure
+ *	after the NULL check the whole page directory is freed here; the
+ *	caller must not use or free it again.
+ */
+
+static int read_pagedir(struct pbe *pblist)
+{
+	struct pbe *pbpage, *p;
+	unsigned i = 0;
+	int error = 0;
+
+	if (!pblist)
+		return -EFAULT;
+
+	printk("swsusp: Reading pagedir (%lu pages)\n",
+			swsusp_info.pagedir_pages);
+
+	for_each_pb_page (pbpage, pblist) {
+		unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);
+
+		error = -EFAULT;
+		if (offset) {
+			/* keep the link to the next directory page */
+			p = (pbpage + PB_PAGE_SKIP)->next;
+			error = bio_read_page(offset, (void *)pbpage);
+			(pbpage + PB_PAGE_SKIP)->next = p;
+		}
+		if (error)
+			break;
+	}
+
+	/*
+	 * On failure release every directory page, not just the first one:
+	 * the links between the pages are intact thanks to the
+	 * save/restore above. The page-count assertion is only meaningful
+	 * when the loop ran to completion; after an early break it would
+	 * turn an I/O error into a BUG().
+	 */
+	if (error)
+		free_pagedir(pblist);
+	else
+		BUG_ON(i != swsusp_info.pagedir_pages);
+
+	return error;
+}
+
+
+/*
+ * Validate the image on the resume device: first the signature, then
+ * the header. Returns 0 if both checks pass, otherwise the error of the
+ * first check that failed.
+ */
+static int check_suspend_image(void)
+{
+	int error = check_sig();
+
+	if (!error)
+		error = check_header();
+
+	return error;
+}
+
+/*
+ * Load the complete suspend image: allocate and read the page
+ * directory, build the PBE list, relocate colliding directory pages,
+ * allocate the data pages (check_pagedir()) and finally read the data.
+ * On success pagedir_nosave points to the loaded list and 0 is
+ * returned. If check_pagedir() or data_read() fails, the data pages
+ * allocated so far and the page directory are freed again.
+ */
+static int read_suspend_image(void)
+{
+	struct pbe *pagedir, *entry;
+	int error;
+
+	pagedir = alloc_pagedir(nr_copy_pages);
+	if (!pagedir)
+		return -ENOMEM;
+
+	error = read_pagedir(pagedir);
+	if (error)
+		return error;
+
+	create_pbe_list(pagedir, nr_copy_pages);
+
+	pagedir_nosave = swsusp_pagedir_relocate(pagedir);
+	if (!pagedir_nosave)
+		return -ENOMEM;
+
+	/* Allocate memory for the image and read the data from swap */
+
+	error = check_pagedir(pagedir_nosave);
+	free_eaten_memory();
+	if (!error)
+		error = data_read(pagedir_nosave);
+
+	if (!error)
+		return 0;
+
+	/* We fail cleanly */
+	for_each_pbe (entry, pagedir_nosave) {
+		if (entry->address) {
+			free_page(entry->address);
+			entry->address = 0UL;
+		}
+	}
+	free_pagedir(pagedir_nosave);
+
+	return error;
+}
+
+/**
+ *	swsusp_check - Check for saved image in swap
+ *
+ *	Resolves the resume device (from resume_file if
+ *	swsusp_resume_device is not set yet), opens it and validates the
+ *	image signature and header.
+ *
+ *	Returns 0 if a usable image was found; resume_bdev is then left
+ *	open for swsusp_read() or swsusp_close(). Returns -ENOENT if no
+ *	resume device is configured, or the error from opening or checking
+ *	the device. After a failed open or check resume_bdev holds an
+ *	ERR_PTR() value.
+ */
+
+int swsusp_check(void)
+{
+	int error;
+
+	if (!swsusp_resume_device) {
+		if (!strlen(resume_file))
+			return -ENOENT;
+		swsusp_resume_device = name_to_dev_t(resume_file);
+		pr_debug("swsusp: Resume From Partition %s\n", resume_file);
+	} else {
+		pr_debug("swsusp: Resume From Partition %d:%d\n",
+			 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+	}
+
+	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+	if (!IS_ERR(resume_bdev)) {
+		set_blocksize(resume_bdev, PAGE_SIZE);
+		error = check_suspend_image();
+		if (error) {
+			blkdev_put(resume_bdev);
+			/*
+			 * Do not keep a pointer to the device we have just
+			 * released: store the error instead, so that the
+			 * IS_ERR() guards in swsusp_read() and
+			 * swsusp_close() can recognise it.
+			 */
+			resume_bdev = ERR_PTR(error);
+		}
+	} else
+		error = PTR_ERR(resume_bdev);
+
+	if (!error)
+		pr_debug("swsusp: resume file found\n");
+	else
+		pr_debug("swsusp: Error %d check for resume file\n", error);
+	return error;
+}
+
+/**
+ *	swsusp_read - Read saved image from swap.
+ *
+ *	Loads the image from resume_bdev and releases the device
+ *	afterwards, whether or not the read succeeded. Returns 0 on
+ *	success, the error stored in resume_bdev if it holds an error
+ *	pointer, or the error from read_suspend_image().
+ */
+
+int swsusp_read(void)
+{
+	int error;
+
+	if (IS_ERR(resume_bdev)) {
+		pr_debug("swsusp: block device not initialised\n");
+		return PTR_ERR(resume_bdev);
+	}
+
+	error = read_suspend_image();
+	blkdev_put(resume_bdev);
+
+	if (error) {
+		pr_debug("swsusp: Error %d resuming\n", error);
+	} else {
+		pr_debug("swsusp: Reading resume file was successful\n");
+	}
+
+	return error;
+}
+
+/**
+ *	swsusp_close - close swap device.
+ *
+ *	Releases resume_bdev, unless it holds an error pointer, in which
+ *	case only a debug message is emitted.
+ */
+
+void swsusp_close(void)
+{
+	if (!IS_ERR(resume_bdev)) {
+		blkdev_put(resume_bdev);
+		return;
+	}
+
+	pr_debug("swsusp: block device not initialised\n");
+}