From 9185cfa92507d07ac787bc73d06c42222eec7239 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 1 Nov 2006 13:23:14 +0100
Subject: ACPI: S4: Use "platform" rather than "shutdown" mode by default

It has been reported that on some systems the functionality after a resume
from disk is limited if the system is simply powered off during the suspend
instead of using the ACPI S4 suspend (aka platform mode).

Unfortunately the default is currently to power off the system during the
suspend so the users of these systems experience problems after the resume
if they don't switch to the platform mode explicitly.  This patch makes swsusp
use the platform mode by default to avoid such situations.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Stefan Seyfried <seife@suse.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 kernel/power/disk.c | 8 +++++---
 kernel/power/main.c | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d3a158a60312..ae6bbc903b7d 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -44,9 +44,11 @@ static void power_down(suspend_disk_method_t mode)
 
 	switch(mode) {
 	case PM_DISK_PLATFORM:
-		kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
-		error = pm_ops->enter(PM_SUSPEND_DISK);
-		break;
+		if (pm_ops && pm_ops->enter) {
+			kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
+			error = pm_ops->enter(PM_SUSPEND_DISK);
+			break;
+		}
 	case PM_DISK_SHUTDOWN:
 		kernel_power_off();
 		break;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 873228c71dab..1210961a5aa7 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -27,7 +27,7 @@
 DECLARE_MUTEX(pm_sem);
 
 struct pm_ops *pm_ops;
-suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
+suspend_disk_method_t pm_disk_mode = PM_DISK_PLATFORM;
 
 /**
  *	pm_set_ops - Set the global power method table. 
-- 
cgit v1.2.3


From 52bad64d95bd89e08c49ec5a071fa6dcbe5a1a9c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:54:01 +0000
Subject: WorkStruct: Separate delayable and non-delayable events.

Separate delayable work items from non-delayable work items be splitting them
into a separate structure (delayed_work), which incorporates a work_struct and
the timer_list removed from work_struct.

The work_struct struct is huge, and this limits it's usefulness.  On a 64-bit
architecture it's nearly 100 bytes in size.  This reduces that by half for the
non-delayable type of event.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 arch/x86_64/kernel/mce.c           |  2 +-
 drivers/ata/libata-core.c          | 11 +++-----
 drivers/ata/libata-eh.c            |  2 +-
 drivers/char/random.c              |  2 +-
 drivers/char/tty_io.c              |  2 +-
 fs/aio.c                           |  4 +--
 fs/nfs/client.c                    |  2 +-
 fs/nfs/namespace.c                 |  3 ++-
 include/linux/aio.h                |  2 +-
 include/linux/kbd_kern.h           |  2 +-
 include/linux/libata.h             |  4 +--
 include/linux/nfs_fs_sb.h          |  2 +-
 include/linux/sunrpc/rpc_pipe_fs.h |  2 +-
 include/linux/sunrpc/xprt.h        |  2 +-
 include/linux/tty.h                |  2 +-
 include/linux/workqueue.h          | 44 +++++++++++++++++++++++---------
 kernel/workqueue.c                 | 51 +++++++++++++++++++++-----------------
 mm/slab.c                          |  8 +++---
 net/core/link_watch.c              |  9 +++----
 net/sunrpc/cache.c                 |  4 +--
 net/sunrpc/rpc_pipe.c              |  3 ++-
 net/sunrpc/xprtsock.c              |  6 ++---
 22 files changed, 96 insertions(+), 73 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bbea88801d88..5306f2630905 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -307,7 +307,7 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
 
 static int check_interval = 5 * 60; /* 5 minutes */
 static void mcheck_timer(void *data);
-static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer, NULL);
 
 static void mcheck_check_cpu(void *info)
 {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 915a55a6cc14..0bb4b4dced76 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -937,12 +937,9 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
 	if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK)
 		return;
 
-	PREPARE_WORK(&ap->port_task, fn, data);
+	PREPARE_DELAYED_WORK(&ap->port_task, fn, data);
 
-	if (!delay)
-		rc = queue_work(ata_wq, &ap->port_task);
-	else
-		rc = queue_delayed_work(ata_wq, &ap->port_task, delay);
+	rc = queue_delayed_work(ata_wq, &ap->port_task, delay);
 
 	/* rc == 0 means that another user is using port task */
 	WARN_ON(rc == 0);
@@ -5320,8 +5317,8 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host,
 	ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
 #endif
 
-	INIT_WORK(&ap->port_task, NULL, NULL);
-	INIT_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap);
+	INIT_DELAYED_WORK(&ap->port_task, NULL, NULL);
+	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap);
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan, ap);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 02b2b2787d9b..9f6b7cc74fd9 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -332,7 +332,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	if (ap->pflags & ATA_PFLAG_LOADING)
 		ap->pflags &= ~ATA_PFLAG_LOADING;
 	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
-		queue_work(ata_aux_wq, &ap->hotplug_task);
+		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);
 
 	if (ap->pflags & ATA_PFLAG_RECOVERED)
 		ata_port_printk(ap, KERN_INFO, "EH complete\n");
diff --git a/drivers/char/random.c b/drivers/char/random.c
index eb6b13f4211a..f2ab61f3e8ae 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1424,7 +1424,7 @@ static unsigned int ip_cnt;
 
 static void rekey_seq_generator(void *private_);
 
-static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
+static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL);
 
 /*
  * Lock avoidance:
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index e90ea39c7c4b..7297acfe520c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3580,7 +3580,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	tty->overrun_time = jiffies;
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
-	INIT_WORK(&tty->buf.work, flush_to_ldisc, tty);
+	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc, tty);
 	init_MUTEX(&tty->buf.pty_sem);
 	mutex_init(&tty->termios_mutex);
 	init_waitqueue_head(&tty->write_wait);
diff --git a/fs/aio.c b/fs/aio.c
index 94766599db00..11a1a7100ad6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
-	INIT_WORK(&ctx->wq, aio_kick_handler, ctx);
+	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx);
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freectx;
@@ -876,7 +876,7 @@ static void aio_kick_handler(void *data)
 	 * we're in a worker thread already, don't use queue_delayed_work,
 	 */
 	if (requeue)
-		queue_work(aio_wq, &ctx->wq);
+		queue_delayed_work(aio_wq, &ctx->wq, 0);
 }
 
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5fea638743e4..6f0487d6f44a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	INIT_LIST_HEAD(&clp->cl_state_owners);
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index ec1114b33d89..5ed798bc1cf7 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -21,7 +21,8 @@
 static void nfs_expire_automounts(void *list);
 
 LIST_HEAD(nfs_automount_list);
-static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
+static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts,
+			    &nfs_automount_list);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
 static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 0d71c0041f13..9e350fd44d77 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -194,7 +194,7 @@ struct kioctx {
 
 	struct aio_ring_info	ring_info;
 
-	struct work_struct	wq;
+	struct delayed_work	wq;
 };
 
 /* prototypes */
diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h
index efe0ee4cc80b..06c58c423fe1 100644
--- a/include/linux/kbd_kern.h
+++ b/include/linux/kbd_kern.h
@@ -158,7 +158,7 @@ static inline void con_schedule_flip(struct tty_struct *t)
 	if (t->buf.tail != NULL)
 		t->buf.tail->commit = t->buf.tail->used;
 	spin_unlock_irqrestore(&t->buf.lock, flags);
-	schedule_work(&t->buf.work);
+	schedule_delayed_work(&t->buf.work, 0);
 }
 
 #endif
diff --git a/include/linux/libata.h b/include/linux/libata.h
index abd2debebca2..5f04006e8dd2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -568,8 +568,8 @@ struct ata_port {
 	struct ata_host		*host;
 	struct device 		*dev;
 
-	struct work_struct	port_task;
-	struct work_struct	hotplug_task;
+	struct delayed_work	port_task;
+	struct delayed_work	hotplug_task;
 	struct work_struct	scsi_rescan_task;
 
 	unsigned int		hsm_task_state;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7ccfc7ef0a83..95796e6924f1 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -51,7 +51,7 @@ struct nfs_client {
 
 	unsigned long		cl_lease_time;
 	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
+	struct delayed_work	cl_renewd;
 
 	struct rpc_wait_queue	cl_rpcwaitq;
 
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index a2eb9b4a9de3..4a68125b6de6 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -30,7 +30,7 @@ struct rpc_inode {
 #define RPC_PIPE_WAIT_FOR_OPEN	1
 	int flags;
 	struct rpc_pipe_ops *ops;
-	struct work_struct queue_timeout;
+	struct delayed_work queue_timeout;
 };
 
 static inline struct rpc_inode *
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 60394fbc4c70..3e04c1512fc4 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -177,7 +177,7 @@ struct rpc_xprt {
 	unsigned long		connect_timeout,
 				bind_timeout,
 				reestablish_timeout;
-	struct work_struct	connect_worker;
+	struct delayed_work	connect_worker;
 	unsigned short		port;
 
 	/*
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 44091c0db0b4..c1f716446161 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -53,7 +53,7 @@ struct tty_buffer {
 };
 
 struct tty_bufhead {
-	struct work_struct		work;
+	struct delayed_work work;
 	struct semaphore pty_sem;
 	spinlock_t lock;
 	struct tty_buffer *head;	/* Queue head */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9bca3539a1e5..9faaccae570e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -17,6 +17,10 @@ struct work_struct {
 	void (*func)(void *);
 	void *data;
 	void *wq_data;
+};
+
+struct delayed_work {
+	struct work_struct work;
 	struct timer_list timer;
 };
 
@@ -28,32 +32,48 @@ struct execute_work {
         .entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	.data = (d),						\
+	}
+
+#define __DELAYED_WORK_INITIALIZER(n, f, d) {			\
+	.work = __WORK_INITIALIZER((n).work, (f), (d)),		\
 	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 	}
 
 #define DECLARE_WORK(n, f, d)					\
 	struct work_struct n = __WORK_INITIALIZER(n, f, d)
 
+#define DECLARE_DELAYED_WORK(n, f, d)				\
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, d)
+
 /*
- * initialize a work-struct's func and data pointers:
+ * initialize a work item's function and data pointers
  */
 #define PREPARE_WORK(_work, _func, _data)			\
 	do {							\
-		(_work)->func = _func;				\
-		(_work)->data = _data;				\
+		(_work)->func = (_func);			\
+		(_work)->data = (_data);			\
 	} while (0)
 
+#define PREPARE_DELAYED_WORK(_work, _func, _data)		\
+	PREPARE_WORK(&(_work)->work, (_func), (_data))
+
 /*
- * initialize all of a work-struct:
+ * initialize all of a work item in one go
  */
 #define INIT_WORK(_work, _func, _data)				\
 	do {							\
 		INIT_LIST_HEAD(&(_work)->entry);		\
 		(_work)->pending = 0;				\
 		PREPARE_WORK((_work), (_func), (_data));	\
+	} while (0)
+
+#define INIT_DELAYED_WORK(_work, _func, _data)		\
+	do {							\
+		INIT_WORK(&(_work)->work, (_func), (_data));	\
 		init_timer(&(_work)->timer);			\
 	} while (0)
 
+
 extern struct workqueue_struct *__create_workqueue(const char *name,
 						    int singlethread);
 #define create_workqueue(name) __create_workqueue((name), 0)
@@ -62,24 +82,24 @@ extern struct workqueue_struct *__create_workqueue(const char *name,
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
-extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay));
+extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay));
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-	struct work_struct *work, unsigned long delay);
+	struct delayed_work *work, unsigned long delay);
 extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
 
 extern int FASTCALL(schedule_work(struct work_struct *work));
-extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay));
+extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
 
-extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay);
+extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
 extern int schedule_on_each_cpu(void (*func)(void *info), void *info);
 extern void flush_scheduled_work(void);
 extern int current_is_keventd(void);
 extern int keventd_up(void);
 
 extern void init_workqueues(void);
-void cancel_rearming_delayed_work(struct work_struct *work);
+void cancel_rearming_delayed_work(struct delayed_work *work);
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
-				       struct work_struct *);
+				       struct delayed_work *);
 int execute_in_process_context(void (*fn)(void *), void *,
 			       struct execute_work *);
 
@@ -88,13 +108,13 @@ int execute_in_process_context(void (*fn)(void *), void *,
  * function may still be running on return from cancel_delayed_work().  Run
  * flush_scheduled_work() to wait on it.
  */
-static inline int cancel_delayed_work(struct work_struct *work)
+static inline int cancel_delayed_work(struct delayed_work *work)
 {
 	int ret;
 
 	ret = del_timer_sync(&work->timer);
 	if (ret)
-		clear_bit(0, &work->pending);
+		clear_bit(0, &work->work.pending);
 	return ret;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 17c2f03d2c27..44fc54b7decf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -122,29 +122,33 @@ EXPORT_SYMBOL_GPL(queue_work);
 
 static void delayed_work_timer_fn(unsigned long __data)
 {
-	struct work_struct *work = (struct work_struct *)__data;
-	struct workqueue_struct *wq = work->wq_data;
+	struct delayed_work *dwork = (struct delayed_work *)__data;
+	struct workqueue_struct *wq = dwork->work.wq_data;
 	int cpu = smp_processor_id();
 
 	if (unlikely(is_single_threaded(wq)))
 		cpu = singlethread_cpu;
 
-	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), &dwork->work);
 }
 
 /**
  * queue_delayed_work - queue work on a workqueue after delay
  * @wq: workqueue to use
- * @work: work to queue
+ * @work: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
  */
 int fastcall queue_delayed_work(struct workqueue_struct *wq,
-			struct work_struct *work, unsigned long delay)
+			struct delayed_work *dwork, unsigned long delay)
 {
 	int ret = 0;
-	struct timer_list *timer = &work->timer;
+	struct timer_list *timer = &dwork->timer;
+	struct work_struct *work = &dwork->work;
+
+	if (delay == 0)
+		return queue_work(wq, work);
 
 	if (!test_and_set_bit(0, &work->pending)) {
 		BUG_ON(timer_pending(timer));
@@ -153,7 +157,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 		/* This stores wq for the moment, for the timer_fn */
 		work->wq_data = wq;
 		timer->expires = jiffies + delay;
-		timer->data = (unsigned long)work;
+		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
 		add_timer(timer);
 		ret = 1;
@@ -172,10 +176,11 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * Returns 0 if @work was already on a queue, non-zero otherwise.
  */
 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-			struct work_struct *work, unsigned long delay)
+			struct delayed_work *dwork, unsigned long delay)
 {
 	int ret = 0;
-	struct timer_list *timer = &work->timer;
+	struct timer_list *timer = &dwork->timer;
+	struct work_struct *work = &dwork->work;
 
 	if (!test_and_set_bit(0, &work->pending)) {
 		BUG_ON(timer_pending(timer));
@@ -184,7 +189,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		/* This stores wq for the moment, for the timer_fn */
 		work->wq_data = wq;
 		timer->expires = jiffies + delay;
-		timer->data = (unsigned long)work;
+		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
 		add_timer_on(timer, cpu);
 		ret = 1;
@@ -468,31 +473,31 @@ EXPORT_SYMBOL(schedule_work);
 
 /**
  * schedule_delayed_work - put work task in global workqueue after delay
- * @work: job to be done
- * @delay: number of jiffies to wait
+ * @dwork: job to be done
+ * @delay: number of jiffies to wait or 0 for immediate execution
  *
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue.
  */
-int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
+int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
 {
-	return queue_delayed_work(keventd_wq, work, delay);
+	return queue_delayed_work(keventd_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
 
 /**
  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
  * @cpu: cpu to use
- * @work: job to be done
+ * @dwork: job to be done
  * @delay: number of jiffies to wait
  *
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue on the specified CPU.
  */
 int schedule_delayed_work_on(int cpu,
-			struct work_struct *work, unsigned long delay)
+			struct delayed_work *dwork, unsigned long delay)
 {
-	return queue_delayed_work_on(cpu, keventd_wq, work, delay);
+	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work_on);
 
@@ -539,12 +544,12 @@ EXPORT_SYMBOL(flush_scheduled_work);
  * cancel_rearming_delayed_workqueue - reliably kill off a delayed
  *			work whose handler rearms the delayed work.
  * @wq:   the controlling workqueue structure
- * @work: the delayed work struct
+ * @dwork: the delayed work struct
  */
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
-				       struct work_struct *work)
+				       struct delayed_work *dwork)
 {
-	while (!cancel_delayed_work(work))
+	while (!cancel_delayed_work(dwork))
 		flush_workqueue(wq);
 }
 EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
@@ -552,11 +557,11 @@ EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
 /**
  * cancel_rearming_delayed_work - reliably kill off a delayed keventd
  *			work whose handler rearms the delayed work.
- * @work: the delayed work struct
+ * @dwork: the delayed work struct
  */
-void cancel_rearming_delayed_work(struct work_struct *work)
+void cancel_rearming_delayed_work(struct delayed_work *dwork)
 {
-	cancel_rearming_delayed_workqueue(keventd_wq, work);
+	cancel_rearming_delayed_workqueue(keventd_wq, dwork);
 }
 EXPORT_SYMBOL(cancel_rearming_delayed_work);
 
diff --git a/mm/slab.c b/mm/slab.c
index 3c4a7e34eddc..a65bc5e992c3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -753,7 +753,7 @@ int slab_is_available(void)
 	return g_cpucache_up == FULL;
 }
 
-static DEFINE_PER_CPU(struct work_struct, reap_work);
+static DEFINE_PER_CPU(struct delayed_work, reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
@@ -916,16 +916,16 @@ static void next_reap_node(void)
  */
 static void __devinit start_cpu_timer(int cpu)
 {
-	struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
 
 	/*
 	 * When this gets called from do_initcalls via cpucache_init(),
 	 * init_workqueues() has already run, so keventd will be setup
 	 * at that time.
 	 */
-	if (keventd_up() && reap_work->func == NULL) {
+	if (keventd_up() && reap_work->work.func == NULL) {
 		init_reap_node(cpu);
-		INIT_WORK(reap_work, cache_reap, NULL);
+		INIT_DELAYED_WORK(reap_work, cache_reap, NULL);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
 }
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 4b36114744c5..f2ed09e25dfd 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -35,7 +35,7 @@ static unsigned long linkwatch_flags;
 static unsigned long linkwatch_nextevent;
 
 static void linkwatch_event(void *dummy);
-static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event, NULL);
 
 static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
@@ -171,10 +171,9 @@ void linkwatch_fire_event(struct net_device *dev)
 			unsigned long delay = linkwatch_nextevent - jiffies;
 
 			/* If we wrap around we'll delay it by at most HZ. */
-			if (!delay || delay > HZ)
-				schedule_work(&linkwatch_work);
-			else
-				schedule_delayed_work(&linkwatch_work, delay);
+			if (delay > HZ)
+				delay = 0;
+			schedule_delayed_work(&linkwatch_work, delay);
 		}
 	}
 }
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 00cb388ece03..d5725cb1491e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -285,7 +285,7 @@ static struct file_operations content_file_operations;
 static struct file_operations cache_flush_operations;
 
 static void do_cache_clean(void *data);
-static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
+static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean, NULL);
 
 void cache_register(struct cache_detail *cd)
 {
@@ -337,7 +337,7 @@ void cache_register(struct cache_detail *cd)
 	spin_unlock(&cache_list_lock);
 
 	/* start the cleaning process */
-	schedule_work(&cache_cleaner);
+	schedule_delayed_work(&cache_cleaner, 0);
 }
 
 int cache_unregister(struct cache_detail *cd)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9a0b41a97f90..97be3f7fed44 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -837,7 +837,8 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		INIT_LIST_HEAD(&rpci->pipe);
 		rpci->pipelen = 0;
 		init_waitqueue_head(&rpci->waitq);
-		INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci);
+		INIT_DELAYED_WORK(&rpci->queue_timeout,
+				    rpc_timeout_upcall_queue, rpci);
 		rpci->ops = NULL;
 	}
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 757fc91ef25d..3c7532cd009e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1262,7 +1262,7 @@ static void xs_connect(struct rpc_task *task)
 			xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
 	} else {
 		dprintk("RPC:      xs_connect scheduled xprt %p\n", xprt);
-		schedule_work(&xprt->connect_worker);
+		schedule_delayed_work(&xprt->connect_worker, 0);
 
 		/* flush_scheduled_work can sleep... */
 		if (!RPC_IS_ASYNC(task))
@@ -1375,7 +1375,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-	INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1420,7 +1420,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-	INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-- 
cgit v1.2.3


From 6bb49e5965c1fc399b4d3cd2b5cf2da535b330c0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:54:45 +0000
Subject: WorkStruct: Typedef the work function prototype

Define a type for the work function prototype.  It's not only kept in the
work_struct struct, it's also passed as an argument to several functions.

This makes it easier to change it.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 drivers/block/floppy.c    | 4 ++--
 include/linux/workqueue.h | 9 +++++----
 kernel/workqueue.c        | 6 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9e6d3a87cbe3..5a14fac13b12 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -996,7 +996,7 @@ static DECLARE_WORK(floppy_work, NULL, NULL);
 
 static void schedule_bh(void (*handler) (void))
 {
-	PREPARE_WORK(&floppy_work, (void (*)(void *))handler, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)handler, NULL);
 	schedule_work(&floppy_work);
 }
 
@@ -1008,7 +1008,7 @@ static void cancel_activity(void)
 
 	spin_lock_irqsave(&floppy_lock, flags);
 	do_floppy = NULL;
-	PREPARE_WORK(&floppy_work, (void *)empty, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)empty, NULL);
 	del_timer(&fd_timer);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 }
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 9faaccae570e..cef40b22ff9a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -11,10 +11,12 @@
 
 struct workqueue_struct;
 
+typedef void (*work_func_t)(void *data);
+
 struct work_struct {
 	unsigned long pending;
 	struct list_head entry;
-	void (*func)(void *);
+	work_func_t func;
 	void *data;
 	void *wq_data;
 };
@@ -91,7 +93,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work));
 extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
 
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
-extern int schedule_on_each_cpu(void (*func)(void *info), void *info);
+extern int schedule_on_each_cpu(work_func_t func, void *info);
 extern void flush_scheduled_work(void);
 extern int current_is_keventd(void);
 extern int keventd_up(void);
@@ -100,8 +102,7 @@ extern void init_workqueues(void);
 void cancel_rearming_delayed_work(struct delayed_work *work);
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
 				       struct delayed_work *);
-int execute_in_process_context(void (*fn)(void *), void *,
-			       struct execute_work *);
+int execute_in_process_context(work_func_t fn, void *, struct execute_work *);
 
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 44fc54b7decf..1e9d61ecf762 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -217,7 +217,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 	while (!list_empty(&cwq->worklist)) {
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
-		void (*f) (void *) = work->func;
+		work_func_t f = work->func;
 		void *data = work->data;
 
 		list_del_init(cwq->worklist.next);
@@ -513,7 +513,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
  *
  * schedule_on_each_cpu() is very slow.
  */
-int schedule_on_each_cpu(void (*func)(void *info), void *info)
+int schedule_on_each_cpu(work_func_t func, void *info)
 {
 	int cpu;
 	struct work_struct *works;
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work);
  * Returns:	0 - function was executed
  *		1 - function was scheduled for execution
  */
-int execute_in_process_context(void (*fn)(void *data), void *data,
+int execute_in_process_context(work_func_t fn, void *data,
 			       struct execute_work *ew)
 {
 	if (!in_interrupt()) {
-- 
cgit v1.2.3


From 365970a1ea76d81cb1ad2f652acb605f06dae256 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:54:49 +0000
Subject: WorkStruct: Merge the pending bit into the wq_data pointer

Reclaim a word from the size of the work_struct by folding the pending bit and
the wq_data pointer together.  This shouldn't cause misalignment problems as
all pointers should be at least 4-byte aligned.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 drivers/block/floppy.c    |  4 ++--
 include/linux/workqueue.h | 27 +++++++++++++++++++++++----
 kernel/workqueue.c        | 41 ++++++++++++++++++++++++++++++++---------
 3 files changed, 57 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 5a14fac13b12..aa1eb4466f9d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -1868,7 +1868,7 @@ static void show_floppy(void)
 	printk("fdc_busy=%lu\n", fdc_busy);
 	if (do_floppy)
 		printk("do_floppy=%p\n", do_floppy);
-	if (floppy_work.pending)
+	if (work_pending(&floppy_work))
 		printk("floppy_work.func=%p\n", floppy_work.func);
 	if (timer_pending(&fd_timer))
 		printk("fd_timer.function=%p\n", fd_timer.function);
@@ -4498,7 +4498,7 @@ static void floppy_release_irq_and_dma(void)
 		printk("floppy timer still active:%s\n", timeout_message);
 	if (timer_pending(&fd_timer))
 		printk("auxiliary floppy timer still active\n");
-	if (floppy_work.pending)
+	if (work_pending(&floppy_work))
 		printk("work still pending\n");
 #endif
 	old_fdc = fdc;
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index cef40b22ff9a..ecc017d24cf3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -14,11 +14,15 @@ struct workqueue_struct;
 typedef void (*work_func_t)(void *data);
 
 struct work_struct {
-	unsigned long pending;
+	/* the first word is the work queue pointer and the pending flag
+	 * rolled into one */
+	unsigned long management;
+#define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
+#define WORK_STRUCT_FLAG_MASK (3UL)
+#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
 	void *data;
-	void *wq_data;
 };
 
 struct delayed_work {
@@ -65,7 +69,7 @@ struct execute_work {
 #define INIT_WORK(_work, _func, _data)				\
 	do {							\
 		INIT_LIST_HEAD(&(_work)->entry);		\
-		(_work)->pending = 0;				\
+		(_work)->management = 0;			\
 		PREPARE_WORK((_work), (_func), (_data));	\
 	} while (0)
 
@@ -75,6 +79,21 @@ struct execute_work {
 		init_timer(&(_work)->timer);			\
 	} while (0)
 
+/**
+ * work_pending - Find out whether a work item is currently pending
+ * @work: The work item in question
+ */
+#define work_pending(work) \
+	test_bit(WORK_STRUCT_PENDING, &(work)->management)
+
+/**
+ * delayed_work_pending - Find out whether a delayable work item is currently
+ * pending
+ * @work: The work item in question
+ */
+#define delayed_work_pending(work) \
+	test_bit(WORK_STRUCT_PENDING, &(work)->work.management)
+
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
 						    int singlethread);
@@ -115,7 +134,7 @@ static inline int cancel_delayed_work(struct delayed_work *work)
 
 	ret = del_timer_sync(&work->timer);
 	if (ret)
-		clear_bit(0, &work->work.pending);
+		clear_bit(WORK_STRUCT_PENDING, &work->work.management);
 	return ret;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1e9d61ecf762..967479756511 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -80,6 +80,29 @@ static inline int is_single_threaded(struct workqueue_struct *wq)
 	return list_empty(&wq->list);
 }
 
+static inline void set_wq_data(struct work_struct *work, void *wq)
+{
+	unsigned long new, old, res;
+
+	/* assume the pending flag is already set and that the task has already
+	 * been queued on this workqueue */
+	new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
+	res = work->management;
+	if (res != new) {
+		do {
+			old = res;
+			new = (unsigned long) wq;
+			new |= (old & WORK_STRUCT_FLAG_MASK);
+			res = cmpxchg(&work->management, old, new);
+		} while (res != old);
+	}
+}
+
+static inline void *get_wq_data(struct work_struct *work)
+{
+	return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
+}
+
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
 			 struct work_struct *work)
@@ -87,7 +110,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 	unsigned long flags;
 
 	spin_lock_irqsave(&cwq->lock, flags);
-	work->wq_data = cwq;
+	set_wq_data(work, cwq);
 	list_add_tail(&work->entry, &cwq->worklist);
 	cwq->insert_sequence++;
 	wake_up(&cwq->more_work);
@@ -108,7 +131,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
 	int ret = 0, cpu = get_cpu();
 
-	if (!test_and_set_bit(0, &work->pending)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
 		if (unlikely(is_single_threaded(wq)))
 			cpu = singlethread_cpu;
 		BUG_ON(!list_empty(&work->entry));
@@ -123,7 +146,7 @@ EXPORT_SYMBOL_GPL(queue_work);
 static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
-	struct workqueue_struct *wq = dwork->work.wq_data;
+	struct workqueue_struct *wq = get_wq_data(&dwork->work);
 	int cpu = smp_processor_id();
 
 	if (unlikely(is_single_threaded(wq)))
@@ -150,12 +173,12 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 	if (delay == 0)
 		return queue_work(wq, work);
 
-	if (!test_and_set_bit(0, &work->pending)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
 		/* This stores wq for the moment, for the timer_fn */
-		work->wq_data = wq;
+		set_wq_data(work, wq);
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -182,12 +205,12 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 
-	if (!test_and_set_bit(0, &work->pending)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
 		/* This stores wq for the moment, for the timer_fn */
-		work->wq_data = wq;
+		set_wq_data(work, wq);
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
@@ -223,8 +246,8 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irqrestore(&cwq->lock, flags);
 
-		BUG_ON(work->wq_data != cwq);
-		clear_bit(0, &work->pending);
+		BUG_ON(get_wq_data(work) != cwq);
+		clear_bit(WORK_STRUCT_PENDING, &work->management);
 		f(data);
 
 		spin_lock_irqsave(&cwq->lock, flags);
-- 
cgit v1.2.3


From 65f27f38446e1976cc98fd3004b110fedcddd189 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:55:48 +0000
Subject: WorkStruct: Pass the work_struct pointer instead of context data

Pass the work_struct pointer to the work function rather than context data.
The work function can use container_of() to work out the data.

For the cases where the container of the work_struct may go away the moment the
pending bit is cleared, it is made possible to defer the release of the
structure by deferring the clearing of the pending bit.

To make this work, an extra flag is introduced into the management side of the
work_struct.  This governs auto-release of the structure upon execution.

Ordinarily, the work queue executor would release the work_struct for further
scheduling or deallocation by clearing the pending bit prior to jumping to the
work function.  This means that, unless the driver makes some guarantee itself
that the work_struct won't go away, the work function may not access anything
else in the work_struct or its container lest they be deallocated..  This is a
problem if the auxiliary data is taken away (as done by the last patch).

However, if the pending bit is *not* cleared before jumping to the work
function, then the work function *may* access the work_struct and its container
with no problems.  But then the work function must itself release the
work_struct by calling work_release().

In most cases, automatic release is fine, so this is the default.  Special
initiators exist for the non-auto-release case (ending in _NAR).


Signed-Off-By: David Howells <dhowells@redhat.com>
---
 arch/x86_64/kernel/mce.c           |  6 +--
 arch/x86_64/kernel/smpboot.c       | 12 +++--
 arch/x86_64/kernel/time.c          |  4 +-
 block/as-iosched.c                 |  7 +--
 block/cfq-iosched.c                |  8 +--
 block/ll_rw_blk.c                  |  8 +--
 crypto/cryptomgr.c                 |  7 +--
 drivers/acpi/osl.c                 | 25 +++-------
 drivers/ata/libata-core.c          | 20 ++++----
 drivers/ata/libata-scsi.c          | 14 +++---
 drivers/ata/libata.h               |  4 +-
 drivers/block/floppy.c             |  6 +--
 drivers/char/random.c              |  6 +--
 drivers/char/sysrq.c               |  4 +-
 drivers/char/tty_io.c              | 31 ++++++------
 drivers/char/vt.c                  |  6 +--
 drivers/cpufreq/cpufreq.c          | 10 ++--
 drivers/input/keyboard/atkbd.c     |  6 +--
 drivers/input/serio/libps2.c       |  6 +--
 drivers/net/e1000/e1000_main.c     | 10 ++--
 drivers/pci/pcie/aer/aerdrv.c      |  2 +-
 drivers/pci/pcie/aer/aerdrv.h      |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c |  8 +--
 drivers/scsi/scsi_scan.c           |  7 +--
 drivers/scsi/scsi_sysfs.c          | 10 ++--
 fs/aio.c                           | 14 +++---
 fs/bio.c                           |  6 +--
 fs/file.c                          |  6 ++-
 fs/nfs/client.c                    |  2 +-
 fs/nfs/namespace.c                 |  9 ++--
 fs/nfs/nfs4_fs.h                   |  2 +-
 fs/nfs/nfs4renewd.c                |  5 +-
 include/linux/libata.h             |  3 +-
 include/linux/workqueue.h          | 99 +++++++++++++++++++++++++++++---------
 include/net/inet_timewait_sock.h   |  2 +-
 ipc/util.c                         |  7 ++-
 kernel/kmod.c                      | 16 +++---
 kernel/kthread.c                   | 13 +++--
 kernel/power/poweroff.c            |  4 +-
 kernel/sys.c                       |  4 +-
 kernel/workqueue.c                 | 19 +++-----
 mm/slab.c                          |  6 +--
 net/core/link_watch.c              |  6 +--
 net/ipv4/inet_timewait_sock.c      |  5 +-
 net/ipv4/tcp_minisocks.c           |  3 +-
 net/sunrpc/cache.c                 |  6 +--
 net/sunrpc/rpc_pipe.c              |  7 +--
 net/sunrpc/sched.c                 |  8 +--
 net/sunrpc/xprt.c                  |  7 +--
 net/sunrpc/xprtsock.c              | 18 ++++---
 security/keys/key.c                |  6 +--
 51 files changed, 293 insertions(+), 219 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 5306f2630905..c7587fc39015 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
  */
 
 static int check_interval = 5 * 60; /* 5 minutes */
-static void mcheck_timer(void *data);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer, NULL);
+static void mcheck_timer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
 
 static void mcheck_check_cpu(void *info)
 {
@@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info)
 		do_machine_check(NULL, 0);
 }
 
-static void mcheck_timer(void *data)
+static void mcheck_timer(struct work_struct *work)
 {
 	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
 	schedule_delayed_work(&mcheck_work, check_interval * HZ);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 62c2e747af58..9800147c4c68 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -753,14 +753,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
 }
 
 struct create_idle {
+	struct work_struct work;
 	struct task_struct *idle;
 	struct completion done;
 	int cpu;
 };
 
-void do_fork_idle(void *_c_idle)
+void do_fork_idle(struct work_struct *work)
 {
-	struct create_idle *c_idle = _c_idle;
+	struct create_idle *c_idle =
+		container_of(work, struct create_idle, work);
 
 	c_idle->idle = fork_idle(c_idle->cpu);
 	complete(&c_idle->done);
@@ -775,10 +777,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	int timeout;
 	unsigned long start_rip;
 	struct create_idle c_idle = {
+		.work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
 		.cpu = cpu,
 		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
-	DECLARE_WORK(work, do_fork_idle, &c_idle);
 
 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
 	if (!cpu_gdt_descr[cpu].address &&
@@ -825,9 +827,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	 * thread.
 	 */
 	if (!keventd_up() || current_is_keventd())
-		work.func(work.data);
+		c_idle.work.func(&c_idle.work);
 	else {
-		schedule_work(&work);
+		schedule_work(&c_idle.work);
 		wait_for_completion(&c_idle.done);
 	}
 
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index e3ef544d2cfb..9f05bc9b2dad 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0;
 static unsigned int cpufreq_init = 0;
 static struct work_struct cpufreq_delayed_get_work;
 
-static void handle_cpufreq_delayed_get(void *v)
+static void handle_cpufreq_delayed_get(struct work_struct *v)
 {
 	unsigned int cpu;
 	for_each_online_cpu(cpu) {
@@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = {
 
 static int __init cpufreq_tsc(void)
 {
-	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
 	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
 				       CPUFREQ_TRANSITION_NOTIFIER))
 		cpufreq_init = 1;
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 50b95e4c1425..f371c9359999 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1274,9 +1274,10 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
  *
  * FIXME! dispatch queue is not a queue at all!
  */
-static void as_work_handler(void *data)
+static void as_work_handler(struct work_struct *work)
 {
-	struct request_queue *q = data;
+	struct as_data *ad = container_of(work, struct as_data, antic_work);
+	struct request_queue *q = ad->q;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -1332,7 +1333,7 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 	ad->antic_timer.function = as_antic_timeout;
 	ad->antic_timer.data = (unsigned long)q;
 	init_timer(&ad->antic_timer);
-	INIT_WORK(&ad->antic_work, as_work_handler, q);
+	INIT_WORK(&ad->antic_work, as_work_handler);
 
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1d9c3c70a9a0..6cec3a1dccb8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1841,9 +1841,11 @@ queue_fail:
 	return 1;
 }
 
-static void cfq_kick_queue(void *data)
+static void cfq_kick_queue(struct work_struct *work)
 {
-	request_queue_t *q = data;
+	struct cfq_data *cfqd =
+		container_of(work, struct cfq_data, unplug_work);
+	request_queue_t *q = cfqd->queue;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -1987,7 +1989,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->idle_class_timer.function = cfq_idle_class_timer;
 	cfqd->idle_class_timer.data = (unsigned long) cfqd;
 
-	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
 
 	cfqd->cfq_quantum = cfq_quantum;
 	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9eaee6640535..eb4cf6df7374 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -34,7 +34,7 @@
  */
 #include <scsi/scsi_cmnd.h>
 
-static void blk_unplug_work(void *data);
+static void blk_unplug_work(struct work_struct *work);
 static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
@@ -227,7 +227,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	if (q->unplug_delay == 0)
 		q->unplug_delay = 1;
 
-	INIT_WORK(&q->unplug_work, blk_unplug_work, q);
+	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
@@ -1631,9 +1631,9 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
 	}
 }
 
-static void blk_unplug_work(void *data)
+static void blk_unplug_work(struct work_struct *work)
 {
-	request_queue_t *q = data;
+	request_queue_t *q = container_of(work, request_queue_t, unplug_work);
 
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
index 9b5b15601068..2ebffb84f1d9 100644
--- a/crypto/cryptomgr.c
+++ b/crypto/cryptomgr.c
@@ -40,9 +40,10 @@ struct cryptomgr_param {
 	char template[CRYPTO_MAX_ALG_NAME];
 };
 
-static void cryptomgr_probe(void *data)
+static void cryptomgr_probe(struct work_struct *work)
 {
-	struct cryptomgr_param *param = data;
+	struct cryptomgr_param *param =
+		container_of(work, struct cryptomgr_param, work);
 	struct crypto_template *tmpl;
 	struct crypto_instance *inst;
 	int err;
@@ -112,7 +113,7 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 	param->larval.type = larval->alg.cra_flags;
 	param->larval.mask = larval->mask;
 
-	INIT_WORK(&param->work, cryptomgr_probe, param);
+	INIT_WORK(&param->work, cryptomgr_probe);
 	schedule_work(&param->work);
 
 	return NOTIFY_STOP;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 068fe4f100b0..02b30ae6a68e 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -50,6 +50,7 @@ ACPI_MODULE_NAME("osl")
 struct acpi_os_dpc {
 	acpi_osd_exec_callback function;
 	void *context;
+	struct work_struct work;
 };
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -564,12 +565,9 @@ void acpi_os_derive_pci_id(acpi_handle rhandle,	/* upper bound  */
 	acpi_os_derive_pci_id_2(rhandle, chandle, id, &is_bridge, &bus_number);
 }
 
-static void acpi_os_execute_deferred(void *context)
+static void acpi_os_execute_deferred(struct work_struct *work)
 {
-	struct acpi_os_dpc *dpc = NULL;
-
-
-	dpc = (struct acpi_os_dpc *)context;
+	struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work);
 	if (!dpc) {
 		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
 		return;
@@ -602,7 +600,6 @@ acpi_status acpi_os_execute(acpi_execute_type type,
 {
 	acpi_status status = AE_OK;
 	struct acpi_os_dpc *dpc;
-	struct work_struct *task;
 
 	ACPI_FUNCTION_TRACE("os_queue_for_execution");
 
@@ -615,28 +612,22 @@ acpi_status acpi_os_execute(acpi_execute_type type,
 
 	/*
 	 * Allocate/initialize DPC structure.  Note that this memory will be
-	 * freed by the callee.  The kernel handles the tq_struct list  in a
+	 * freed by the callee.  The kernel handles the work_struct list  in a
 	 * way that allows us to also free its memory inside the callee.
 	 * Because we may want to schedule several tasks with different
 	 * parameters we can't use the approach some kernel code uses of
-	 * having a static tq_struct.
-	 * We can save time and code by allocating the DPC and tq_structs
-	 * from the same memory.
+	 * having a static work_struct.
 	 */
 
-	dpc =
-	    kmalloc(sizeof(struct acpi_os_dpc) + sizeof(struct work_struct),
-		    GFP_ATOMIC);
+	dpc = kmalloc(sizeof(struct acpi_os_dpc), GFP_ATOMIC);
 	if (!dpc)
 		return_ACPI_STATUS(AE_NO_MEMORY);
 
 	dpc->function = function;
 	dpc->context = context;
 
-	task = (void *)(dpc + 1);
-	INIT_WORK(task, acpi_os_execute_deferred, (void *)dpc);
-
-	if (!queue_work(kacpid_wq, task)) {
+	INIT_WORK(&dpc->work, acpi_os_execute_deferred);
+	if (!queue_work(kacpid_wq, &dpc->work)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "Call to queue_work() failed.\n"));
 		kfree(dpc);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 0bb4b4dced76..b5f2da6ac80e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -914,7 +914,7 @@ static unsigned int ata_id_xfermask(const u16 *id)
  *	ata_port_queue_task - Queue port_task
  *	@ap: The ata_port to queue port_task for
  *	@fn: workqueue function to be scheduled
- *	@data: data value to pass to workqueue function
+ *	@data: data for @fn to use
  *	@delay: delay time for workqueue function
  *
  *	Schedule @fn(@data) for execution after @delay jiffies using
@@ -929,7 +929,7 @@ static unsigned int ata_id_xfermask(const u16 *id)
  *	LOCKING:
  *	Inherited from caller.
  */
-void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
+void ata_port_queue_task(struct ata_port *ap, work_func_t fn, void *data,
 			 unsigned long delay)
 {
 	int rc;
@@ -937,7 +937,8 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
 	if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK)
 		return;
 
-	PREPARE_DELAYED_WORK(&ap->port_task, fn, data);
+	PREPARE_DELAYED_WORK(&ap->port_task, fn);
+	ap->port_task_data = data;
 
 	rc = queue_delayed_work(ata_wq, &ap->port_task, delay);
 
@@ -4292,10 +4293,11 @@ fsm_start:
 	return poll_next;
 }
 
-static void ata_pio_task(void *_data)
+static void ata_pio_task(struct work_struct *work)
 {
-	struct ata_queued_cmd *qc = _data;
-	struct ata_port *ap = qc->ap;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, port_task.work);
+	struct ata_queued_cmd *qc = ap->port_task_data;
 	u8 status;
 	int poll_next;
 
@@ -5317,9 +5319,9 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host,
 	ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
 #endif
 
-	INIT_DELAYED_WORK(&ap->port_task, NULL, NULL);
-	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap);
-	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan, ap);
+	INIT_DELAYED_WORK(&ap->port_task, NULL);
+	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
+	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 5c1fc467fc7f..c872b324dbd3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3079,7 +3079,7 @@ static void ata_scsi_remove_dev(struct ata_device *dev)
 
 /**
  *	ata_scsi_hotplug - SCSI part of hotplug
- *	@data: Pointer to ATA port to perform SCSI hotplug on
+ *	@work: Pointer to ATA port to perform SCSI hotplug on
  *
  *	Perform SCSI part of hotplug.  It's executed from a separate
  *	workqueue after EH completes.  This is necessary because SCSI
@@ -3089,9 +3089,10 @@ static void ata_scsi_remove_dev(struct ata_device *dev)
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-void ata_scsi_hotplug(void *data)
+void ata_scsi_hotplug(struct work_struct *work)
 {
-	struct ata_port *ap = data;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, hotplug_task.work);
 	int i;
 
 	if (ap->pflags & ATA_PFLAG_UNLOADING) {
@@ -3190,7 +3191,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 
 /**
  *	ata_scsi_dev_rescan - initiate scsi_rescan_device()
- *	@data: Pointer to ATA port to perform scsi_rescan_device()
+ *	@work: Pointer to ATA port to perform scsi_rescan_device()
  *
  *	After ATA pass thru (SAT) commands are executed successfully,
  *	libata need to propagate the changes to SCSI layer.  This
@@ -3200,9 +3201,10 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-void ata_scsi_dev_rescan(void *data)
+void ata_scsi_dev_rescan(struct work_struct *work)
 {
-	struct ata_port *ap = data;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, scsi_rescan_task);
 	struct ata_device *dev;
 	unsigned int i;
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 0ed263be652a..7e0f3aff873d 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -81,7 +81,7 @@ extern struct scsi_transport_template ata_scsi_transport_template;
 
 extern void ata_scsi_scan_host(struct ata_port *ap);
 extern int ata_scsi_offline_dev(struct ata_device *dev);
-extern void ata_scsi_hotplug(void *data);
+extern void ata_scsi_hotplug(struct work_struct *work);
 extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf,
 			       unsigned int buflen);
 
@@ -111,7 +111,7 @@ extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
                         unsigned int (*actor) (struct ata_scsi_args *args,
                                            u8 *rbuf, unsigned int buflen));
 extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
-extern void ata_scsi_dev_rescan(void *data);
+extern void ata_scsi_dev_rescan(struct work_struct *work);
 extern int ata_bus_probe(struct ata_port *ap);
 
 /* libata-eh.c */
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index aa1eb4466f9d..3f1b38276e96 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -992,11 +992,11 @@ static void empty(void)
 {
 }
 
-static DECLARE_WORK(floppy_work, NULL, NULL);
+static DECLARE_WORK(floppy_work, NULL);
 
 static void schedule_bh(void (*handler) (void))
 {
-	PREPARE_WORK(&floppy_work, (work_func_t)handler, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)handler);
 	schedule_work(&floppy_work);
 }
 
@@ -1008,7 +1008,7 @@ static void cancel_activity(void)
 
 	spin_lock_irqsave(&floppy_lock, flags);
 	do_floppy = NULL;
-	PREPARE_WORK(&floppy_work, (work_func_t)empty, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)empty);
 	del_timer(&fd_timer);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index f2ab61f3e8ae..fa764688cad1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1422,9 +1422,9 @@ static struct keydata {
 
 static unsigned int ip_cnt;
 
-static void rekey_seq_generator(void *private_);
+static void rekey_seq_generator(struct work_struct *work);
 
-static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL);
+static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator);
 
 /*
  * Lock avoidance:
@@ -1438,7 +1438,7 @@ static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL);
  * happen, and even if that happens only a not perfectly compliant
  * ISN is generated, nothing fatal.
  */
-static void rekey_seq_generator(void *private_)
+static void rekey_seq_generator(struct work_struct *work)
 {
 	struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)];
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 5f49280779fb..c64f5bcff947 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -219,13 +219,13 @@ static struct sysrq_key_op sysrq_term_op = {
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
-static void moom_callback(void *ignored)
+static void moom_callback(struct work_struct *ignored)
 {
 	out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL],
 			GFP_KERNEL, 0);
 }
 
-static DECLARE_WORK(moom_work, moom_callback, NULL);
+static DECLARE_WORK(moom_work, moom_callback);
 
 static void sysrq_handle_moom(int key, struct tty_struct *tty)
 {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 7297acfe520c..83e9e7d9b58c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1254,7 +1254,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 	
 /**
  *	do_tty_hangup		-	actual handler for hangup events
- *	@data: tty device
+ *	@work: tty device
  *
  *	This can be called by the "eventd" kernel thread.  That is process
  *	synchronous but doesn't hold any locks, so we need to make sure we
@@ -1274,9 +1274,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *		tasklist_lock to walk task list for hangup event
  *
  */
-static void do_tty_hangup(void *data)
+static void do_tty_hangup(struct work_struct *work)
 {
-	struct tty_struct *tty = (struct tty_struct *) data;
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, hangup_work);
 	struct file * cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
@@ -1433,7 +1434,7 @@ void tty_vhangup(struct tty_struct * tty)
 
 	printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
 #endif
-	do_tty_hangup((void *) tty);
+	do_tty_hangup(&tty->hangup_work);
 }
 EXPORT_SYMBOL(tty_vhangup);
 
@@ -3304,12 +3305,13 @@ int tty_ioctl(struct inode * inode, struct file * file,
  * Nasty bug: do_SAK is being called in interrupt context.  This can
  * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
  */
-static void __do_SAK(void *arg)
+static void __do_SAK(struct work_struct *work)
 {
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, SAK_work);
 #ifdef TTY_SOFT_SAK
 	tty_hangup(tty);
 #else
-	struct tty_struct *tty = arg;
 	struct task_struct *g, *p;
 	int session;
 	int		i;
@@ -3388,7 +3390,7 @@ void do_SAK(struct tty_struct *tty)
 {
 	if (!tty)
 		return;
-	PREPARE_WORK(&tty->SAK_work, __do_SAK, tty);
+	PREPARE_WORK(&tty->SAK_work, __do_SAK);
 	schedule_work(&tty->SAK_work);
 }
 
@@ -3396,7 +3398,7 @@ EXPORT_SYMBOL(do_SAK);
 
 /**
  *	flush_to_ldisc
- *	@private_: tty structure passed from work queue.
+ *	@work: tty structure passed from work queue.
  *
  *	This routine is called out of the software interrupt to flush data
  *	from the buffer chain to the line discipline.
@@ -3406,9 +3408,10 @@ EXPORT_SYMBOL(do_SAK);
  *	receive_buf method is single threaded for each tty instance.
  */
  
-static void flush_to_ldisc(void *private_)
+static void flush_to_ldisc(struct work_struct *work)
 {
-	struct tty_struct *tty = (struct tty_struct *) private_;
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, buf.work.work);
 	unsigned long 	flags;
 	struct tty_ldisc *disc;
 	struct tty_buffer *tbuf, *head;
@@ -3553,7 +3556,7 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 
 	if (tty->low_latency)
-		flush_to_ldisc((void *) tty);
+		flush_to_ldisc(&tty->buf.work.work);
 	else
 		schedule_delayed_work(&tty->buf.work, 1);
 }
@@ -3580,17 +3583,17 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	tty->overrun_time = jiffies;
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
-	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc, tty);
+	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc);
 	init_MUTEX(&tty->buf.pty_sem);
 	mutex_init(&tty->termios_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
-	INIT_WORK(&tty->hangup_work, do_tty_hangup, tty);
+	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
 	spin_lock_init(&tty->read_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
-	INIT_WORK(&tty->SAK_work, NULL, NULL);
+	INIT_WORK(&tty->SAK_work, NULL);
 }
 
 /*
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 8e4413f6fbaf..8ee04adc37f0 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -155,7 +155,7 @@ static void con_flush_chars(struct tty_struct *tty);
 static void set_vesa_blanking(char __user *p);
 static void set_cursor(struct vc_data *vc);
 static void hide_cursor(struct vc_data *vc);
-static void console_callback(void *ignored);
+static void console_callback(struct work_struct *ignored);
 static void blank_screen_t(unsigned long dummy);
 static void set_palette(struct vc_data *vc);
 
@@ -174,7 +174,7 @@ static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */
 static int blankinterval = 10*60*HZ;
 static int vesa_off_interval;
 
-static DECLARE_WORK(console_work, console_callback, NULL);
+static DECLARE_WORK(console_work, console_callback);
 
 /*
  * fg_console is the current virtual console,
@@ -2154,7 +2154,7 @@ out:
  * with other console code and prevention of re-entrancy is
  * ensured with console_sem.
  */
-static void console_callback(void *ignored)
+static void console_callback(struct work_struct *ignored)
 {
 	acquire_console_sem();
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index dd0c2623e27b..7a7c6e6dfe4f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock);
 
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
-static void handle_update(void *data);
+static void handle_update(struct work_struct *work);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -665,7 +665,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 	mutex_init(&policy->lock);
 	mutex_lock(&policy->lock);
 	init_completion(&policy->kobj_unregister);
-	INIT_WORK(&policy->update, handle_update, (void *)(long)cpu);
+	INIT_WORK(&policy->update, handle_update);
 
 	/* call driver. From then on the cpufreq must be able
 	 * to accept all calls to ->verify and ->setpolicy for this CPU
@@ -895,9 +895,11 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
 }
 
 
-static void handle_update(void *data)
+static void handle_update(struct work_struct *work)
 {
-	unsigned int cpu = (unsigned int)(long)data;
+	struct cpufreq_policy *policy =
+		container_of(work, struct cpufreq_policy, update);
+	unsigned int cpu = policy->cpu;
 	dprintk("handle_update for cpu %u called\n", cpu);
 	cpufreq_update_policy(cpu);
 }
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index cbb93669d1ce..8451b29a3db5 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -567,9 +567,9 @@ static int atkbd_set_leds(struct atkbd *atkbd)
  * interrupt context.
  */
 
-static void atkbd_event_work(void *data)
+static void atkbd_event_work(struct work_struct *work)
 {
-	struct atkbd *atkbd = data;
+	struct atkbd *atkbd = container_of(work, struct atkbd, event_work);
 
 	mutex_lock(&atkbd->event_mutex);
 
@@ -943,7 +943,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 
 	atkbd->dev = dev;
 	ps2_init(&atkbd->ps2dev, serio);
-	INIT_WORK(&atkbd->event_work, atkbd_event_work, atkbd);
+	INIT_WORK(&atkbd->event_work, atkbd_event_work);
 	mutex_init(&atkbd->event_mutex);
 
 	switch (serio->id.type) {
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index e5b1b60757bb..b3e84d3bb7f7 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -251,9 +251,9 @@ EXPORT_SYMBOL(ps2_command);
  * ps2_schedule_command(), to a PS/2 device (keyboard, mouse, etc.)
  */
 
-static void ps2_execute_scheduled_command(void *data)
+static void ps2_execute_scheduled_command(struct work_struct *work)
 {
-	struct ps2work *ps2work = data;
+	struct ps2work *ps2work = container_of(work, struct ps2work, work);
 
 	ps2_command(ps2work->ps2dev, ps2work->param, ps2work->command);
 	kfree(ps2work);
@@ -278,7 +278,7 @@ int ps2_schedule_command(struct ps2dev *ps2dev, unsigned char *param, int comman
 	ps2work->ps2dev = ps2dev;
 	ps2work->command = command;
 	memcpy(ps2work->param, param, send);
-	INIT_WORK(&ps2work->work, ps2_execute_scheduled_command, ps2work);
+	INIT_WORK(&ps2work->work, ps2_execute_scheduled_command);
 
 	if (!schedule_work(&ps2work->work)) {
 		kfree(ps2work);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 726ec5e88ab2..03294400bc90 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -183,7 +183,7 @@ void e1000_set_ethtool_ops(struct net_device *netdev);
 static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
 static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
 static void e1000_tx_timeout(struct net_device *dev);
-static void e1000_reset_task(struct net_device *dev);
+static void e1000_reset_task(struct work_struct *work);
 static void e1000_smartspeed(struct e1000_adapter *adapter);
 static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
                                        struct sk_buff *skb);
@@ -908,8 +908,7 @@ e1000_probe(struct pci_dev *pdev,
 	adapter->phy_info_timer.function = &e1000_update_phy_info;
 	adapter->phy_info_timer.data = (unsigned long) adapter;
 
-	INIT_WORK(&adapter->reset_task,
-		(void (*)(void *))e1000_reset_task, netdev);
+	INIT_WORK(&adapter->reset_task, e1000_reset_task);
 
 	e1000_check_options(adapter);
 
@@ -3154,9 +3153,10 @@ e1000_tx_timeout(struct net_device *netdev)
 }
 
 static void
-e1000_reset_task(struct net_device *netdev)
+e1000_reset_task(struct work_struct *work)
 {
-	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_adapter *adapter =
+		container_of(work, struct e1000_adapter, reset_task);
 
 	e1000_reinit_locked(adapter);
 }
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 04c43ef529ac..55866b6b26fa 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -160,7 +160,7 @@ static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev)
 	rpc->e_lock = SPIN_LOCK_UNLOCKED;
 
 	rpc->rpd = dev;
-	INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev);
+	INIT_WORK(&rpc->dpc_handler, aer_isr);
 	rpc->prod_idx = rpc->cons_idx = 0;
 	mutex_init(&rpc->rpc_mutex);
 	init_waitqueue_head(&rpc->wait_release);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index daf0cad88fc8..3c0a58f64dd8 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -118,7 +118,7 @@ extern struct bus_type pcie_port_bus_type;
 extern void aer_enable_rootport(struct aer_rpc *rpc);
 extern void aer_delete_rootport(struct aer_rpc *rpc);
 extern int aer_init(struct pcie_device *dev);
-extern void aer_isr(void *context);
+extern void aer_isr(struct work_struct *work);
 extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 extern int aer_osc_setup(struct pci_dev *dev);
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 1c7e660d6535..08e13033ced8 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -690,14 +690,14 @@ static void aer_isr_one_error(struct pcie_device *p_device,
 
 /**
  * aer_isr - consume errors detected by root port
- * @context: pointer to a private data of pcie device
+ * @work: definition of this work item
  *
  * Invoked, as DPC, when root port records new detected error
  **/
-void aer_isr(void *context)
+void aer_isr(struct work_struct *work)
 {
-	struct pcie_device *p_device = (struct pcie_device *) context;
-	struct aer_rpc *rpc = get_service_data(p_device);
+	struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
+	struct pcie_device *p_device = rpc->rpd;
 	struct aer_err_source *e_src;
 
 	mutex_lock(&rpc->rpc_mutex);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 94a274645f6f..d3c5e964c964 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -362,9 +362,10 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	goto retry;
 }
 
-static void scsi_target_reap_usercontext(void *data)
+static void scsi_target_reap_usercontext(struct work_struct *work)
 {
-	struct scsi_target *starget = data;
+	struct scsi_target *starget =
+		container_of(work, struct scsi_target, ew.work);
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 	unsigned long flags;
 
@@ -400,7 +401,7 @@ void scsi_target_reap(struct scsi_target *starget)
 		starget->state = STARGET_DEL;
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		execute_in_process_context(scsi_target_reap_usercontext,
-					   starget, &starget->ew);
+					   &starget->ew);
 		return;
 
 	}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e1a91665d1c2..259c90cfa367 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -218,16 +218,16 @@ static void scsi_device_cls_release(struct class_device *class_dev)
 	put_device(&sdev->sdev_gendev);
 }
 
-static void scsi_device_dev_release_usercontext(void *data)
+static void scsi_device_dev_release_usercontext(struct work_struct *work)
 {
-	struct device *dev = data;
 	struct scsi_device *sdev;
 	struct device *parent;
 	struct scsi_target *starget;
 	unsigned long flags;
 
-	parent = dev->parent;
-	sdev = to_scsi_device(dev);
+	sdev = container_of(work, struct scsi_device, ew.work);
+
+	parent = sdev->sdev_gendev.parent;
 	starget = to_scsi_target(parent);
 
 	spin_lock_irqsave(sdev->host->host_lock, flags);
@@ -258,7 +258,7 @@ static void scsi_device_dev_release_usercontext(void *data)
 static void scsi_device_dev_release(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	execute_in_process_context(scsi_device_dev_release_usercontext, dev,
+	execute_in_process_context(scsi_device_dev_release_usercontext,
 				   &sdp->ew);
 }
 
diff --git a/fs/aio.c b/fs/aio.c
index 11a1a7100ad6..ca1c5180a17f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -53,13 +53,13 @@ static kmem_cache_t	*kioctx_cachep;
 static struct workqueue_struct *aio_wq;
 
 /* Used for rare fput completion. */
-static void aio_fput_routine(void *);
-static DECLARE_WORK(fput_work, aio_fput_routine, NULL);
+static void aio_fput_routine(struct work_struct *);
+static DECLARE_WORK(fput_work, aio_fput_routine);
 
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-static void aio_kick_handler(void *);
+static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
@@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
-	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx);
+	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freectx;
@@ -470,7 +470,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 		wake_up(&ctx->wait);
 }
 
-static void aio_fput_routine(void *data)
+static void aio_fput_routine(struct work_struct *data)
 {
 	spin_lock_irq(&fput_lock);
 	while (likely(!list_empty(&fput_head))) {
@@ -859,9 +859,9 @@ static inline void aio_run_all_iocbs(struct kioctx *ctx)
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+static void aio_kick_handler(struct work_struct *work)
 {
-	struct kioctx *ctx = data;
+	struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
 	mm_segment_t oldfs = get_fs();
 	int requeue;
 
diff --git a/fs/bio.c b/fs/bio.c
index f95c8749499f..c6c07ca5b5a9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -955,16 +955,16 @@ static void bio_release_pages(struct bio *bio)
  * run one bio_put() against the BIO.
  */
 
-static void bio_dirty_fn(void *data);
+static void bio_dirty_fn(struct work_struct *work);
 
-static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL);
+static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
 static DEFINE_SPINLOCK(bio_dirty_lock);
 static struct bio *bio_dirty_list;
 
 /*
  * This runs in process context
  */
-static void bio_dirty_fn(void *data)
+static void bio_dirty_fn(struct work_struct *work)
 {
 	unsigned long flags;
 	struct bio *bio;
diff --git a/fs/file.c b/fs/file.c
index 8e81775c5dc8..3787e82f54c1 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -91,8 +91,10 @@ out:
 	spin_unlock(&fddef->lock);
 }
 
-static void free_fdtable_work(struct fdtable_defer *f)
+static void free_fdtable_work(struct work_struct *work)
 {
+	struct fdtable_defer *f =
+		container_of(work, struct fdtable_defer, wq);
 	struct fdtable *fdt;
 
 	spin_lock_bh(&f->lock);
@@ -351,7 +353,7 @@ static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
 	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+	INIT_WORK(&fddef->wq, free_fdtable_work);
 	init_timer(&fddef->timer);
 	fddef->timer.data = (unsigned long)fddef;
 	fddef->timer.function = fdtable_timer;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 6f0487d6f44a..23ab145daa2d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	INIT_LIST_HEAD(&clp->cl_state_owners);
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
-	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5ed798bc1cf7..371b804e7cc8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -18,11 +18,10 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static void nfs_expire_automounts(void *list);
+static void nfs_expire_automounts(struct work_struct *work);
 
 LIST_HEAD(nfs_automount_list);
-static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts,
-			    &nfs_automount_list);
+static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
 static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
@@ -165,9 +164,9 @@ struct inode_operations nfs_referral_inode_operations = {
 	.follow_link	= nfs_follow_mountpoint,
 };
 
-static void nfs_expire_automounts(void *data)
+static void nfs_expire_automounts(struct work_struct *work)
 {
-	struct list_head *list = (struct list_head *)data;
+	struct list_head *list = &nfs_automount_list;
 
 	mark_mounts_for_expiry(list);
 	if (!list_empty(list))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6f346677332d..c26cd978c7cc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -185,7 +185,7 @@ extern const u32 nfs4_fs_locations_bitmap[2];
 extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
-extern void nfs4_renew_state(void *);
+extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 7b6df1852e75..823298561c0a 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -59,9 +59,10 @@
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
 void
-nfs4_renew_state(void *data)
+nfs4_renew_state(struct work_struct *work)
 {
-	struct nfs_client *clp = (struct nfs_client *)data;
+	struct nfs_client *clp =
+		container_of(work, struct nfs_client, cl_renewd.work);
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5f04006e8dd2..b3f32eadbef5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -568,6 +568,7 @@ struct ata_port {
 	struct ata_host		*host;
 	struct device 		*dev;
 
+	void			*port_task_data;
 	struct delayed_work	port_task;
 	struct delayed_work	hotplug_task;
 	struct work_struct	scsi_rescan_task;
@@ -747,7 +748,7 @@ extern int ata_ratelimit(void);
 extern unsigned int ata_busy_sleep(struct ata_port *ap,
 				   unsigned long timeout_pat,
 				   unsigned long timeout);
-extern void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *),
+extern void ata_port_queue_task(struct ata_port *ap, work_func_t fn,
 				void *data, unsigned long delay);
 extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
 			     unsigned long interval_msec,
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ecc017d24cf3..4a3ea83c6d16 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -11,18 +11,19 @@
 
 struct workqueue_struct;
 
-typedef void (*work_func_t)(void *data);
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
 
 struct work_struct {
-	/* the first word is the work queue pointer and the pending flag
-	 * rolled into one */
+	/* the first word is the work queue pointer and the flags rolled into
+	 * one */
 	unsigned long management;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
+#define WORK_STRUCT_NOAUTOREL 1		/* F if work item automatically released on exec */
 #define WORK_STRUCT_FLAG_MASK (3UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
-	void *data;
 };
 
 struct delayed_work {
@@ -34,48 +35,77 @@ struct execute_work {
 	struct work_struct work;
 };
 
-#define __WORK_INITIALIZER(n, f, d) {				\
+#define __WORK_INITIALIZER(n, f) {				\
+	.management = 0,					\
         .entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
-	.data = (d),						\
 	}
 
-#define __DELAYED_WORK_INITIALIZER(n, f, d) {			\
-	.work = __WORK_INITIALIZER((n).work, (f), (d)),		\
+#define __WORK_INITIALIZER_NAR(n, f) {				\
+	.management = (1 << WORK_STRUCT_NOAUTOREL),		\
+        .entry	= { &(n).entry, &(n).entry },			\
+	.func = (f),						\
+	}
+
+#define __DELAYED_WORK_INITIALIZER(n, f) {			\
+	.work = __WORK_INITIALIZER((n).work, (f)),		\
+	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
+	}
+
+#define __DELAYED_WORK_INITIALIZER_NAR(n, f) {			\
+	.work = __WORK_INITIALIZER_NAR((n).work, (f)),		\
 	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 	}
 
-#define DECLARE_WORK(n, f, d)					\
-	struct work_struct n = __WORK_INITIALIZER(n, f, d)
+#define DECLARE_WORK(n, f)					\
+	struct work_struct n = __WORK_INITIALIZER(n, f)
+
+#define DECLARE_WORK_NAR(n, f)					\
+	struct work_struct n = __WORK_INITIALIZER_NAR(n, f)
 
-#define DECLARE_DELAYED_WORK(n, f, d)				\
-	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, d)
+#define DECLARE_DELAYED_WORK(n, f)				\
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+
+#define DECLARE_DELAYED_WORK_NAR(n, f)			\
+	struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f)
 
 /*
- * initialize a work item's function and data pointers
+ * initialize a work item's function pointer
  */
-#define PREPARE_WORK(_work, _func, _data)			\
+#define PREPARE_WORK(_work, _func)				\
 	do {							\
 		(_work)->func = (_func);			\
-		(_work)->data = (_data);			\
 	} while (0)
 
-#define PREPARE_DELAYED_WORK(_work, _func, _data)		\
-	PREPARE_WORK(&(_work)->work, (_func), (_data))
+#define PREPARE_DELAYED_WORK(_work, _func)			\
+	PREPARE_WORK(&(_work)->work, (_func))
 
 /*
  * initialize all of a work item in one go
  */
-#define INIT_WORK(_work, _func, _data)				\
+#define INIT_WORK(_work, _func)					\
 	do {							\
-		INIT_LIST_HEAD(&(_work)->entry);		\
 		(_work)->management = 0;			\
-		PREPARE_WORK((_work), (_func), (_data));	\
+		INIT_LIST_HEAD(&(_work)->entry);		\
+		PREPARE_WORK((_work), (_func));			\
+	} while (0)
+
+#define INIT_WORK_NAR(_work, _func)					\
+	do {								\
+		(_work)->management = (1 << WORK_STRUCT_NOAUTOREL);	\
+		INIT_LIST_HEAD(&(_work)->entry);			\
+		PREPARE_WORK((_work), (_func));				\
+	} while (0)
+
+#define INIT_DELAYED_WORK(_work, _func)				\
+	do {							\
+		INIT_WORK(&(_work)->work, (_func));		\
+		init_timer(&(_work)->timer);			\
 	} while (0)
 
-#define INIT_DELAYED_WORK(_work, _func, _data)		\
+#define INIT_DELAYED_WORK_NAR(_work, _func)			\
 	do {							\
-		INIT_WORK(&(_work)->work, (_func), (_data));	\
+		INIT_WORK_NAR(&(_work)->work, (_func));		\
 		init_timer(&(_work)->timer);			\
 	} while (0)
 
@@ -94,6 +124,27 @@ struct execute_work {
 #define delayed_work_pending(work) \
 	test_bit(WORK_STRUCT_PENDING, &(work)->work.management)
 
+/**
+ * work_release - Release a work item under execution
+ * @work: The work item to release
+ *
+ * This is used to release a work item that has been initialised with automatic
+ * release mode disabled (WORK_STRUCT_NOAUTOREL is set).  This gives the work
+ * function the opportunity to grab auxiliary data from the container of the
+ * work_struct before clearing the pending bit as the work_struct may be
+ * subject to deallocation the moment the pending bit is cleared.
+ *
+ * In such a case, this should be called in the work function after it has
+ * fetched any data it may require from the containter of the work_struct.
+ * After this function has been called, the work_struct may be scheduled for
+ * further execution or it may be deallocated unless other precautions are
+ * taken.
+ *
+ * This should also be used to release a delayed work item.
+ */
+#define work_release(work) \
+	clear_bit(WORK_STRUCT_PENDING, &(work)->management)
+
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
 						    int singlethread);
@@ -112,7 +163,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work));
 extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
 
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
-extern int schedule_on_each_cpu(work_func_t func, void *info);
+extern int schedule_on_each_cpu(work_func_t func);
 extern void flush_scheduled_work(void);
 extern int current_is_keventd(void);
 extern int keventd_up(void);
@@ -121,7 +172,7 @@ extern void init_workqueues(void);
 void cancel_rearming_delayed_work(struct delayed_work *work);
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
 				       struct delayed_work *);
-int execute_in_process_context(work_func_t fn, void *, struct execute_work *);
+int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 5f48748fe017..f7be1ac73601 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -84,7 +84,7 @@ struct inet_timewait_death_row {
 };
 
 extern void inet_twdr_hangman(unsigned long data);
-extern void inet_twdr_twkill_work(void *data);
+extern void inet_twdr_twkill_work(struct work_struct *work);
 extern void inet_twdr_twcal_tick(unsigned long data);
 
 #if (BITS_PER_LONG == 64)
diff --git a/ipc/util.c b/ipc/util.c
index cd8bb14a431f..a9b7a227b8d4 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -514,6 +514,11 @@ void ipc_rcu_getref(void *ptr)
 	container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
 }
 
+static void ipc_do_vfree(struct work_struct *work)
+{
+	vfree(container_of(work, struct ipc_rcu_sched, work));
+}
+
 /**
  * ipc_schedule_free - free ipc + rcu space
  * @head: RCU callback structure for queued work
@@ -528,7 +533,7 @@ static void ipc_schedule_free(struct rcu_head *head)
 	struct ipc_rcu_sched *sched =
 			container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
 
-	INIT_WORK(&sched->work, vfree, sched);
+	INIT_WORK(&sched->work, ipc_do_vfree);
 	schedule_work(&sched->work);
 }
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bb4e29d924e4..7dc7a9dad6ac 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -114,6 +114,7 @@ EXPORT_SYMBOL(request_module);
 #endif /* CONFIG_KMOD */
 
 struct subprocess_info {
+	struct work_struct work;
 	struct completion *complete;
 	char *path;
 	char **argv;
@@ -221,9 +222,10 @@ static int wait_for_helper(void *data)
 }
 
 /* This is run by khelper thread  */
-static void __call_usermodehelper(void *data)
+static void __call_usermodehelper(struct work_struct *work)
 {
-	struct subprocess_info *sub_info = data;
+	struct subprocess_info *sub_info =
+		container_of(work, struct subprocess_info, work);
 	pid_t pid;
 	int wait = sub_info->wait;
 
@@ -264,6 +266,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct subprocess_info sub_info = {
+		.work		= __WORK_INITIALIZER(sub_info.work,
+						     __call_usermodehelper),
 		.complete	= &done,
 		.path		= path,
 		.argv		= argv,
@@ -272,7 +276,6 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 		.wait		= wait,
 		.retval		= 0,
 	};
-	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
 	if (!khelper_wq)
 		return -EBUSY;
@@ -280,7 +283,7 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 	if (path[0] == '\0')
 		return 0;
 
-	queue_work(khelper_wq, &work);
+	queue_work(khelper_wq, &sub_info.work);
 	wait_for_completion(&done);
 	return sub_info.retval;
 }
@@ -291,6 +294,8 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 {
 	DECLARE_COMPLETION(done);
 	struct subprocess_info sub_info = {
+		.work		= __WORK_INITIALIZER(sub_info.work,
+						     __call_usermodehelper),
 		.complete	= &done,
 		.path		= path,
 		.argv		= argv,
@@ -298,7 +303,6 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 		.retval		= 0,
 	};
 	struct file *f;
-	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
 	if (!khelper_wq)
 		return -EBUSY;
@@ -318,7 +322,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	}
 	sub_info.stdin = f;
 
-	queue_work(khelper_wq, &work);
+	queue_work(khelper_wq, &sub_info.work);
 	wait_for_completion(&done);
 	return sub_info.retval;
 }
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4f9c60ef95e8..1db8c72d0d38 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -31,6 +31,8 @@ struct kthread_create_info
 	/* Result passed back to kthread_create() from keventd. */
 	struct task_struct *result;
 	struct completion done;
+
+	struct work_struct work;
 };
 
 struct kthread_stop_info
@@ -111,9 +113,10 @@ static int kthread(void *_create)
 }
 
 /* We are keventd: create a thread. */
-static void keventd_create_kthread(void *_create)
+static void keventd_create_kthread(struct work_struct *work)
 {
-	struct kthread_create_info *create = _create;
+	struct kthread_create_info *create =
+		container_of(work, struct kthread_create_info, work);
 	int pid;
 
 	/* We want our own signal handler (we take no signals by default). */
@@ -154,20 +157,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 				   ...)
 {
 	struct kthread_create_info create;
-	DECLARE_WORK(work, keventd_create_kthread, &create);
 
 	create.threadfn = threadfn;
 	create.data = data;
 	init_completion(&create.started);
 	init_completion(&create.done);
+	INIT_WORK(&create.work, keventd_create_kthread);
 
 	/*
 	 * The workqueue needs to start up first:
 	 */
 	if (!helper_wq)
-		work.func(work.data);
+		create.work.func(&create.work);
 	else {
-		queue_work(helper_wq, &work);
+		queue_work(helper_wq, &create.work);
 		wait_for_completion(&create.done);
 	}
 	if (!IS_ERR(create.result)) {
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index f1f900ac3164..678ec736076b 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -16,12 +16,12 @@
  * callback we use.
  */
 
-static void do_poweroff(void *dummy)
+static void do_poweroff(struct work_struct *dummy)
 {
 	kernel_power_off();
 }
 
-static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
+static DECLARE_WORK(poweroff_work, do_poweroff);
 
 static void handle_poweroff(int key, struct tty_struct *tty)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index 98489d82801b..c87b461de38d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -880,7 +880,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
 	return 0;
 }
 
-static void deferred_cad(void *dummy)
+static void deferred_cad(struct work_struct *dummy)
 {
 	kernel_restart(NULL);
 }
@@ -892,7 +892,7 @@ static void deferred_cad(void *dummy)
  */
 void ctrl_alt_del(void)
 {
-	static DECLARE_WORK(cad_work, deferred_cad, NULL);
+	static DECLARE_WORK(cad_work, deferred_cad);
 
 	if (C_A_D)
 		schedule_work(&cad_work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 967479756511..8d1e7cb8a51a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -241,14 +241,14 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
 		work_func_t f = work->func;
-		void *data = work->data;
 
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irqrestore(&cwq->lock, flags);
 
 		BUG_ON(get_wq_data(work) != cwq);
-		clear_bit(WORK_STRUCT_PENDING, &work->management);
-		f(data);
+		if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+			work_release(work);
+		f(work);
 
 		spin_lock_irqsave(&cwq->lock, flags);
 		cwq->remove_sequence++;
@@ -527,7 +527,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
 /**
  * schedule_on_each_cpu - call a function on each online CPU from keventd
  * @func: the function to call
- * @info: a pointer to pass to func()
  *
  * Returns zero on success.
  * Returns -ve errno on failure.
@@ -536,7 +535,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
  *
  * schedule_on_each_cpu() is very slow.
  */
-int schedule_on_each_cpu(work_func_t func, void *info)
+int schedule_on_each_cpu(work_func_t func)
 {
 	int cpu;
 	struct work_struct *works;
@@ -547,7 +546,7 @@ int schedule_on_each_cpu(work_func_t func, void *info)
 
 	mutex_lock(&workqueue_mutex);
 	for_each_online_cpu(cpu) {
-		INIT_WORK(per_cpu_ptr(works, cpu), func, info);
+		INIT_WORK(per_cpu_ptr(works, cpu), func);
 		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
 				per_cpu_ptr(works, cpu));
 	}
@@ -591,7 +590,6 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work);
 /**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn:		the function to execute
- * @data:	data to pass to the function
  * @ew:		guaranteed storage for the execute work structure (must
  *		be available when the work executes)
  *
@@ -601,15 +599,14 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work);
  * Returns:	0 - function was executed
  *		1 - function was scheduled for execution
  */
-int execute_in_process_context(work_func_t fn, void *data,
-			       struct execute_work *ew)
+int execute_in_process_context(work_func_t fn, struct execute_work *ew)
 {
 	if (!in_interrupt()) {
-		fn(data);
+		fn(&ew->work);
 		return 0;
 	}
 
-	INIT_WORK(&ew->work, fn, data);
+	INIT_WORK(&ew->work, fn);
 	schedule_work(&ew->work);
 
 	return 1;
diff --git a/mm/slab.c b/mm/slab.c
index a65bc5e992c3..5de81473df34 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
 static int enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
+static void cache_reap(struct work_struct *unused);
 
 /*
  * This function must be completely optimized away if a constant is passed to
@@ -925,7 +925,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 */
 	if (keventd_up() && reap_work->work.func == NULL) {
 		init_reap_node(cpu);
-		INIT_DELAYED_WORK(reap_work, cache_reap, NULL);
+		INIT_DELAYED_WORK(reap_work, cache_reap);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
 }
@@ -3815,7 +3815,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
  * If we cannot acquire the cache chain mutex then just give up - we'll try
  * again on the next iteration.
  */
-static void cache_reap(void *unused)
+static void cache_reap(struct work_struct *unused)
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index f2ed09e25dfd..549a2ce951b0 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -34,8 +34,8 @@ enum lw_bits {
 static unsigned long linkwatch_flags;
 static unsigned long linkwatch_nextevent;
 
-static void linkwatch_event(void *dummy);
-static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event, NULL);
+static void linkwatch_event(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
 
 static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
@@ -127,7 +127,7 @@ void linkwatch_run_queue(void)
 }       
 
 
-static void linkwatch_event(void *dummy)
+static void linkwatch_event(struct work_struct *dummy)
 {
 	/* Limit the number of linkwatch events to one
 	 * per second so that a runaway driver does not
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index cdd805344c61..8c74f9168b7d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -197,9 +197,10 @@ EXPORT_SYMBOL_GPL(inet_twdr_hangman);
 
 extern void twkill_slots_invalid(void);
 
-void inet_twdr_twkill_work(void *data)
+void inet_twdr_twkill_work(struct work_struct *work)
 {
-	struct inet_timewait_death_row *twdr = data;
+	struct inet_timewait_death_row *twdr =
+		container_of(work, struct inet_timewait_death_row, twkill_work);
 	int i;
 
 	if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0163d9826907..af7b2c986b1f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -45,8 +45,7 @@ struct inet_timewait_death_row tcp_death_row = {
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&tcp_death_row),
 	.twkill_work	= __WORK_INITIALIZER(tcp_death_row.twkill_work,
-					     inet_twdr_twkill_work,
-					     &tcp_death_row),
+					     inet_twdr_twkill_work),
 /* Short-time timewait calendar */
 
 	.twcal_hand	= -1,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d5725cb1491e..d96fd466a9a4 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -284,8 +284,8 @@ static struct file_operations cache_file_operations;
 static struct file_operations content_file_operations;
 static struct file_operations cache_flush_operations;
 
-static void do_cache_clean(void *data);
-static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean, NULL);
+static void do_cache_clean(struct work_struct *work);
+static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
 
 void cache_register(struct cache_detail *cd)
 {
@@ -461,7 +461,7 @@ static int cache_clean(void)
 /*
  * We want to regularly clean the cache, so we need to schedule some work ...
  */
-static void do_cache_clean(void *data)
+static void do_cache_clean(struct work_struct *work)
 {
 	int delay = 5;
 	if (cache_clean() == -1)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 97be3f7fed44..49dba5febbbd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -54,10 +54,11 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
 }
 
 static void
-rpc_timeout_upcall_queue(void *data)
+rpc_timeout_upcall_queue(struct work_struct *work)
 {
 	LIST_HEAD(free_list);
-	struct rpc_inode *rpci = (struct rpc_inode *)data;
+	struct rpc_inode *rpci =
+		container_of(work, struct rpc_inode, queue_timeout.work);
 	struct inode *inode = &rpci->vfs_inode;
 	void (*destroy_msg)(struct rpc_pipe_msg *);
 
@@ -838,7 +839,7 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		rpci->pipelen = 0;
 		init_waitqueue_head(&rpci->waitq);
 		INIT_DELAYED_WORK(&rpci->queue_timeout,
-				    rpc_timeout_upcall_queue, rpci);
+				    rpc_timeout_upcall_queue);
 		rpci->ops = NULL;
 	}
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a1ab4eed41f4..eff44bcdc95a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,7 +41,7 @@ static mempool_t	*rpc_buffer_mempool __read_mostly;
 
 static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
-static void			rpc_async_schedule(void *);
+static void			rpc_async_schedule(struct work_struct *);
 
 /*
  * RPC tasks sit here while waiting for conditions to improve.
@@ -305,7 +305,7 @@ static void rpc_make_runnable(struct rpc_task *task)
 	if (RPC_IS_ASYNC(task)) {
 		int status;
 
-		INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
+		INIT_WORK(&task->u.tk_work, rpc_async_schedule);
 		status = queue_work(task->tk_workqueue, &task->u.tk_work);
 		if (status < 0) {
 			printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
@@ -695,9 +695,9 @@ rpc_execute(struct rpc_task *task)
 	return __rpc_execute(task);
 }
 
-static void rpc_async_schedule(void *arg)
+static void rpc_async_schedule(struct work_struct *work)
 {
-	__rpc_execute((struct rpc_task *)arg);
+	__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
 }
 
 /**
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 80857470dc11..4f9a5d9791fb 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -479,9 +479,10 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
 	return status;
 }
 
-static void xprt_autoclose(void *args)
+static void xprt_autoclose(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, task_cleanup);
 
 	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
@@ -932,7 +933,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
 
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv);
-	INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
+	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
 	init_timer(&xprt->timer);
 	xprt->timer.function = xprt_init_autodisconnect;
 	xprt->timer.data = (unsigned long) xprt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3c7532cd009e..cfe3c15be948 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1060,13 +1060,14 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 
 /**
  * xs_udp_connect_worker - set up a UDP socket
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_udp_connect_worker(void *args)
+static void xs_udp_connect_worker(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *) args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, connect_worker.work);
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
@@ -1144,13 +1145,14 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
 
 /**
  * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_connect_worker(void *args)
+static void xs_tcp_connect_worker(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, connect_worker.work);
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
@@ -1375,7 +1377,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-	INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1420,7 +1422,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-	INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
diff --git a/security/keys/key.c b/security/keys/key.c
index 80de8c3e9cc3..70eacbe5abde 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -30,8 +30,8 @@ DEFINE_SPINLOCK(key_user_lock);
 static LIST_HEAD(key_types_list);
 static DECLARE_RWSEM(key_types_sem);
 
-static void key_cleanup(void *data);
-static DECLARE_WORK(key_cleanup_task, key_cleanup, NULL);
+static void key_cleanup(struct work_struct *work);
+static DECLARE_WORK(key_cleanup_task, key_cleanup);
 
 /* we serialise key instantiation and link */
 DECLARE_RWSEM(key_construction_sem);
@@ -552,7 +552,7 @@ EXPORT_SYMBOL(key_negate_and_link);
  * do cleaning up in process context so that we don't have to disable
  * interrupts all over the place
  */
-static void key_cleanup(void *data)
+static void key_cleanup(struct work_struct *work)
 {
 	struct rb_node *_n;
 	struct key *key;
-- 
cgit v1.2.3


From c4028958b6ecad064b1a6303a6a5906d4fe48d73 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:57:56 +0000
Subject: WorkStruct: make allyesconfig

Fix up for make allyesconfig.

Signed-Off-By: David Howells <dhowells@redhat.com>
---
 arch/i386/kernel/cpu/mcheck/non-fatal.c         |   6 +-
 arch/i386/kernel/smpboot.c                      |  11 +-
 arch/i386/kernel/tsc.c                          |   4 +-
 arch/powerpc/platforms/pseries/eeh_event.c      |   6 +-
 drivers/atm/idt77252.c                          |   9 +-
 drivers/block/aoe/aoe.h                         |   2 +-
 drivers/block/aoe/aoecmd.c                      |   4 +-
 drivers/block/aoe/aoedev.c                      |   2 +-
 drivers/block/paride/pd.c                       |   8 +-
 drivers/block/paride/pseudo.h                   |  10 +-
 drivers/block/sx8.c                             |   7 +-
 drivers/block/ub.c                              |   8 +-
 drivers/bluetooth/bcm203x.c                     |   7 +-
 drivers/char/cyclades.c                         |   9 +-
 drivers/char/drm/via_dmablit.c                  |   6 +-
 drivers/char/epca.c                             |   8 +-
 drivers/char/esp.c                              |  14 +-
 drivers/char/genrtc.c                           |   4 +-
 drivers/char/hvsi.c                             |  16 +-
 drivers/char/ip2/i2lib.c                        |  12 +-
 drivers/char/ip2/ip2main.c                      |  23 +--
 drivers/char/isicom.c                           |  12 +-
 drivers/char/moxa.c                             |   8 +-
 drivers/char/mxser.c                            |   9 +-
 drivers/char/pcmcia/synclink_cs.c               |   8 +-
 drivers/char/sonypi.c                           |   4 +-
 drivers/char/specialix.c                        |  14 +-
 drivers/char/synclink.c                         |   9 +-
 drivers/char/synclink_gt.c                      |  10 +-
 drivers/char/synclinkmp.c                       |   8 +-
 drivers/char/tpm/tpm.c                          |   6 +-
 drivers/connector/cn_queue.c                    |   8 +-
 drivers/connector/connector.c                   |  31 ++--
 drivers/cpufreq/cpufreq_conservative.c          |   7 +-
 drivers/cpufreq/cpufreq_ondemand.c              |  28 +--
 drivers/i2c/chips/ds1374.c                      |  12 +-
 drivers/ieee1394/hosts.c                        |   9 +-
 drivers/ieee1394/hosts.h                        |   2 +-
 drivers/ieee1394/sbp2.c                         |  28 +--
 drivers/ieee1394/sbp2.h                         |   2 +-
 drivers/infiniband/core/addr.c                  |   6 +-
 drivers/infiniband/core/cache.c                 |   7 +-
 drivers/infiniband/core/cm.c                    |  19 +-
 drivers/infiniband/core/cma.c                   |  10 +-
 drivers/infiniband/core/iwcm.c                  |   7 +-
 drivers/infiniband/core/mad.c                   |  25 +--
 drivers/infiniband/core/mad_priv.h              |   2 +-
 drivers/infiniband/core/mad_rmpp.c              |  18 +-
 drivers/infiniband/core/sa_query.c              |  10 +-
 drivers/infiniband/core/uverbs_mem.c            |   7 +-
 drivers/infiniband/hw/ipath/ipath_user_pages.c  |   7 +-
 drivers/infiniband/hw/mthca/mthca_catas.c       |   4 +-
 drivers/infiniband/ulp/ipoib/ipoib.h            |  16 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c         |  25 +--
 drivers/infiniband/ulp/ipoib/ipoib_main.c       |  10 +-
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |  22 ++-
 drivers/infiniband/ulp/iser/iser_verbs.c        |  10 +-
 drivers/infiniband/ulp/srp/ib_srp.c             |   7 +-
 drivers/input/keyboard/lkkbd.c                  |   6 +-
 drivers/input/keyboard/sunkbd.c                 |   6 +-
 drivers/input/mouse/psmouse-base.c              |   7 +-
 drivers/isdn/act2000/capi.c                     |   4 +-
 drivers/isdn/act2000/capi.h                     |   2 +-
 drivers/isdn/act2000/module.c                   |  18 +-
 drivers/isdn/capi/kcapi.c                       |  14 +-
 drivers/isdn/hisax/amd7930_fn.c                 |   7 +-
 drivers/isdn/hisax/config.c                     |   9 +-
 drivers/isdn/hisax/hfc4s8s_l1.c                 |   5 +-
 drivers/isdn/hisax/hfc_2bds0.c                  |   9 +-
 drivers/isdn/hisax/hfc_pci.c                    |   6 +-
 drivers/isdn/hisax/hfc_sx.c                     |   6 +-
 drivers/isdn/hisax/icc.c                        |   6 +-
 drivers/isdn/hisax/isac.c                       |   6 +-
 drivers/isdn/hisax/isar.c                       |   6 +-
 drivers/isdn/hisax/isdnl1.c                     |   6 +-
 drivers/isdn/hisax/w6692.c                      |   6 +-
 drivers/isdn/i4l/isdn_net.c                     |   6 +-
 drivers/isdn/pcbit/drv.c                        |   4 +-
 drivers/isdn/pcbit/layer2.c                     |   6 +-
 drivers/isdn/pcbit/pcbit.h                      |   2 +
 drivers/macintosh/smu.c                         |   4 +-
 drivers/md/dm-crypt.c                           |   8 +-
 drivers/md/dm-mpath.c                           |  18 +-
 drivers/md/dm-raid1.c                           |   4 +-
 drivers/md/dm-snap.c                            |   9 +-
 drivers/md/kcopyd.c                             |   4 +-
 drivers/media/dvb/b2c2/flexcop-pci.c            |   9 +-
 drivers/media/dvb/cinergyT2/cinergyT2.c         |  18 +-
 drivers/media/dvb/dvb-core/dvb_net.c            |  19 +-
 drivers/media/dvb/dvb-usb/dvb-usb-remote.c      |   7 +-
 drivers/media/dvb/dvb-usb/dvb-usb.h             |   2 +-
 drivers/media/video/cpia_pp.c                   |  20 ++-
 drivers/media/video/cx88/cx88-input.c           |   6 +-
 drivers/media/video/ir-kbd-i2c.c                |   6 +-
 drivers/media/video/pvrusb2/pvrusb2-context.c   |  13 +-
 drivers/media/video/saa6588.c                   |   6 +-
 drivers/media/video/saa7134/saa7134-empress.c   |   9 +-
 drivers/message/fusion/mptfc.c                  |  14 +-
 drivers/message/fusion/mptlan.c                 |  29 +--
 drivers/message/fusion/mptsas.c                 |  25 +--
 drivers/message/fusion/mptspi.c                 |  14 +-
 drivers/message/i2o/driver.c                    |   2 +-
 drivers/message/i2o/exec-osm.c                  |  13 +-
 drivers/message/i2o/i2o_block.c                 |  15 +-
 drivers/message/i2o/i2o_block.h                 |   2 +-
 drivers/misc/tifm_7xx1.c                        |  18 +-
 drivers/mmc/mmc.c                               |  14 +-
 drivers/mmc/mmc.h                               |   2 +-
 drivers/mmc/mmc_sysfs.c                         |  10 +-
 drivers/mmc/tifm_sd.c                           |  28 +--
 drivers/net/8139too.c                           |  26 +--
 drivers/net/bnx2.c                              |   6 +-
 drivers/net/cassini.c                           |   6 +-
 drivers/net/chelsio/common.h                    |   2 +-
 drivers/net/chelsio/cxgb2.c                     |  16 +-
 drivers/net/e100.c                              |   8 +-
 drivers/net/ehea/ehea_main.c                    |   9 +-
 drivers/net/hamradio/baycom_epp.c               |  14 +-
 drivers/net/irda/mcs7780.c                      |   6 +-
 drivers/net/irda/sir-dev.h                      |   2 +-
 drivers/net/irda/sir_dev.c                      |   8 +-
 drivers/net/iseries_veth.c                      |  12 +-
 drivers/net/ixgb/ixgb_main.c                    |  10 +-
 drivers/net/myri10ge/myri10ge.c                 |   7 +-
 drivers/net/ns83820.c                           |  10 +-
 drivers/net/pcmcia/xirc2ps_cs.c                 |  12 +-
 drivers/net/phy/phy.c                           |   9 +-
 drivers/net/plip.c                              |  38 ++--
 drivers/net/qla3xxx.c                           |  20 ++-
 drivers/net/qla3xxx.h                           |   4 +-
 drivers/net/r8169.c                             |  23 ++-
 drivers/net/s2io.c                              |  16 +-
 drivers/net/s2io.h                              |   2 +-
 drivers/net/sis190.c                            |  13 +-
 drivers/net/skge.c                              |  15 +-
 drivers/net/skge.h                              |   2 +-
 drivers/net/spider_net.c                        |   9 +-
 drivers/net/sungem.c                            |   6 +-
 drivers/net/tg3.c                               |   6 +-
 drivers/net/tlan.c                              |  23 ++-
 drivers/net/tlan.h                              |   1 +
 drivers/net/tulip/21142.c                       |   7 +-
 drivers/net/tulip/timer.c                       |   7 +-
 drivers/net/tulip/tulip.h                       |   7 +-
 drivers/net/tulip/tulip_core.c                  |   3 +-
 drivers/net/wan/pc300_tty.c                     |  23 +--
 drivers/net/wireless/bcm43xx/bcm43xx.h          |   2 +-
 drivers/net/wireless/bcm43xx/bcm43xx_main.c     |  20 ++-
 drivers/net/wireless/hostap/hostap.h            |   2 +-
 drivers/net/wireless/hostap/hostap_ap.c         |  19 +-
 drivers/net/wireless/hostap/hostap_hw.c         |  21 +--
 drivers/net/wireless/hostap/hostap_info.c       |   6 +-
 drivers/net/wireless/hostap/hostap_main.c       |   8 +-
 drivers/net/wireless/ipw2100.c                  |  47 ++---
 drivers/net/wireless/ipw2100.h                  |  10 +-
 drivers/net/wireless/ipw2200.c                  | 227 +++++++++++++-----------
 drivers/net/wireless/ipw2200.h                  |  16 +-
 drivers/net/wireless/orinoco.c                  |  28 +--
 drivers/net/wireless/prism54/isl_ioctl.c        |   8 +-
 drivers/net/wireless/prism54/isl_ioctl.h        |   4 +-
 drivers/net/wireless/prism54/islpci_dev.c       |   5 +-
 drivers/net/wireless/prism54/islpci_eth.c       |   4 +-
 drivers/net/wireless/prism54/islpci_eth.h       |   2 +-
 drivers/net/wireless/prism54/islpci_mgt.c       |   2 +-
 drivers/net/wireless/zd1211rw/zd_mac.c          |   7 +-
 drivers/net/wireless/zd1211rw/zd_mac.h          |   2 +-
 drivers/oprofile/cpu_buffer.c                   |   9 +-
 drivers/oprofile/cpu_buffer.h                   |   2 +-
 drivers/pci/hotplug/shpchp.h                    |   4 +-
 drivers/pci/hotplug/shpchp_core.c               |   2 +-
 drivers/pci/hotplug/shpchp_ctrl.c               |  19 +-
 drivers/pcmcia/ds.c                             |   7 +-
 drivers/rtc/rtc-dev.c                           |   7 +-
 drivers/scsi/NCR5380.c                          |  11 +-
 drivers/scsi/NCR5380.h                          |   4 +-
 drivers/scsi/aha152x.c                          |   4 +-
 drivers/scsi/imm.c                              |  12 +-
 drivers/scsi/ipr.c                              |   9 +-
 drivers/scsi/libiscsi.c                         |   7 +-
 drivers/scsi/libsas/sas_discover.c              |  22 ++-
 drivers/scsi/libsas/sas_event.c                 |  14 +-
 drivers/scsi/libsas/sas_init.c                  |   6 +-
 drivers/scsi/libsas/sas_internal.h              |  12 +-
 drivers/scsi/libsas/sas_phy.c                   |  45 +++--
 drivers/scsi/libsas/sas_port.c                  |  30 ++--
 drivers/scsi/ppa.c                              |  12 +-
 drivers/scsi/qla4xxx/ql4_os.c                   |   7 +-
 drivers/scsi/scsi_transport_fc.c                |  60 ++++---
 drivers/scsi/scsi_transport_iscsi.c             |   8 +-
 drivers/scsi/scsi_transport_spi.c               |   7 +-
 drivers/spi/spi_bitbang.c                       |   7 +-
 drivers/usb/atm/cxacru.c                        |  12 +-
 drivers/usb/atm/speedtch.c                      |  15 +-
 drivers/usb/atm/ueagle-atm.c                    |   6 +-
 drivers/usb/class/cdc-acm.c                     |   6 +-
 drivers/usb/core/hub.c                          |  18 +-
 drivers/usb/core/hub.h                          |   2 +-
 drivers/usb/core/message.c                      |   7 +-
 drivers/usb/core/usb.c                          |   9 +-
 drivers/usb/gadget/ether.c                      |   6 +-
 drivers/usb/host/u132-hcd.c                     |  62 +++----
 drivers/usb/input/hid-core.c                    |   7 +-
 drivers/usb/misc/ftdi-elan.c                    |  86 +++------
 drivers/usb/misc/phidgetkit.c                   |  21 ++-
 drivers/usb/misc/phidgetmotorcontrol.c          |  11 +-
 drivers/usb/net/kaweth.c                        |   9 +-
 drivers/usb/net/pegasus.c                       |   6 +-
 drivers/usb/net/pegasus.h                       |   2 +-
 drivers/usb/net/usbnet.c                        |   7 +-
 drivers/usb/serial/aircable.c                   |  13 +-
 drivers/usb/serial/digi_acceleport.c            |  14 +-
 drivers/usb/serial/ftdi_sio.c                   |  19 +-
 drivers/usb/serial/keyspan_pda.c                |  22 ++-
 drivers/usb/serial/usb-serial.c                 |   7 +-
 drivers/usb/serial/whiteheat.c                  |  15 +-
 drivers/video/console/fbcon.c                   |   6 +-
 fs/9p/mux.c                                     |  16 +-
 fs/gfs2/glock.c                                 |   8 +-
 fs/ncpfs/inode.c                                |   8 +-
 fs/ncpfs/sock.c                                 |  20 ++-
 fs/nfsd/nfs4state.c                             |   7 +-
 fs/ocfs2/alloc.c                                |   9 +-
 fs/ocfs2/cluster/heartbeat.c                    |  10 +-
 fs/ocfs2/cluster/quorum.c                       |   4 +-
 fs/ocfs2/cluster/tcp.c                          |  78 ++++----
 fs/ocfs2/cluster/tcp_internal.h                 |   8 +-
 fs/ocfs2/dlm/dlmcommon.h                        |   2 +-
 fs/ocfs2/dlm/dlmdomain.c                        |   2 +-
 fs/ocfs2/dlm/dlmrecovery.c                      |   5 +-
 fs/ocfs2/dlm/userdlm.c                          |  10 +-
 fs/ocfs2/journal.c                              |   7 +-
 fs/ocfs2/journal.h                              |   2 +-
 fs/ocfs2/ocfs2.h                                |   2 +-
 fs/ocfs2/super.c                                |   2 +-
 fs/reiserfs/journal.c                           |  12 +-
 fs/xfs/linux-2.6/xfs_aops.c                     |  21 ++-
 fs/xfs/linux-2.6/xfs_buf.c                      |   9 +-
 include/linux/connector.h                       |   4 +-
 include/linux/i2o.h                             |   2 +-
 include/linux/mmc/host.h                        |   2 +-
 include/linux/ncp_fs_sb.h                       |   8 +-
 include/linux/reiserfs_fs_sb.h                  |   3 +-
 include/linux/relay.h                           |   2 +-
 include/linux/usb.h                             |   2 +-
 include/net/ieee80211softmac.h                  |   4 +-
 include/net/sctp/structs.h                      |   2 +-
 include/scsi/libsas.h                           |  23 ++-
 include/scsi/scsi_transport_fc.h                |   4 +-
 include/scsi/scsi_transport_iscsi.h             |   2 +-
 include/sound/ac97_codec.h                      |   2 +-
 include/sound/ak4114.h                          |   2 +-
 kernel/relay.c                                  |  10 +-
 mm/swap.c                                       |   4 +-
 net/atm/lec.c                                   |   9 +-
 net/atm/lec.h                                   |   2 +-
 net/bluetooth/hci_sysfs.c                       |  12 +-
 net/bridge/br_if.c                              |  10 +-
 net/bridge/br_private.h                         |   2 +-
 net/core/netpoll.c                              |   4 +-
 net/dccp/minisocks.c                            |   3 +-
 net/ieee80211/softmac/ieee80211softmac_assoc.c  |  18 +-
 net/ieee80211/softmac/ieee80211softmac_auth.c   |  23 +--
 net/ieee80211/softmac/ieee80211softmac_event.c  |  12 +-
 net/ieee80211/softmac/ieee80211softmac_module.c |   4 +-
 net/ieee80211/softmac/ieee80211softmac_priv.h   |  13 +-
 net/ieee80211/softmac/ieee80211softmac_scan.c   |  13 +-
 net/ieee80211/softmac/ieee80211softmac_wx.c     |   6 +-
 net/ipv4/ipvs/ip_vs_ctl.c                       |   6 +-
 net/irda/ircomm/ircomm_tty.c                    |  11 +-
 net/sctp/associola.c                            |  11 +-
 net/sctp/endpointola.c                          |  10 +-
 net/sctp/inqueue.c                              |   9 +-
 net/xfrm/xfrm_policy.c                          |   8 +-
 net/xfrm/xfrm_state.c                           |   8 +-
 sound/aoa/aoa-gpio.h                            |   2 +-
 sound/aoa/core/snd-aoa-gpio-feature.c           |  16 +-
 sound/aoa/core/snd-aoa-gpio-pmf.c               |  16 +-
 sound/i2c/other/ak4114.c                        |   8 +-
 sound/pci/ac97/ac97_codec.c                     |   7 +-
 sound/pci/hda/hda_codec.c                       |  10 +-
 sound/pci/hda/hda_local.h                       |   1 +
 sound/ppc/tumbler.c                             |   8 +-
 282 files changed, 1771 insertions(+), 1450 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
index 1f9153ae5b03..6b5d3518a1c0 100644
--- a/arch/i386/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -51,10 +51,10 @@ static void mce_checkregs (void *info)
 	}
 }
 
-static void mce_work_fn(void *data);
-static DECLARE_WORK(mce_work, mce_work_fn, NULL);
+static void mce_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
 
-static void mce_work_fn(void *data)
+static void mce_work_fn(struct work_struct *work)
 { 
 	on_each_cpu(mce_checkregs, NULL, 1, 1);
 	schedule_delayed_work(&mce_work, MCE_RATE);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65b..02a9b66b6ac3 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -1049,13 +1049,15 @@ void cpu_exit_clear(void)
 
 struct warm_boot_cpu_info {
 	struct completion *complete;
+	struct work_struct task;
 	int apicid;
 	int cpu;
 };
 
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
 {
-	struct warm_boot_cpu_info *info = p;
+	struct warm_boot_cpu_info *info =
+		container_of(work, struct warm_boot_cpu_info, task);
 	do_boot_cpu(info->apicid, info->cpu);
 	complete(info->complete);
 }
@@ -1064,7 +1066,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct warm_boot_cpu_info info;
-	struct work_struct task;
 	int	apicid, ret;
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
@@ -1089,7 +1090,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	info.complete = &done;
 	info.apicid = apicid;
 	info.cpu = cpu;
-	INIT_WORK(&task, do_warm_boot_cpu, &info);
+	INIT_WORK(&info.task, do_warm_boot_cpu);
 
 	tsc_sync_disabled = 1;
 
@@ -1097,7 +1098,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
 			KERNEL_PGD_PTRS);
 	flush_tlb_all();
-	schedule_work(&task);
+	schedule_work(&info.task);
 	wait_for_completion(&done);
 
 	tsc_sync_disabled = 0;
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index fbc95828cd74..9810c8c90750 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -217,7 +217,7 @@ static unsigned int cpufreq_delayed_issched = 0;
 static unsigned int cpufreq_init = 0;
 static struct work_struct cpufreq_delayed_get_work;
 
-static void handle_cpufreq_delayed_get(void *v)
+static void handle_cpufreq_delayed_get(struct work_struct *work)
 {
 	unsigned int cpu;
 
@@ -306,7 +306,7 @@ static int __init cpufreq_tsc(void)
 {
 	int ret;
 
-	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
 	ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
 					CPUFREQ_TRANSITION_NOTIFIER);
 	if (!ret)
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 137077451316..49037edf7d39 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -37,8 +37,8 @@
 /* EEH event workqueue setup. */
 static DEFINE_SPINLOCK(eeh_eventlist_lock);
 LIST_HEAD(eeh_eventlist);
-static void eeh_thread_launcher(void *);
-DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL);
+static void eeh_thread_launcher(struct work_struct *);
+DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
 
 /* Serialize reset sequences for a given pci device */
 DEFINE_MUTEX(eeh_event_mutex);
@@ -103,7 +103,7 @@ static int eeh_event_handler(void * dummy)
  * eeh_thread_launcher
  * @dummy - unused
  */
-static void eeh_thread_launcher(void *dummy)
+static void eeh_thread_launcher(struct work_struct *dummy)
 {
 	if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0)
 		printk(KERN_ERR "Failed to start EEH daemon\n");
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 87b17c33b3f9..f40786121948 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -135,7 +135,7 @@ static int idt77252_change_qos(struct atm_vcc *vcc, struct atm_qos *qos,
 			       int flags);
 static int idt77252_proc_read(struct atm_dev *dev, loff_t * pos,
 			      char *page);
-static void idt77252_softint(void *dev_id);
+static void idt77252_softint(struct work_struct *work);
 
 
 static struct atmdev_ops idt77252_ops =
@@ -2866,9 +2866,10 @@ out:
 }
 
 static void
-idt77252_softint(void *dev_id)
+idt77252_softint(struct work_struct *work)
 {
-	struct idt77252_dev *card = dev_id;
+	struct idt77252_dev *card =
+		container_of(work, struct idt77252_dev, tqueue);
 	u32 stat;
 	int done;
 
@@ -3697,7 +3698,7 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id)
 	card->pcidev = pcidev;
 	sprintf(card->name, "idt77252-%d", card->index);
 
-	INIT_WORK(&card->tqueue, idt77252_softint, (void *)card);
+	INIT_WORK(&card->tqueue, idt77252_softint);
 
 	membase = pci_resource_start(pcidev, 1);
 	srambase = pci_resource_start(pcidev, 2);
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 6d111228cfac..2308e83e5f33 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -159,7 +159,7 @@ void aoecmd_work(struct aoedev *d);
 void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
 void aoecmd_ata_rsp(struct sk_buff *);
 void aoecmd_cfg_rsp(struct sk_buff *);
-void aoecmd_sleepwork(void *vp);
+void aoecmd_sleepwork(struct work_struct *);
 struct sk_buff *new_skb(ulong);
 
 int aoedev_init(void);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8a13b1af8bab..97f7f535f412 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -408,9 +408,9 @@ rexmit_timer(ulong vp)
 /* this function performs work that has been deferred until sleeping is OK
  */
 void
-aoecmd_sleepwork(void *vp)
+aoecmd_sleepwork(struct work_struct *work)
 {
-	struct aoedev *d = (struct aoedev *) vp;
+	struct aoedev *d = container_of(work, struct aoedev, work);
 
 	if (d->flags & DEVFL_GDALLOC)
 		aoeblk_gdalloc(d);
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 6125921bbec4..05a97197c918 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -88,7 +88,7 @@ aoedev_newdev(ulong nframes)
  			kfree(d);
 		return NULL;
 	}
-	INIT_WORK(&d->work, aoecmd_sleepwork, d);
+	INIT_WORK(&d->work, aoecmd_sleepwork);
 	spin_lock_init(&d->lock);
 	init_timer(&d->timer);
 	d->timer.data = (ulong) d;
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 40a11e567970..9d9bff23f426 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -352,19 +352,19 @@ static enum action (*phase)(void);
 
 static void run_fsm(void);
 
-static void ps_tq_int( void *data);
+static void ps_tq_int(struct work_struct *work);
 
-static DECLARE_WORK(fsm_tq, ps_tq_int, NULL);
+static DECLARE_DELAYED_WORK(fsm_tq, ps_tq_int);
 
 static void schedule_fsm(void)
 {
 	if (!nice)
-		schedule_work(&fsm_tq);
+		schedule_delayed_work(&fsm_tq, 0);
 	else
 		schedule_delayed_work(&fsm_tq, nice-1);
 }
 
-static void ps_tq_int(void *data)
+static void ps_tq_int(struct work_struct *work)
 {
 	run_fsm();
 }
diff --git a/drivers/block/paride/pseudo.h b/drivers/block/paride/pseudo.h
index 932342d7a8eb..bc3703294143 100644
--- a/drivers/block/paride/pseudo.h
+++ b/drivers/block/paride/pseudo.h
@@ -35,7 +35,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 
-static void ps_tq_int( void *data);
+static void ps_tq_int(struct work_struct *work);
 
 static void (* ps_continuation)(void);
 static int (* ps_ready)(void);
@@ -45,7 +45,7 @@ static int ps_nice = 0;
 
 static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused)));
 
-static DECLARE_WORK(ps_tq, ps_tq_int, NULL);
+static DECLARE_DELAYED_WORK(ps_tq, ps_tq_int);
 
 static void ps_set_intr(void (*continuation)(void), 
 			int (*ready)(void),
@@ -63,14 +63,14 @@ static void ps_set_intr(void (*continuation)(void),
 	if (!ps_tq_active) {
 		ps_tq_active = 1;
 		if (!ps_nice)
-			schedule_work(&ps_tq);
+			schedule_delayed_work(&ps_tq, 0);
 		else
 			schedule_delayed_work(&ps_tq, ps_nice-1);
 	}
 	spin_unlock_irqrestore(&ps_spinlock,flags);
 }
 
-static void ps_tq_int(void *data)
+static void ps_tq_int(struct work_struct *work)
 {
 	void (*con)(void);
 	unsigned long flags;
@@ -92,7 +92,7 @@ static void ps_tq_int(void *data)
 	}
 	ps_tq_active = 1;
 	if (!ps_nice)
-		schedule_work(&ps_tq);
+		schedule_delayed_work(&ps_tq, 0);
 	else
 		schedule_delayed_work(&ps_tq, ps_nice-1);
 	spin_unlock_irqrestore(&ps_spinlock,flags);
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 47d6975268ff..54509eb3391b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1244,9 +1244,10 @@ out:
 	return IRQ_RETVAL(handled);
 }
 
-static void carm_fsm_task (void *_data)
+static void carm_fsm_task (struct work_struct *work)
 {
-	struct carm_host *host = _data;
+	struct carm_host *host =
+		container_of(work, struct carm_host, fsm_task);
 	unsigned long flags;
 	unsigned int state;
 	int rc, i, next_dev;
@@ -1619,7 +1620,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	host->pdev = pdev;
 	host->flags = pci_dac ? FL_DAC : 0;
 	spin_lock_init(&host->lock);
-	INIT_WORK(&host->fsm_task, carm_fsm_task, host);
+	INIT_WORK(&host->fsm_task, carm_fsm_task);
 	init_completion(&host->probe_comp);
 
 	for (i = 0; i < ARRAY_SIZE(host->req); i++)
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0d5c73f07265..2098eff91e14 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -376,7 +376,7 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
     int stalled_pipe);
 static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd);
 static void ub_reset_enter(struct ub_dev *sc, int try);
-static void ub_reset_task(void *arg);
+static void ub_reset_task(struct work_struct *work);
 static int ub_sync_tur(struct ub_dev *sc, struct ub_lun *lun);
 static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
     struct ub_capacity *ret);
@@ -1558,9 +1558,9 @@ static void ub_reset_enter(struct ub_dev *sc, int try)
 	schedule_work(&sc->reset_work);
 }
 
-static void ub_reset_task(void *arg)
+static void ub_reset_task(struct work_struct *work)
 {
-	struct ub_dev *sc = arg;
+	struct ub_dev *sc = container_of(work, struct ub_dev, reset_work);
 	unsigned long flags;
 	struct list_head *p;
 	struct ub_lun *lun;
@@ -2179,7 +2179,7 @@ static int ub_probe(struct usb_interface *intf,
 	usb_init_urb(&sc->work_urb);
 	tasklet_init(&sc->tasklet, ub_scsi_action, (unsigned long)sc);
 	atomic_set(&sc->poison, 0);
-	INIT_WORK(&sc->reset_work, ub_reset_task, sc);
+	INIT_WORK(&sc->reset_work, ub_reset_task);
 	init_waitqueue_head(&sc->reset_wait);
 
 	init_timer(&sc->work_timer);
diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c
index 516751754aa9..9256985cbe36 100644
--- a/drivers/bluetooth/bcm203x.c
+++ b/drivers/bluetooth/bcm203x.c
@@ -157,9 +157,10 @@ static void bcm203x_complete(struct urb *urb)
 	}
 }
 
-static void bcm203x_work(void *user_data)
+static void bcm203x_work(struct work_struct *work)
 {
-	struct bcm203x_data *data = user_data;
+	struct bcm203x_data *data =
+		container_of(work, struct bcm203x_data, work);
 
 	if (usb_submit_urb(data->urb, GFP_ATOMIC) < 0)
 		BT_ERR("Can't submit URB");
@@ -246,7 +247,7 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id
 
 	release_firmware(firmware);
 
-	INIT_WORK(&data->work, bcm203x_work, (void *) data);
+	INIT_WORK(&data->work, bcm203x_work);
 
 	usb_set_intfdata(intf, data);
 
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index e608dadece2f..acb2de5e3a98 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -926,9 +926,10 @@ cy_sched_event(struct cyclades_port *info, int event)
  * had to poll every port to see if that port needed servicing.
  */
 static void
-do_softint(void *private_)
+do_softint(struct work_struct *work)
 {
-  struct cyclades_port *info = (struct cyclades_port *) private_;
+	struct cyclades_port *info =
+		container_of(work, struct cyclades_port, tqueue);
   struct tty_struct    *tty;
 
     tty = info->tty;
@@ -5328,7 +5329,7 @@ cy_init(void)
                     info->blocked_open = 0;
                     info->default_threshold = 0;
                     info->default_timeout = 0;
-		    INIT_WORK(&info->tqueue, do_softint, info);
+		    INIT_WORK(&info->tqueue, do_softint);
 		    init_waitqueue_head(&info->open_wait);
 		    init_waitqueue_head(&info->close_wait);
 		    init_waitqueue_head(&info->shutdown_wait);
@@ -5403,7 +5404,7 @@ cy_init(void)
                     info->blocked_open = 0;
                     info->default_threshold = 0;
                     info->default_timeout = 0;
-		    INIT_WORK(&info->tqueue, do_softint, info);
+		    INIT_WORK(&info->tqueue, do_softint);
 		    init_waitqueue_head(&info->open_wait);
 		    init_waitqueue_head(&info->close_wait);
 		    init_waitqueue_head(&info->shutdown_wait);
diff --git a/drivers/char/drm/via_dmablit.c b/drivers/char/drm/via_dmablit.c
index 60c1695db300..806f9ce5f47b 100644
--- a/drivers/char/drm/via_dmablit.c
+++ b/drivers/char/drm/via_dmablit.c
@@ -500,9 +500,9 @@ via_dmablit_timer(unsigned long data)
 
 
 static void 
-via_dmablit_workqueue(void *data)
+via_dmablit_workqueue(struct work_struct *work)
 {
-	drm_via_blitq_t *blitq = (drm_via_blitq_t *) data;
+	drm_via_blitq_t *blitq = container_of(work, drm_via_blitq_t, wq);
 	drm_device_t *dev = blitq->dev;
 	unsigned long irqsave;
 	drm_via_sg_info_t *cur_sg;
@@ -571,7 +571,7 @@ via_init_dmablit(drm_device_t *dev)
 			DRM_INIT_WAITQUEUE(blitq->blit_queue + j);
 		}
 		DRM_INIT_WAITQUEUE(&blitq->busy_queue);
-		INIT_WORK(&blitq->wq, via_dmablit_workqueue, blitq);
+		INIT_WORK(&blitq->wq, via_dmablit_workqueue);
 		init_timer(&blitq->poll_timer);
 		blitq->poll_timer.function = &via_dmablit_timer;
 		blitq->poll_timer.data = (unsigned long) blitq;
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 706733c0b36a..7c71eb779802 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -200,7 +200,7 @@ static int pc_ioctl(struct tty_struct *, struct file *,
 static int info_ioctl(struct tty_struct *, struct file *,
                     unsigned int, unsigned long);
 static void pc_set_termios(struct tty_struct *, struct termios *);
-static void do_softint(void *);
+static void do_softint(struct work_struct *work);
 static void pc_stop(struct tty_struct *);
 static void pc_start(struct tty_struct *);
 static void pc_throttle(struct tty_struct * tty);
@@ -1505,7 +1505,7 @@ static void post_fep_init(unsigned int crd)
 
 		ch->brdchan        = bc;
 		ch->mailbox        = gd; 
-		INIT_WORK(&ch->tqueue, do_softint, ch);
+		INIT_WORK(&ch->tqueue, do_softint);
 		ch->board          = &boards[crd];
 
 		spin_lock_irqsave(&epca_lock, flags);
@@ -2566,9 +2566,9 @@ static void pc_set_termios(struct tty_struct *tty, struct termios *old_termios)
 
 /* --------------------- Begin do_softint  ----------------------- */
 
-static void do_softint(void *private_)
+static void do_softint(struct work_struct *work)
 { /* Begin do_softint */
-	struct channel *ch = (struct channel *) private_;
+	struct channel *ch = container_of(work, struct channel, tqueue);
 	/* Called in response to a modem change event */
 	if (ch && ch->magic == EPCA_MAGIC)  { /* Begin EPCA_MAGIC */
 		struct tty_struct *tty = ch->tty;
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 15a4ea896328..93b551962513 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -723,9 +723,10 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
  * -------------------------------------------------------------------
  */
 
-static void do_softint(void *private_)
+static void do_softint(struct work_struct *work)
 {
-	struct esp_struct	*info = (struct esp_struct *) private_;
+	struct esp_struct	*info =
+		container_of(work, struct esp_struct, tqueue);
 	struct tty_struct	*tty;
 	
 	tty = info->tty;
@@ -746,9 +747,10 @@ static void do_softint(void *private_)
  * 	do_serial_hangup() -> tty->hangup() -> esp_hangup()
  * 
  */
-static void do_serial_hangup(void *private_)
+static void do_serial_hangup(struct work_struct *work)
 {
-	struct esp_struct	*info = (struct esp_struct *) private_;
+	struct esp_struct	*info =
+		container_of(work, struct esp_struct, tqueue_hangup);
 	struct tty_struct	*tty;
 	
 	tty = info->tty;
@@ -2501,8 +2503,8 @@ static int __init espserial_init(void)
 		info->magic = ESP_MAGIC;
 		info->close_delay = 5*HZ/10;
 		info->closing_wait = 30*HZ;
-		INIT_WORK(&info->tqueue, do_softint, info);
-		INIT_WORK(&info->tqueue_hangup, do_serial_hangup, info);
+		INIT_WORK(&info->tqueue, do_softint);
+		INIT_WORK(&info->tqueue_hangup, do_serial_hangup);
 		info->config.rx_timeout = rx_timeout;
 		info->config.flow_on = flow_on;
 		info->config.flow_off = flow_off;
diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c
index 817dc409ac20..23b25ada65ea 100644
--- a/drivers/char/genrtc.c
+++ b/drivers/char/genrtc.c
@@ -102,7 +102,7 @@ static void gen_rtc_interrupt(unsigned long arg);
  * Routine to poll RTC seconds field for change as often as possible,
  * after first RTC_UIE use timer to reduce polling
  */
-static void genrtc_troutine(void *data)
+static void genrtc_troutine(struct work_struct *work)
 {
 	unsigned int tmp = get_rtc_ss();
 	
@@ -255,7 +255,7 @@ static inline int gen_set_rtc_irq_bit(unsigned char bit)
 		irq_active = 1;
 		stop_rtc_timers = 0;
 		lostint = 0;
-		INIT_WORK(&genrtc_task, genrtc_troutine, NULL);
+		INIT_WORK(&genrtc_task, genrtc_troutine);
 		oldsecs = get_rtc_ss();
 		init_timer(&timer_task);
 
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 2cf63e7305a3..82a41d5b4ed0 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -69,7 +69,7 @@
 #define __ALIGNED__	__attribute__((__aligned__(sizeof(long))))
 
 struct hvsi_struct {
-	struct work_struct writer;
+	struct delayed_work writer;
 	struct work_struct handshaker;
 	wait_queue_head_t emptyq; /* woken when outbuf is emptied */
 	wait_queue_head_t stateq; /* woken when HVSI state changes */
@@ -744,9 +744,10 @@ static int hvsi_handshake(struct hvsi_struct *hp)
 	return 0;
 }
 
-static void hvsi_handshaker(void *arg)
+static void hvsi_handshaker(struct work_struct *work)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)arg;
+	struct hvsi_struct *hp =
+		container_of(work, struct hvsi_struct, handshaker);
 
 	if (hvsi_handshake(hp) >= 0)
 		return;
@@ -951,9 +952,10 @@ static void hvsi_push(struct hvsi_struct *hp)
 }
 
 /* hvsi_write_worker will keep rescheduling itself until outbuf is empty */
-static void hvsi_write_worker(void *arg)
+static void hvsi_write_worker(struct work_struct *work)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)arg;
+	struct hvsi_struct *hp =
+		container_of(work, struct hvsi_struct, writer.work);
 	unsigned long flags;
 #ifdef DEBUG
 	static long start_j = 0;
@@ -1287,8 +1289,8 @@ static int __init hvsi_console_init(void)
 		}
 
 		hp = &hvsi_ports[hvsi_count];
-		INIT_WORK(&hp->writer, hvsi_write_worker, hp);
-		INIT_WORK(&hp->handshaker, hvsi_handshaker, hp);
+		INIT_DELAYED_WORK(&hp->writer, hvsi_write_worker);
+		INIT_WORK(&hp->handshaker, hvsi_handshaker);
 		init_waitqueue_head(&hp->emptyq);
 		init_waitqueue_head(&hp->stateq);
 		spin_lock_init(&hp->lock);
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 54d93f0345e8..c213fdbdb2b0 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -84,8 +84,8 @@ static void iiSendPendingMail(i2eBordStrPtr);
 static void serviceOutgoingFifo(i2eBordStrPtr);
 
 // Functions defined in ip2.c as part of interrupt handling
-static void do_input(void *);
-static void do_status(void *);
+static void do_input(struct work_struct *);
+static void do_status(struct work_struct *);
 
 //***************
 //* Debug  Data *
@@ -331,8 +331,8 @@ i2InitChannels ( i2eBordStrPtr pB, int nChannels, i2ChanStrPtr pCh)
 		pCh->ClosingWaitTime  = 30*HZ;
 
 		// Initialize task queue objects
-		INIT_WORK(&pCh->tqueue_input, do_input, pCh);
-		INIT_WORK(&pCh->tqueue_status, do_status, pCh);
+		INIT_WORK(&pCh->tqueue_input, do_input);
+		INIT_WORK(&pCh->tqueue_status, do_status);
 
 #ifdef IP2DEBUG_TRACE
 		pCh->trace = ip2trace;
@@ -1573,7 +1573,7 @@ i2StripFifo(i2eBordStrPtr pB)
 #ifdef USE_IQ
 			schedule_work(&pCh->tqueue_input);
 #else
-			do_input(pCh);
+			do_input(&pCh->tqueue_input);
 #endif
 
 			// Note we do not need to maintain any flow-control credits at this
@@ -1810,7 +1810,7 @@ i2StripFifo(i2eBordStrPtr pB)
 #ifdef USE_IQ
 						schedule_work(&pCh->tqueue_status);
 #else
-						do_status(pCh);
+						do_status(&pCh->tqueue_status);
 #endif
 					}
 				}
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index a3f32d46d2f8..cda2459c1d60 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -189,12 +189,12 @@ static int  ip2_tiocmset(struct tty_struct *tty, struct file *file,
 			 unsigned int set, unsigned int clear);
 
 static void set_irq(int, int);
-static void ip2_interrupt_bh(i2eBordStrPtr pB);
+static void ip2_interrupt_bh(struct work_struct *work);
 static irqreturn_t ip2_interrupt(int irq, void *dev_id);
 static void ip2_poll(unsigned long arg);
 static inline void service_all_boards(void);
-static void do_input(void *p);
-static void do_status(void *p);
+static void do_input(struct work_struct *);
+static void do_status(struct work_struct *);
 
 static void ip2_wait_until_sent(PTTY,int);
 
@@ -918,7 +918,7 @@ ip2_init_board( int boardnum )
 		pCh++;
 	}
 ex_exit:
-	INIT_WORK(&pB->tqueue_interrupt, (void(*)(void*)) ip2_interrupt_bh, pB);
+	INIT_WORK(&pB->tqueue_interrupt, ip2_interrupt_bh);
 	return;
 
 err_release_region:
@@ -1125,8 +1125,8 @@ service_all_boards(void)
 
 
 /******************************************************************************/
-/* Function:   ip2_interrupt_bh(pB)                                           */
-/* Parameters: pB - pointer to the board structure                            */
+/* Function:   ip2_interrupt_bh(work)                                         */
+/* Parameters: work - pointer to the board structure                          */
 /* Returns:    Nothing                                                        */
 /*                                                                            */
 /* Description:                                                               */
@@ -1135,8 +1135,9 @@ service_all_boards(void)
 /*                                                                            */
 /******************************************************************************/
 static void
-ip2_interrupt_bh(i2eBordStrPtr pB)
+ip2_interrupt_bh(struct work_struct *work)
 {
+	i2eBordStrPtr pB = container_of(work, i2eBordStr, tqueue_interrupt);
 //	pB better well be set or we have a problem!  We can only get
 //	here from the IMMEDIATE queue.  Here, we process the boards.
 //	Checking pB doesn't cost much and it saves us from the sanity checkers.
@@ -1245,9 +1246,9 @@ ip2_poll(unsigned long arg)
 	ip2trace (ITRC_NO_PORT, ITRC_INTR, ITRC_RETURN, 0 );
 }
 
-static void do_input(void *p)
+static void do_input(struct work_struct *work)
 {
-	i2ChanStrPtr pCh = p;
+	i2ChanStrPtr pCh = container_of(work, i2ChanStr, tqueue_input);
 	unsigned long flags;
 
 	ip2trace(CHANN, ITRC_INPUT, 21, 0 );
@@ -1279,9 +1280,9 @@ static inline void  isig(int sig, struct tty_struct *tty, int flush)
 	}
 }
 
-static void do_status(void *p)
+static void do_status(struct work_struct *work)
 {
-	i2ChanStrPtr pCh = p;
+	i2ChanStrPtr pCh = container_of(work, i2ChanStr, tqueue_status);
 	int status;
 
 	status =  i2GetStatus( pCh, (I2_BRK|I2_PAR|I2_FRA|I2_OVR) );
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 58c955e390b3..1637c1d9a4ba 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -530,9 +530,9 @@ sched_again:
 /* 	Interrupt handlers 	*/
 
 
-static void isicom_bottomhalf(void *data)
+static void isicom_bottomhalf(struct work_struct *work)
 {
-	struct isi_port *port = (struct isi_port *) data;
+	struct isi_port *port = container_of(work, struct isi_port, bh_tqueue);
 	struct tty_struct *tty = port->tty;
 
 	if (!tty)
@@ -1474,9 +1474,9 @@ static void isicom_start(struct tty_struct *tty)
 }
 
 /* hangup et all */
-static void do_isicom_hangup(void *data)
+static void do_isicom_hangup(struct work_struct *work)
 {
-	struct isi_port *port = data;
+	struct isi_port *port = container_of(work, struct isi_port, hangup_tq);
 	struct tty_struct *tty;
 
 	tty = port->tty;
@@ -1966,8 +1966,8 @@ static int __devinit isicom_setup(void)
 			port->channel = channel;
 			port->close_delay = 50 * HZ/100;
 			port->closing_wait = 3000 * HZ/100;
-			INIT_WORK(&port->hangup_tq, do_isicom_hangup, port);
-			INIT_WORK(&port->bh_tqueue, isicom_bottomhalf, port);
+			INIT_WORK(&port->hangup_tq, do_isicom_hangup);
+			INIT_WORK(&port->bh_tqueue, isicom_bottomhalf);
 			port->status = 0;
 			init_waitqueue_head(&port->open_wait);
 			init_waitqueue_head(&port->close_wait);
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 96cb1f07332b..2d025a9fd14d 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -222,7 +222,7 @@ static struct semaphore moxaBuffSem;
 /*
  * static functions:
  */
-static void do_moxa_softint(void *);
+static void do_moxa_softint(struct work_struct *);
 static int moxa_open(struct tty_struct *, struct file *);
 static void moxa_close(struct tty_struct *, struct file *);
 static int moxa_write(struct tty_struct *, const unsigned char *, int);
@@ -363,7 +363,7 @@ static int __init moxa_init(void)
 	for (i = 0, ch = moxaChannels; i < MAX_PORTS; i++, ch++) {
 		ch->type = PORT_16550A;
 		ch->port = i;
-		INIT_WORK(&ch->tqueue, do_moxa_softint, ch);
+		INIT_WORK(&ch->tqueue, do_moxa_softint);
 		ch->tty = NULL;
 		ch->close_delay = 5 * HZ / 10;
 		ch->closing_wait = 30 * HZ;
@@ -509,9 +509,9 @@ static void __exit moxa_exit(void)
 module_init(moxa_init);
 module_exit(moxa_exit);
 
-static void do_moxa_softint(void *private_)
+static void do_moxa_softint(struct work_struct *work)
 {
-	struct moxa_str *ch = (struct moxa_str *) private_;
+	struct moxa_str *ch = container_of(work, struct moxa_str, tqueue);
 	struct tty_struct *tty;
 
 	if (ch && (tty = ch->tty)) {
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 048d91142c17..5ed2486b7581 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -389,7 +389,7 @@ static int mxser_init(void);
 /* static void   mxser_poll(unsigned long); */
 static int mxser_get_ISA_conf(int, struct mxser_hwconf *);
 static int mxser_get_PCI_conf(int, int, int, struct mxser_hwconf *);
-static void mxser_do_softint(void *);
+static void mxser_do_softint(struct work_struct *);
 static int mxser_open(struct tty_struct *, struct file *);
 static void mxser_close(struct tty_struct *, struct file *);
 static int mxser_write(struct tty_struct *, const unsigned char *, int);
@@ -590,7 +590,7 @@ static int mxser_initbrd(int board, struct mxser_hwconf *hwconf)
 		info->custom_divisor = hwconf->baud_base[i] * 16;
 		info->close_delay = 5 * HZ / 10;
 		info->closing_wait = 30 * HZ;
-		INIT_WORK(&info->tqueue, mxser_do_softint, info);
+		INIT_WORK(&info->tqueue, mxser_do_softint);
 		info->normal_termios = mxvar_sdriver->init_termios;
 		init_waitqueue_head(&info->open_wait);
 		init_waitqueue_head(&info->close_wait);
@@ -917,9 +917,10 @@ static int mxser_init(void)
 	return 0;
 }
 
-static void mxser_do_softint(void *private_)
+static void mxser_do_softint(struct work_struct *work)
 {
-	struct mxser_struct *info = private_;
+	struct mxser_struct *info =
+		container_of(work, struct mxser_struct, tqueue);
 	struct tty_struct *tty;
 
 	tty = info->tty;
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 1a0bc30b79d1..e4d950072b5a 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -421,7 +421,7 @@ static irqreturn_t mgslpc_isr(int irq, void *dev_id);
 /*
  * Bottom half interrupt handlers
  */
-static void bh_handler(void* Context);
+static void bh_handler(struct work_struct *work);
 static void bh_transmit(MGSLPC_INFO *info);
 static void bh_status(MGSLPC_INFO *info);
 
@@ -547,7 +547,7 @@ static int mgslpc_probe(struct pcmcia_device *link)
 
     memset(info, 0, sizeof(MGSLPC_INFO));
     info->magic = MGSLPC_MAGIC;
-    INIT_WORK(&info->task, bh_handler, info);
+    INIT_WORK(&info->task, bh_handler);
     info->max_frame_size = 4096;
     info->close_delay = 5*HZ/10;
     info->closing_wait = 30*HZ;
@@ -842,9 +842,9 @@ static int bh_action(MGSLPC_INFO *info)
 	return rc;
 }
 
-static void bh_handler(void* Context)
+static void bh_handler(struct work_struct *work)
 {
-	MGSLPC_INFO *info = (MGSLPC_INFO*)Context;
+	MGSLPC_INFO *info = container_of(work, MGSLPC_INFO, task);
 	int action;
 
 	if (!info)
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index c084149153de..fc87070f1866 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -765,7 +765,7 @@ static void sonypi_setbluetoothpower(u8 state)
 	sonypi_device.bluetooth_power = state;
 }
 
-static void input_keyrelease(void *data)
+static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
@@ -1412,7 +1412,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 			goto err_inpdev_unregister;
 		}
 
-		INIT_WORK(&sonypi_device.input_work, input_keyrelease, NULL);
+		INIT_WORK(&sonypi_device.input_work, input_keyrelease);
 	}
 
 	sonypi_enable(0);
diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c
index 7e1bd9562c2a..99137ab66b62 100644
--- a/drivers/char/specialix.c
+++ b/drivers/char/specialix.c
@@ -2261,9 +2261,10 @@ static void sx_start(struct tty_struct * tty)
  * 	do_sx_hangup() -> tty->hangup() -> sx_hangup()
  *
  */
-static void do_sx_hangup(void *private_)
+static void do_sx_hangup(struct work_struct *work)
 {
-	struct specialix_port	*port = (struct specialix_port *) private_;
+	struct specialix_port	*port =
+		container_of(work, struct specialix_port, tqueue_hangup);
 	struct tty_struct	*tty;
 
 	func_enter();
@@ -2336,9 +2337,10 @@ static void sx_set_termios(struct tty_struct * tty, struct termios * old_termios
 }
 
 
-static void do_softint(void *private_)
+static void do_softint(struct work_struct *work)
 {
-	struct specialix_port	*port = (struct specialix_port *) private_;
+	struct specialix_port	*port =
+		container_of(work, struct specialix_port, tqueue);
 	struct tty_struct	*tty;
 
 	func_enter();
@@ -2411,8 +2413,8 @@ static int sx_init_drivers(void)
 	memset(sx_port, 0, sizeof(sx_port));
 	for (i = 0; i < SX_NPORT * SX_NBOARD; i++) {
 		sx_port[i].magic = SPECIALIX_MAGIC;
-		INIT_WORK(&sx_port[i].tqueue, do_softint, &sx_port[i]);
-		INIT_WORK(&sx_port[i].tqueue_hangup, do_sx_hangup, &sx_port[i]);
+		INIT_WORK(&sx_port[i].tqueue, do_softint);
+		INIT_WORK(&sx_port[i].tqueue_hangup, do_sx_hangup);
 		sx_port[i].close_delay = 50 * HZ/100;
 		sx_port[i].closing_wait = 3000 * HZ/100;
 		init_waitqueue_head(&sx_port[i].open_wait);
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 06784adcc35c..147c30da81ea 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -802,7 +802,7 @@ static int save_tx_buffer_request(struct mgsl_struct *info,const char *Buffer, u
 /*
  * Bottom half interrupt handlers
  */
-static void mgsl_bh_handler(void* Context);
+static void mgsl_bh_handler(struct work_struct *work);
 static void mgsl_bh_receive(struct mgsl_struct *info);
 static void mgsl_bh_transmit(struct mgsl_struct *info);
 static void mgsl_bh_status(struct mgsl_struct *info);
@@ -1071,9 +1071,10 @@ static int mgsl_bh_action(struct mgsl_struct *info)
 /*
  * 	Perform bottom half processing of work items queued by ISR.
  */
-static void mgsl_bh_handler(void* Context)
+static void mgsl_bh_handler(struct work_struct *work)
 {
-	struct mgsl_struct *info = (struct mgsl_struct*)Context;
+	struct mgsl_struct *info =
+		container_of(work, struct mgsl_struct, task);
 	int action;
 
 	if (!info)
@@ -4337,7 +4338,7 @@ static struct mgsl_struct* mgsl_allocate_device(void)
 	} else {
 		memset(info, 0, sizeof(struct mgsl_struct));
 		info->magic = MGSL_MAGIC;
-		INIT_WORK(&info->task, mgsl_bh_handler, info);
+		INIT_WORK(&info->task, mgsl_bh_handler);
 		info->max_frame_size = 4096;
 		info->close_delay = 5*HZ/10;
 		info->closing_wait = 30*HZ;
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index d4334c79f8d4..07f34d43dc7f 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -485,7 +485,7 @@ static void enable_loopback(struct slgt_info *info);
 static void set_rate(struct slgt_info *info, u32 data_rate);
 
 static int  bh_action(struct slgt_info *info);
-static void bh_handler(void* context);
+static void bh_handler(struct work_struct *work);
 static void bh_transmit(struct slgt_info *info);
 static void isr_serial(struct slgt_info *info);
 static void isr_rdma(struct slgt_info *info);
@@ -1878,9 +1878,9 @@ static int bh_action(struct slgt_info *info)
 /*
  * perform bottom half processing
  */
-static void bh_handler(void* context)
+static void bh_handler(struct work_struct *work)
 {
-	struct slgt_info *info = context;
+	struct slgt_info *info = container_of(work, struct slgt_info, task);
 	int action;
 
 	if (!info)
@@ -3326,7 +3326,7 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev
 	} else {
 		memset(info, 0, sizeof(struct slgt_info));
 		info->magic = MGSL_MAGIC;
-		INIT_WORK(&info->task, bh_handler, info);
+		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
 		info->raw_rx_size = DMABUFSIZE;
 		info->close_delay = 5*HZ/10;
@@ -4799,6 +4799,6 @@ static void rx_timeout(unsigned long context)
 	spin_lock_irqsave(&info->lock, flags);
 	info->pending_bh |= BH_RECEIVE;
 	spin_unlock_irqrestore(&info->lock, flags);
-	bh_handler(info);
+	bh_handler(&info->task);
 }
 
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 3e932b681371..13a57245cf2e 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -602,7 +602,7 @@ static void enable_loopback(SLMP_INFO *info, int enable);
 static void set_rate(SLMP_INFO *info, u32 data_rate);
 
 static int  bh_action(SLMP_INFO *info);
-static void bh_handler(void* Context);
+static void bh_handler(struct work_struct *work);
 static void bh_receive(SLMP_INFO *info);
 static void bh_transmit(SLMP_INFO *info);
 static void bh_status(SLMP_INFO *info);
@@ -2063,9 +2063,9 @@ int bh_action(SLMP_INFO *info)
 
 /* Perform bottom half processing of work items queued by ISR.
  */
-void bh_handler(void* Context)
+void bh_handler(struct work_struct *work)
 {
-	SLMP_INFO *info = (SLMP_INFO*)Context;
+	SLMP_INFO *info = container_of(work, SLMP_INFO, task);
 	int action;
 
 	if (!info)
@@ -3805,7 +3805,7 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
 	} else {
 		memset(info, 0, sizeof(SLMP_INFO));
 		info->magic = MGSL_MAGIC;
-		INIT_WORK(&info->task, bh_handler, info);
+		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
 		info->close_delay = 5*HZ/10;
 		info->closing_wait = 30*HZ;
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 6ad2d3bb945c..36f91a655275 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -325,9 +325,9 @@ static void user_reader_timeout(unsigned long ptr)
 	schedule_work(&chip->work);
 }
 
-static void timeout_work(void *ptr)
+static void timeout_work(struct work_struct *work)
 {
-	struct tpm_chip *chip = ptr;
+	struct tpm_chip *chip = container_of(work, struct tpm_chip, work);
 
 	down(&chip->buffer_mutex);
 	atomic_set(&chip->data_pending, 0);
@@ -1105,7 +1105,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vend
 	init_MUTEX(&chip->tpm_mutex);
 	INIT_LIST_HEAD(&chip->list);
 
-	INIT_WORK(&chip->work, timeout_work, chip);
+	INIT_WORK(&chip->work, timeout_work);
 
 	init_timer(&chip->user_read_timer);
 	chip->user_read_timer.function = user_reader_timeout;
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 05f8ce2cfb4a..b418b16e910e 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -31,9 +31,11 @@
 #include <linux/connector.h>
 #include <linux/delay.h>
 
-void cn_queue_wrapper(void *data)
+void cn_queue_wrapper(struct work_struct *work)
 {
-	struct cn_callback_data *d = data;
+	struct cn_callback_entry *cbq =
+		container_of(work, struct cn_callback_entry, work.work);
+	struct cn_callback_data *d = &cbq->data;
 
 	d->callback(d->callback_priv);
 
@@ -57,7 +59,7 @@ static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struc
 	memcpy(&cbq->id.id, id, sizeof(struct cb_id));
 	cbq->data.callback = callback;
 	
-	INIT_WORK(&cbq->work, &cn_queue_wrapper, &cbq->data);
+	INIT_DELAYED_WORK(&cbq->work, &cn_queue_wrapper);
 	return cbq;
 }
 
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index b49bacfd8de8..5e7cd45d10ee 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -135,40 +135,39 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
 	spin_lock_bh(&dev->cbdev->queue_lock);
 	list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) {
 		if (cn_cb_equal(&__cbq->id.id, &msg->id)) {
-			if (likely(!test_bit(0, &__cbq->work.pending) &&
+			if (likely(!test_bit(WORK_STRUCT_PENDING,
+					     &__cbq->work.work.management) &&
 					__cbq->data.ddata == NULL)) {
 				__cbq->data.callback_priv = msg;
 
 				__cbq->data.ddata = data;
 				__cbq->data.destruct_data = destruct_data;
 
-				if (queue_work(dev->cbdev->cn_queue,
-						&__cbq->work))
+				if (queue_delayed_work(
+					    dev->cbdev->cn_queue,
+					    &__cbq->work, 0))
 					err = 0;
 			} else {
-				struct work_struct *w;
 				struct cn_callback_data *d;
 				
-				w = kzalloc(sizeof(*w) + sizeof(*d), GFP_ATOMIC);
-				if (w) {
-					d = (struct cn_callback_data *)(w+1);
-
+				__cbq = kzalloc(sizeof(*__cbq), GFP_ATOMIC);
+				if (__cbq) {
+					d = &__cbq->data;
 					d->callback_priv = msg;
 					d->callback = __cbq->data.callback;
 					d->ddata = data;
 					d->destruct_data = destruct_data;
-					d->free = w;
+					d->free = __cbq;
 
-					INIT_LIST_HEAD(&w->entry);
-					w->pending = 0;
-					w->func = &cn_queue_wrapper;
-					w->data = d;
-					init_timer(&w->timer);
+					INIT_DELAYED_WORK(&__cbq->work,
+							  &cn_queue_wrapper);
 					
-					if (queue_work(dev->cbdev->cn_queue, w))
+					if (queue_delayed_work(
+						    dev->cbdev->cn_queue,
+						    &__cbq->work, 0))
 						err = 0;
 					else {
-						kfree(w);
+						kfree(__cbq);
 						err = -EINVAL;
 					}
 				} else
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index c4c578defabf..5ef5ede5b884 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -59,7 +59,7 @@ static unsigned int 				def_sampling_rate;
 #define MAX_SAMPLING_DOWN_FACTOR		(10)
 #define TRANSITION_LATENCY_LIMIT		(10 * 1000)
 
-static void do_dbs_timer(void *data);
+static void do_dbs_timer(struct work_struct *work);
 
 struct cpu_dbs_info_s {
 	struct cpufreq_policy 	*cur_policy;
@@ -82,7 +82,7 @@ static unsigned int dbs_enable;	/* number of CPUs using this policy */
  * is recursive for the same process. -Venki
  */
 static DEFINE_MUTEX 	(dbs_mutex);
-static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);
+static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer);
 
 struct dbs_tuners {
 	unsigned int 		sampling_rate;
@@ -420,7 +420,7 @@ static void dbs_check_cpu(int cpu)
 	}
 }
 
-static void do_dbs_timer(void *data)
+static void do_dbs_timer(struct work_struct *work)
 { 
 	int i;
 	lock_cpu_hotplug();
@@ -435,7 +435,6 @@ static void do_dbs_timer(void *data)
 
 static inline void dbs_timer_init(void)
 {
-	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
 	schedule_delayed_work(&dbs_work,
 			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
 	return;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index bf8aa45d4f01..e1cc5113c2ae 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -47,13 +47,17 @@ static unsigned int def_sampling_rate;
 #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
 #define TRANSITION_LATENCY_LIMIT		(10 * 1000)
 
-static void do_dbs_timer(void *data);
+static void do_dbs_timer(struct work_struct *work);
+
+/* Sampling types */
+enum dbs_sample {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
 
 struct cpu_dbs_info_s {
 	cputime64_t prev_cpu_idle;
 	cputime64_t prev_cpu_wall;
 	struct cpufreq_policy *cur_policy;
- 	struct work_struct work;
+ 	struct delayed_work work;
+	enum dbs_sample sample_type;
 	unsigned int enable;
 	struct cpufreq_frequency_table *freq_table;
 	unsigned int freq_lo;
@@ -407,30 +411,31 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	}
 }
 
-/* Sampling types */
-enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
-
-static void do_dbs_timer(void *data)
+static void do_dbs_timer(struct work_struct *work)
 {
 	unsigned int cpu = smp_processor_id();
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	enum dbs_sample sample_type = dbs_info->sample_type;
 	/* We want all CPUs to do sampling nearly on same jiffy */
 	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+	/* Permit rescheduling of this work item */
+	work_release(work);
+
 	delay -= jiffies % delay;
 
 	if (!dbs_info->enable)
 		return;
 	/* Common NORMAL_SAMPLE setup */
-	INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
 	if (!dbs_tuners_ins.powersave_bias ||
-	    (unsigned long) data == DBS_NORMAL_SAMPLE) {
+	    sample_type == DBS_NORMAL_SAMPLE) {
 		lock_cpu_hotplug();
 		dbs_check_cpu(dbs_info);
 		unlock_cpu_hotplug();
 		if (dbs_info->freq_lo) {
 			/* Setup timer for SUB_SAMPLE */
-			INIT_WORK(&dbs_info->work, do_dbs_timer,
-					(void *)DBS_SUB_SAMPLE);
+			dbs_info->sample_type = DBS_SUB_SAMPLE;
 			delay = dbs_info->freq_hi_jiffies;
 		}
 	} else {
@@ -449,7 +454,8 @@ static inline void dbs_timer_init(unsigned int cpu)
 	delay -= jiffies % delay;
 
 	ondemand_powersave_bias_init();
-	INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
+	INIT_DELAYED_WORK_NAR(&dbs_info->work, do_dbs_timer);
+	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
 	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
diff --git a/drivers/i2c/chips/ds1374.c b/drivers/i2c/chips/ds1374.c
index 4630f1969a09..15edf40828b4 100644
--- a/drivers/i2c/chips/ds1374.c
+++ b/drivers/i2c/chips/ds1374.c
@@ -140,12 +140,14 @@ ulong ds1374_get_rtc_time(void)
 	return t1;
 }
 
-static void ds1374_set_work(void *arg)
+static ulong new_time;
+
+static void ds1374_set_work(struct work_struct *work)
 {
 	ulong t1, t2;
 	int limit = 10;		/* arbitrary retry limit */
 
-	t1 = *(ulong *) arg;
+	t1 = new_time;
 
 	mutex_lock(&ds1374_mutex);
 
@@ -167,11 +169,9 @@ static void ds1374_set_work(void *arg)
 			 "can't confirm time set from rtc chip\n");
 }
 
-static ulong new_time;
-
 static struct workqueue_struct *ds1374_workqueue;
 
-static DECLARE_WORK(ds1374_work, ds1374_set_work, &new_time);
+static DECLARE_WORK(ds1374_work, ds1374_set_work);
 
 int ds1374_set_rtc_time(ulong nowtime)
 {
@@ -180,7 +180,7 @@ int ds1374_set_rtc_time(ulong nowtime)
 	if (in_interrupt())
 		queue_work(ds1374_workqueue, &ds1374_work);
 	else
-		ds1374_set_work(&new_time);
+		ds1374_set_work(NULL);
 
 	return 0;
 }
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index d90a3a1898c0..8f4378a1631c 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -31,9 +31,10 @@
 #include "config_roms.h"
 
 
-static void delayed_reset_bus(void * __reset_info)
+static void delayed_reset_bus(struct work_struct *work)
 {
-	struct hpsb_host *host = (struct hpsb_host*)__reset_info;
+	struct hpsb_host *host =
+		container_of(work, struct hpsb_host, delayed_reset.work);
 	int generation = host->csr.generation + 1;
 
 	/* The generation field rolls over to 2 rather than 0 per IEEE
@@ -145,7 +146,7 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 
 	atomic_set(&h->generation, 0);
 
-	INIT_WORK(&h->delayed_reset, delayed_reset_bus, h);
+	INIT_DELAYED_WORK(&h->delayed_reset, delayed_reset_bus);
 	
 	init_timer(&h->timeout);
 	h->timeout.data = (unsigned long) h;
@@ -234,7 +235,7 @@ int hpsb_update_config_rom_image(struct hpsb_host *host)
 		 * Config ROM in the near future. */
 		reset_delay = HZ;
 
-	PREPARE_WORK(&host->delayed_reset, delayed_reset_bus, host);
+	PREPARE_DELAYED_WORK(&host->delayed_reset, delayed_reset_bus);
 	schedule_delayed_work(&host->delayed_reset, reset_delay);
 
 	return 0;
diff --git a/drivers/ieee1394/hosts.h b/drivers/ieee1394/hosts.h
index bc6dbfadb891..d553e38c9543 100644
--- a/drivers/ieee1394/hosts.h
+++ b/drivers/ieee1394/hosts.h
@@ -62,7 +62,7 @@ struct hpsb_host {
 	struct class_device class_dev;
 
 	int update_config_rom;
-	struct work_struct delayed_reset;
+	struct delayed_work delayed_reset;
 	unsigned int config_roms;
 
 	struct list_head addr_space;
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 6986ac188281..cd156d4e779e 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -493,20 +493,25 @@ static void sbp2util_notify_fetch_agent(struct scsi_id_instance_data *scsi_id,
 		scsi_unblock_requests(scsi_id->scsi_host);
 }
 
-static void sbp2util_write_orb_pointer(void *p)
+static void sbp2util_write_orb_pointer(struct work_struct *work)
 {
+	struct scsi_id_instance_data *scsi_id =
+		container_of(work, struct scsi_id_instance_data,
+			     protocol_work.work);
 	quadlet_t data[2];
 
-	data[0] = ORB_SET_NODE_ID(
-			((struct scsi_id_instance_data *)p)->hi->host->node_id);
-	data[1] = ((struct scsi_id_instance_data *)p)->last_orb_dma;
+	data[0] = ORB_SET_NODE_ID(scsi_id->hi->host->node_id);
+	data[1] = scsi_id->last_orb_dma;
 	sbp2util_cpu_to_be32_buffer(data, 8);
-	sbp2util_notify_fetch_agent(p, SBP2_ORB_POINTER_OFFSET, data, 8);
+	sbp2util_notify_fetch_agent(scsi_id, SBP2_ORB_POINTER_OFFSET, data, 8);
 }
 
-static void sbp2util_write_doorbell(void *p)
+static void sbp2util_write_doorbell(struct work_struct *work)
 {
-	sbp2util_notify_fetch_agent(p, SBP2_DOORBELL_OFFSET, NULL, 4);
+	struct scsi_id_instance_data *scsi_id =
+		container_of(work, struct scsi_id_instance_data,
+			     protocol_work.work);
+	sbp2util_notify_fetch_agent(scsi_id, SBP2_DOORBELL_OFFSET, NULL, 4);
 }
 
 /*
@@ -843,7 +848,7 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud
 	INIT_LIST_HEAD(&scsi_id->scsi_list);
 	spin_lock_init(&scsi_id->sbp2_command_orb_lock);
 	atomic_set(&scsi_id->state, SBP2LU_STATE_RUNNING);
-	INIT_WORK(&scsi_id->protocol_work, NULL, NULL);
+	INIT_DELAYED_WORK(&scsi_id->protocol_work, NULL);
 
 	ud->device.driver_data = scsi_id;
 
@@ -2047,11 +2052,10 @@ static void sbp2_link_orb_command(struct scsi_id_instance_data *scsi_id,
 		 * We do not accept new commands until the job is over.
 		 */
 		scsi_block_requests(scsi_id->scsi_host);
-		PREPARE_WORK(&scsi_id->protocol_work,
+		PREPARE_DELAYED_WORK(&scsi_id->protocol_work,
 			     last_orb ? sbp2util_write_doorbell:
-					sbp2util_write_orb_pointer,
-			     scsi_id);
-		schedule_work(&scsi_id->protocol_work);
+					sbp2util_write_orb_pointer);
+		schedule_delayed_work(&scsi_id->protocol_work, 0);
 	}
 }
 
diff --git a/drivers/ieee1394/sbp2.h b/drivers/ieee1394/sbp2.h
index abbe48e646c3..1b16d6b9cf11 100644
--- a/drivers/ieee1394/sbp2.h
+++ b/drivers/ieee1394/sbp2.h
@@ -348,7 +348,7 @@ struct scsi_id_instance_data {
 	unsigned workarounds;
 
 	atomic_t state;
-	struct work_struct protocol_work;
+	struct delayed_work protocol_work;
 };
 
 /* For use in scsi_id_instance_data.state */
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index e11187ecc931..84b2f5cb3722 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -55,11 +55,11 @@ struct addr_req {
 	int status;
 };
 
-static void process_req(void *data);
+static void process_req(struct work_struct *work);
 
 static DEFINE_MUTEX(lock);
 static LIST_HEAD(req_list);
-static DECLARE_WORK(work, process_req, NULL);
+static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
 void rdma_addr_register_client(struct rdma_addr_client *client)
@@ -215,7 +215,7 @@ out:
 	return ret;
 }
 
-static void process_req(void *data)
+static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
 	struct sockaddr_in *src_in, *dst_in;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 20e9f64e67a6..98272fbbfb31 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -285,9 +285,10 @@ err:
 	kfree(tprops);
 }
 
-static void ib_cache_task(void *work_ptr)
+static void ib_cache_task(struct work_struct *_work)
 {
-	struct ib_update_work *work = work_ptr;
+	struct ib_update_work *work =
+		container_of(_work, struct ib_update_work, work);
 
 	ib_cache_update(work->device, work->port_num);
 	kfree(work);
@@ -306,7 +307,7 @@ static void ib_cache_event(struct ib_event_handler *handler,
 	    event->event == IB_EVENT_CLIENT_REREGISTER) {
 		work = kmalloc(sizeof *work, GFP_ATOMIC);
 		if (work) {
-			INIT_WORK(&work->work, ib_cache_task, work);
+			INIT_WORK(&work->work, ib_cache_task);
 			work->device   = event->device;
 			work->port_num = event->element.port_num;
 			schedule_work(&work->work);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 25b1018a476c..e1990f531d0a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -101,7 +101,7 @@ struct cm_av {
 };
 
 struct cm_work {
-	struct work_struct work;
+	struct delayed_work work;
 	struct list_head list;
 	struct cm_port *port;
 	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
@@ -161,7 +161,7 @@ struct cm_id_private {
 	atomic_t work_count;
 };
 
-static void cm_work_handler(void *data);
+static void cm_work_handler(struct work_struct *work);
 
 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
 {
@@ -669,8 +669,7 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
 		return ERR_PTR(-ENOMEM);
 
 	timewait_info->work.local_id = local_id;
-	INIT_WORK(&timewait_info->work.work, cm_work_handler,
-		  &timewait_info->work);
+	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
 	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
 	return timewait_info;
 }
@@ -2987,9 +2986,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
 	}
 }
 
-static void cm_work_handler(void *data)
+static void cm_work_handler(struct work_struct *_work)
 {
-	struct cm_work *work = data;
+	struct cm_work *work = container_of(_work, struct cm_work, work.work);
 	int ret;
 
 	switch (work->cm_event.event) {
@@ -3079,12 +3078,12 @@ int ib_cm_establish(struct ib_cm_id *cm_id)
 	 * we need to find the cm_id once we're in the context of the
 	 * worker thread, rather than holding a reference on it.
 	 */
-	INIT_WORK(&work->work, cm_work_handler, work);
+	INIT_DELAYED_WORK(&work->work, cm_work_handler);
 	work->local_id = cm_id->local_id;
 	work->remote_id = cm_id->remote_id;
 	work->mad_recv_wc = NULL;
 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
-	queue_work(cm.wq, &work->work);
+	queue_delayed_work(cm.wq, &work->work, 0);
 out:
 	return ret;
 }
@@ -3146,11 +3145,11 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 		return;
 	}
 
-	INIT_WORK(&work->work, cm_work_handler, work);
+	INIT_DELAYED_WORK(&work->work, cm_work_handler);
 	work->cm_event.event = event;
 	work->mad_recv_wc = mad_recv_wc;
 	work->port = (struct cm_port *)mad_agent->context;
-	queue_work(cm.wq, &work->work);
+	queue_delayed_work(cm.wq, &work->work, 0);
 }
 
 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 845090b0859c..189f73f3f721 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1341,9 +1341,9 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
 
-static void cma_work_handler(void *data)
+static void cma_work_handler(struct work_struct *_work)
 {
-	struct cma_work *work = data;
+	struct cma_work *work = container_of(_work, struct cma_work, work);
 	struct rdma_id_private *id_priv = work->id;
 	int destroy = 0;
 
@@ -1374,7 +1374,7 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 		return -ENOMEM;
 
 	work->id = id_priv;
-	INIT_WORK(&work->work, cma_work_handler, work);
+	INIT_WORK(&work->work, cma_work_handler);
 	work->old_state = CMA_ROUTE_QUERY;
 	work->new_state = CMA_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
@@ -1431,7 +1431,7 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 		return -ENOMEM;
 
 	work->id = id_priv;
-	INIT_WORK(&work->work, cma_work_handler, work);
+	INIT_WORK(&work->work, cma_work_handler);
 	work->old_state = CMA_ROUTE_QUERY;
 	work->new_state = CMA_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
@@ -1585,7 +1585,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	}
 
 	work->id = id_priv;
-	INIT_WORK(&work->work, cma_work_handler, work);
+	INIT_WORK(&work->work, cma_work_handler);
 	work->old_state = CMA_ADDR_QUERY;
 	work->new_state = CMA_ADDR_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index c3fb304a4e86..9bfa785252dc 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -828,9 +828,10 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
  * thread asleep on the destroy_comp list vs. an object destroyed
  * here synchronously when the last reference is removed.
  */
-static void cm_work_handler(void *arg)
+static void cm_work_handler(struct work_struct *_work)
 {
-	struct iwcm_work *work = arg, lwork;
+	struct iwcm_work lwork, *work =
+		container_of(_work, struct iwcm_work, work);
 	struct iwcm_id_private *cm_id_priv = work->cm_id;
 	unsigned long flags;
 	int empty;
@@ -899,7 +900,7 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}
 
-	INIT_WORK(&work->work, cm_work_handler, work);
+	INIT_WORK(&work->work, cm_work_handler);
 	work->cm_id = cm_id_priv;
 	work->event = *iw_event;
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a72bcea46ff6..5a54ac35e961 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -65,8 +65,8 @@ static struct ib_mad_agent_private *find_mad_agent(
 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 				    struct ib_mad_private *mad);
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
-static void timeout_sends(void *data);
-static void local_completions(void *data);
+static void timeout_sends(struct work_struct *work);
+static void local_completions(struct work_struct *work);
 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			      struct ib_mad_agent_private *agent_priv,
 			      u8 mgmt_class);
@@ -356,10 +356,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
 	INIT_LIST_HEAD(&mad_agent_priv->done_list);
 	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
-	INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
+	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
 	INIT_LIST_HEAD(&mad_agent_priv->local_list);
-	INIT_WORK(&mad_agent_priv->local_work, local_completions,
-		   mad_agent_priv);
+	INIT_WORK(&mad_agent_priv->local_work, local_completions);
 	atomic_set(&mad_agent_priv->refcount, 1);
 	init_completion(&mad_agent_priv->comp);
 
@@ -2198,12 +2197,12 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
 /*
  * IB MAD completion callback
  */
-static void ib_mad_completion_handler(void *data)
+static void ib_mad_completion_handler(struct work_struct *work)
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_wc wc;
 
-	port_priv = (struct ib_mad_port_private *)data;
+	port_priv = container_of(work, struct ib_mad_port_private, work);
 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 
 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
@@ -2324,7 +2323,7 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent,
 }
 EXPORT_SYMBOL(ib_cancel_mad);
 
-static void local_completions(void *data)
+static void local_completions(struct work_struct *work)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_local_private *local;
@@ -2334,7 +2333,8 @@ static void local_completions(void *data)
 	struct ib_wc wc;
 	struct ib_mad_send_wc mad_send_wc;
 
-	mad_agent_priv = (struct ib_mad_agent_private *)data;
+	mad_agent_priv =
+		container_of(work, struct ib_mad_agent_private, local_work);
 
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	while (!list_empty(&mad_agent_priv->local_list)) {
@@ -2434,14 +2434,15 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 	return ret;
 }
 
-static void timeout_sends(void *data)
+static void timeout_sends(struct work_struct *work)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc mad_send_wc;
 	unsigned long flags, delay;
 
-	mad_agent_priv = (struct ib_mad_agent_private *)data;
+	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
+				      timed_work.work);
 	mad_send_wc.vendor_err = 0;
 
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2799,7 +2800,7 @@ static int ib_mad_port_open(struct ib_device *device,
 		ret = -ENOMEM;
 		goto error8;
 	}
-	INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
+	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d06b59083f6e..d5548e73e068 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -102,7 +102,7 @@ struct ib_mad_agent_private {
 	struct list_head send_list;
 	struct list_head wait_list;
 	struct list_head done_list;
-	struct work_struct timed_work;
+	struct delayed_work timed_work;
 	unsigned long timeout;
 	struct list_head local_list;
 	struct work_struct local_work;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 1ef79d015a1e..3663fd7022be 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -45,8 +45,8 @@ enum rmpp_state {
 struct mad_rmpp_recv {
 	struct ib_mad_agent_private *agent;
 	struct list_head list;
-	struct work_struct timeout_work;
-	struct work_struct cleanup_work;
+	struct delayed_work timeout_work;
+	struct delayed_work cleanup_work;
 	struct completion comp;
 	enum rmpp_state state;
 	spinlock_t lock;
@@ -233,9 +233,10 @@ static void nack_recv(struct ib_mad_agent_private *agent,
 	}
 }
 
-static void recv_timeout_handler(void *data)
+static void recv_timeout_handler(struct work_struct *work)
 {
-	struct mad_rmpp_recv *rmpp_recv = data;
+	struct mad_rmpp_recv *rmpp_recv =
+		container_of(work, struct mad_rmpp_recv, timeout_work.work);
 	struct ib_mad_recv_wc *rmpp_wc;
 	unsigned long flags;
 
@@ -254,9 +255,10 @@ static void recv_timeout_handler(void *data)
 	ib_free_recv_mad(rmpp_wc);
 }
 
-static void recv_cleanup_handler(void *data)
+static void recv_cleanup_handler(struct work_struct *work)
 {
-	struct mad_rmpp_recv *rmpp_recv = data;
+	struct mad_rmpp_recv *rmpp_recv =
+		container_of(work, struct mad_rmpp_recv, cleanup_work.work);
 	unsigned long flags;
 
 	spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
@@ -285,8 +287,8 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
 
 	rmpp_recv->agent = agent;
 	init_completion(&rmpp_recv->comp);
-	INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv);
-	INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv);
+	INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler);
+	INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
 	spin_lock_init(&rmpp_recv->lock);
 	rmpp_recv->state = RMPP_STATE_ACTIVE;
 	atomic_set(&rmpp_recv->refcount, 1);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1706d3c7e95e..e45afba75341 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -360,9 +360,10 @@ static void free_sm_ah(struct kref *kref)
 	kfree(sm_ah);
 }
 
-static void update_sm_ah(void *port_ptr)
+static void update_sm_ah(struct work_struct *work)
 {
-	struct ib_sa_port *port = port_ptr;
+	struct ib_sa_port *port =
+		container_of(work, struct ib_sa_port, update_task);
 	struct ib_sa_sm_ah *new_ah, *old_ah;
 	struct ib_port_attr port_attr;
 	struct ib_ah_attr   ah_attr;
@@ -992,8 +993,7 @@ static void ib_sa_add_one(struct ib_device *device)
 		if (IS_ERR(sa_dev->port[i].agent))
 			goto err;
 
-		INIT_WORK(&sa_dev->port[i].update_task,
-			  update_sm_ah, &sa_dev->port[i]);
+		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
 	}
 
 	ib_set_client_data(device, &sa_client, sa_dev);
@@ -1010,7 +1010,7 @@ static void ib_sa_add_one(struct ib_device *device)
 		goto err;
 
 	for (i = 0; i <= e - s; ++i)
-		update_sm_ah(&sa_dev->port[i]);
+		update_sm_ah(&sa_dev->port[i].update_task);
 
 	return;
 
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
index efe147dbeb42..db12cc0841df 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -179,9 +179,10 @@ void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
 	up_write(&current->mm->mmap_sem);
 }
 
-static void ib_umem_account(void *work_ptr)
+static void ib_umem_account(struct work_struct *_work)
 {
-	struct ib_umem_account_work *work = work_ptr;
+	struct ib_umem_account_work *work =
+		container_of(_work, struct ib_umem_account_work, work);
 
 	down_write(&work->mm->mmap_sem);
 	work->mm->locked_vm -= work->diff;
@@ -216,7 +217,7 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
 		return;
 	}
 
-	INIT_WORK(&work->work, ib_umem_account, work);
+	INIT_WORK(&work->work, ib_umem_account);
 	work->mm   = mm;
 	work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 413754b1d8a2..8536aeb96af8 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -214,9 +214,10 @@ struct ipath_user_pages_work {
 	unsigned long num_pages;
 };
 
-static void user_pages_account(void *ptr)
+static void user_pages_account(struct work_struct *_work)
 {
-	struct ipath_user_pages_work *work = ptr;
+	struct ipath_user_pages_work *work =
+		container_of(_work, struct ipath_user_pages_work, work);
 
 	down_write(&work->mm->mmap_sem);
 	work->mm->locked_vm -= work->num_pages;
@@ -242,7 +243,7 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
 
 	goto bail;
 
-	INIT_WORK(&work->work, user_pages_account, work);
+	INIT_WORK(&work->work, user_pages_account);
 	work->mm = mm;
 	work->num_pages = num_pages;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index cd044ea2dfa4..e948158a28d9 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -57,7 +57,7 @@ static int catas_reset_disable;
 module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
 MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
 
-static void catas_reset(void *work_ptr)
+static void catas_reset(struct work_struct *work)
 {
 	struct mthca_dev *dev, *tmpdev;
 	LIST_HEAD(tlist);
@@ -203,7 +203,7 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
 
 int __init mthca_catas_init(void)
 {
-	INIT_WORK(&catas_work, catas_reset, NULL);
+	INIT_WORK(&catas_work, catas_reset);
 
 	catas_wq = create_singlethread_workqueue("mthca_catas");
 	if (!catas_wq)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 0b8a79d53a00..3b2ee0eba05e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -136,11 +136,11 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct work_struct pkey_task;
-	struct work_struct mcast_task;
+	struct delayed_work pkey_task;
+	struct delayed_work mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
-	struct work_struct ah_reap_task;
+	struct delayed_work ah_reap_task;
 
 	struct ib_device *ca;
 	u8            	  port;
@@ -254,13 +254,13 @@ int ipoib_add_pkey_attr(struct net_device *dev);
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
-void ipoib_reap_ah(void *dev_ptr);
+void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_flush_paths(struct net_device *dev);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_ib_dev_flush(void *dev);
+void ipoib_ib_dev_flush(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
@@ -271,10 +271,10 @@ int ipoib_ib_dev_stop(struct net_device *dev);
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
 
-void ipoib_mcast_join_task(void *dev_ptr);
+void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 
-void ipoib_mcast_restart_task(void *dev_ptr);
+void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
 int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
 
@@ -312,7 +312,7 @@ void ipoib_event(struct ib_event_handler *handler,
 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
 
-void ipoib_pkey_poll(void *dev);
+void ipoib_pkey_poll(struct work_struct *work);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8bf5e9ec7c95..f10fba5d3265 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -400,10 +400,11 @@ static void __ipoib_reap_ah(struct net_device *dev)
 	spin_unlock_irq(&priv->tx_lock);
 }
 
-void ipoib_reap_ah(void *dev_ptr)
+void ipoib_reap_ah(struct work_struct *work)
 {
-	struct net_device *dev = dev_ptr;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
+	struct net_device *dev = priv->dev;
 
 	__ipoib_reap_ah(dev);
 
@@ -613,10 +614,11 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	return 0;
 }
 
-void ipoib_ib_dev_flush(void *_dev)
+void ipoib_ib_dev_flush(struct work_struct *work)
 {
-	struct net_device *dev = (struct net_device *)_dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
+	struct ipoib_dev_priv *cpriv, *priv =
+		container_of(work, struct ipoib_dev_priv, flush_task);
+	struct net_device *dev = priv->dev;
 
 	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
@@ -638,14 +640,14 @@ void ipoib_ib_dev_flush(void *_dev)
 	 */
 	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 		ipoib_ib_dev_up(dev);
-		ipoib_mcast_restart_task(dev);
+		ipoib_mcast_restart_task(&priv->restart_task);
 	}
 
 	mutex_lock(&priv->vlan_mutex);
 
 	/* Flush any child interfaces too */
 	list_for_each_entry(cpriv, &priv->child_intfs, list)
-		ipoib_ib_dev_flush(cpriv->dev);
+		ipoib_ib_dev_flush(&cpriv->flush_task);
 
 	mutex_unlock(&priv->vlan_mutex);
 }
@@ -672,10 +674,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
  * change async notification is available.
  */
 
-void ipoib_pkey_poll(void *dev_ptr)
+void ipoib_pkey_poll(struct work_struct *work)
 {
-	struct net_device *dev = dev_ptr;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, pkey_task.work);
+	struct net_device *dev = priv->dev;
 
 	ipoib_pkey_dev_check_presence(dev);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 85522daeb946..71114a8c12b9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -933,11 +933,11 @@ static void ipoib_setup(struct net_device *dev)
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_WORK(&priv->pkey_task,    ipoib_pkey_poll,          priv->dev);
-	INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,    priv->dev);
-	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,       priv->dev);
-	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
-	INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,            priv->dev);
+	INIT_DELAYED_WORK(&priv->pkey_task,    ipoib_pkey_poll);
+	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
+	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
+	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
+	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 3faa1820f0e9..f0a4fac1a215 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -399,7 +399,8 @@ static void ipoib_mcast_join_complete(int status,
 		mcast->backoff = 1;
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_work(ipoib_workqueue, &priv->mcast_task);
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->mcast_task, 0);
 		mutex_unlock(&mcast_mutex);
 		complete(&mcast->done);
 		return;
@@ -435,7 +436,8 @@ static void ipoib_mcast_join_complete(int status,
 
 	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
 		if (status == -ETIMEDOUT)
-			queue_work(ipoib_workqueue, &priv->mcast_task);
+			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+					   0);
 		else
 			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
 					   mcast->backoff * HZ);
@@ -517,10 +519,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		mcast->query_id = ret;
 }
 
-void ipoib_mcast_join_task(void *dev_ptr)
+void ipoib_mcast_join_task(struct work_struct *work)
 {
-	struct net_device *dev = dev_ptr;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, mcast_task.work);
+	struct net_device *dev = priv->dev;
 
 	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
 		return;
@@ -610,7 +613,7 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_work(ipoib_workqueue, &priv->mcast_task);
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	spin_lock_irq(&priv->lock);
@@ -818,10 +821,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 	}
 }
 
-void ipoib_mcast_restart_task(void *dev_ptr)
+void ipoib_mcast_restart_task(struct work_struct *work)
 {
-	struct net_device *dev = dev_ptr;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, restart_task);
+	struct net_device *dev = priv->dev;
 	struct dev_mc_list *mclist;
 	struct ipoib_mcast *mcast, *tmcast;
 	LIST_HEAD(remove_list);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 18a000034996..693b77002897 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -48,7 +48,7 @@
 
 static void iser_cq_tasklet_fn(unsigned long data);
 static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-static void iser_comp_error_worker(void *data);
+static void iser_comp_error_worker(struct work_struct *work);
 
 static void iser_cq_event_callback(struct ib_event *cause, void *context)
 {
@@ -480,8 +480,7 @@ int iser_conn_init(struct iser_conn **ibconn)
 	init_waitqueue_head(&ib_conn->wait);
 	atomic_set(&ib_conn->post_recv_buf_count, 0);
 	atomic_set(&ib_conn->post_send_buf_count, 0);
-	INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker,
-		  ib_conn);
+	INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
 
@@ -754,9 +753,10 @@ int iser_post_send(struct iser_desc *tx_desc)
 	return ret_val;
 }
 
-static void iser_comp_error_worker(void *data)
+static void iser_comp_error_worker(struct work_struct *work)
 {
-	struct iser_conn *ib_conn = data;
+	struct iser_conn *ib_conn =
+		container_of(work, struct iser_conn, comperror_work);
 
 	/* getting here when the state is UP means that the conn is being *
 	 * terminated asynchronously from the iSCSI layer's perspective.  */
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 4b09147f438f..214f66195af2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -390,9 +390,10 @@ static void srp_disconnect_target(struct srp_target_port *target)
 	wait_for_completion(&target->done);
 }
 
-static void srp_remove_work(void *target_ptr)
+static void srp_remove_work(struct work_struct *work)
 {
-	struct srp_target_port *target = target_ptr;
+	struct srp_target_port *target =
+		container_of(work, struct srp_target_port, work);
 
 	spin_lock_irq(target->scsi_host->host_lock);
 	if (target->state != SRP_TARGET_DEAD) {
@@ -575,7 +576,7 @@ err:
 	spin_lock_irq(target->scsi_host->host_lock);
 	if (target->state == SRP_TARGET_CONNECTING) {
 		target->state = SRP_TARGET_DEAD;
-		INIT_WORK(&target->work, srp_remove_work, target);
+		INIT_WORK(&target->work, srp_remove_work);
 		schedule_work(&target->work);
 	}
 	spin_unlock_irq(target->scsi_host->host_lock);
diff --git a/drivers/input/keyboard/lkkbd.c b/drivers/input/keyboard/lkkbd.c
index 979b93e33da7..b7f049b45b6b 100644
--- a/drivers/input/keyboard/lkkbd.c
+++ b/drivers/input/keyboard/lkkbd.c
@@ -572,9 +572,9 @@ lkkbd_event (struct input_dev *dev, unsigned int type, unsigned int code,
  * were in.
  */
 static void
-lkkbd_reinit (void *data)
+lkkbd_reinit (struct work_struct *work)
 {
-	struct lkkbd *lk = data;
+	struct lkkbd *lk = container_of(work, struct lkkbd, tq);
 	int division;
 	unsigned char leds_on = 0;
 	unsigned char leds_off = 0;
@@ -651,7 +651,7 @@ lkkbd_connect (struct serio *serio, struct serio_driver *drv)
 
 	lk->serio = serio;
 	lk->dev = input_dev;
-	INIT_WORK (&lk->tq, lkkbd_reinit, lk);
+	INIT_WORK (&lk->tq, lkkbd_reinit);
 	lk->bell_volume = bell_volume;
 	lk->keyclick_volume = keyclick_volume;
 	lk->ctrlclick_volume = ctrlclick_volume;
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index cac4781103c3..6cd887c5eb0a 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -208,9 +208,9 @@ static int sunkbd_initialize(struct sunkbd *sunkbd)
  * were in.
  */
 
-static void sunkbd_reinit(void *data)
+static void sunkbd_reinit(struct work_struct *work)
 {
-	struct sunkbd *sunkbd = data;
+	struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq);
 
 	wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ);
 
@@ -248,7 +248,7 @@ static int sunkbd_connect(struct serio *serio, struct serio_driver *drv)
 	sunkbd->serio = serio;
 	sunkbd->dev = input_dev;
 	init_waitqueue_head(&sunkbd->wait);
-	INIT_WORK(&sunkbd->tq, sunkbd_reinit, sunkbd);
+	INIT_WORK(&sunkbd->tq, sunkbd_reinit);
 	snprintf(sunkbd->phys, sizeof(sunkbd->phys), "%s/input0", serio->phys);
 
 	serio_set_drvdata(serio, sunkbd);
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 6f9b2c7cc9c2..52bb2226ce2f 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -888,9 +888,10 @@ static int psmouse_poll(struct psmouse *psmouse)
  * psmouse_resync() attempts to re-validate current protocol.
  */
 
-static void psmouse_resync(void *p)
+static void psmouse_resync(struct work_struct *work)
 {
-	struct psmouse *psmouse = p, *parent = NULL;
+	struct psmouse *parent = NULL, *psmouse =
+		container_of(work, struct psmouse, resync_work);
 	struct serio *serio = psmouse->ps2dev.serio;
 	psmouse_ret_t rc = PSMOUSE_GOOD_DATA;
 	int failed = 0, enabled = 0;
@@ -1121,7 +1122,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv)
 		goto out;
 
 	ps2_init(&psmouse->ps2dev, serio);
-	INIT_WORK(&psmouse->resync_work, psmouse_resync, psmouse);
+	INIT_WORK(&psmouse->resync_work, psmouse_resync);
 	psmouse->dev = input_dev;
 	snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys);
 
diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c
index 6ae6eb322111..946c38cf6f8a 100644
--- a/drivers/isdn/act2000/capi.c
+++ b/drivers/isdn/act2000/capi.c
@@ -627,8 +627,10 @@ handle_ack(act2000_card *card, act2000_chan *chan, __u8 blocknr) {
 }
 
 void
-actcapi_dispatch(act2000_card *card)
+actcapi_dispatch(struct work_struct *work)
 {
+	struct act2000_card *card =
+		container_of(work, struct act2000_card, rcv_tq);
 	struct sk_buff *skb;
 	actcapi_msg *msg;
 	__u16 ccmd;
diff --git a/drivers/isdn/act2000/capi.h b/drivers/isdn/act2000/capi.h
index 49f453c53c64..e55f6a931f66 100644
--- a/drivers/isdn/act2000/capi.h
+++ b/drivers/isdn/act2000/capi.h
@@ -356,7 +356,7 @@ extern int actcapi_connect_req(act2000_card *, act2000_chan *, char *, char, int
 extern void actcapi_select_b2_protocol_req(act2000_card *, act2000_chan *);
 extern void actcapi_disconnect_b3_req(act2000_card *, act2000_chan *);
 extern void actcapi_connect_resp(act2000_card *, act2000_chan *, __u8);
-extern void actcapi_dispatch(act2000_card *);
+extern void actcapi_dispatch(struct work_struct *);
 #ifdef DEBUG_MSG
 extern void actcapi_debug_msg(struct sk_buff *skb, int);
 #else
diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c
index d89dcde4eade..90593e2ef872 100644
--- a/drivers/isdn/act2000/module.c
+++ b/drivers/isdn/act2000/module.c
@@ -192,8 +192,11 @@ act2000_set_msn(act2000_card *card, char *eazmsn)
 }
 
 static void
-act2000_transmit(struct act2000_card *card)
+act2000_transmit(struct work_struct *work)
 {
+	struct act2000_card *card =
+		container_of(work, struct act2000_card, snd_tq);
+
 	switch (card->bus) {
 		case ACT2000_BUS_ISA:
 			act2000_isa_send(card);
@@ -207,8 +210,11 @@ act2000_transmit(struct act2000_card *card)
 }
 
 static void
-act2000_receive(struct act2000_card *card)
+act2000_receive(struct work_struct *work)
 {
+	struct act2000_card *card =
+		container_of(work, struct act2000_card, poll_tq);
+
 	switch (card->bus) {
 		case ACT2000_BUS_ISA:
 			act2000_isa_receive(card);
@@ -227,7 +233,7 @@ act2000_poll(unsigned long data)
 	act2000_card * card = (act2000_card *)data;
 	unsigned long flags;
 
-	act2000_receive(card);
+	act2000_receive(&card->poll_tq);
 	spin_lock_irqsave(&card->lock, flags);
 	mod_timer(&card->ptimer, jiffies+3);
 	spin_unlock_irqrestore(&card->lock, flags);
@@ -578,9 +584,9 @@ act2000_alloccard(int bus, int port, int irq, char *id)
 	skb_queue_head_init(&card->sndq);
 	skb_queue_head_init(&card->rcvq);
 	skb_queue_head_init(&card->ackq);
-	INIT_WORK(&card->snd_tq, (void *) (void *) act2000_transmit, card);
-	INIT_WORK(&card->rcv_tq, (void *) (void *) actcapi_dispatch, card);
-	INIT_WORK(&card->poll_tq, (void *) (void *) act2000_receive, card);
+	INIT_WORK(&card->snd_tq, act2000_transmit);
+	INIT_WORK(&card->rcv_tq, actcapi_dispatch);
+	INIT_WORK(&card->poll_tq, act2000_receive);
 	init_timer(&card->ptimer);
 	card->interface.owner = THIS_MODULE;
         card->interface.channels = ACT2000_BCH;
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 8c4fcb9027b3..783a25526315 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -208,9 +208,10 @@ static void notify_down(u32 contr)
 	}
 }
 
-static void notify_handler(void *data)
+static void notify_handler(struct work_struct *work)
 {
-	struct capi_notifier *np = data;
+	struct capi_notifier *np =
+		container_of(work, struct capi_notifier, work);
 
 	switch (np->cmd) {
 	case KCI_CONTRUP:
@@ -235,7 +236,7 @@ static int notify_push(unsigned int cmd, u32 controller, u16 applid, u32 ncci)
 	if (!np)
 		return -ENOMEM;
 
-	INIT_WORK(&np->work, notify_handler, np);
+	INIT_WORK(&np->work, notify_handler);
 	np->cmd = cmd;
 	np->controller = controller;
 	np->applid = applid;
@@ -248,10 +249,11 @@ static int notify_push(unsigned int cmd, u32 controller, u16 applid, u32 ncci)
 	
 /* -------- Receiver ------------------------------------------ */
 
-static void recv_handler(void *_ap)
+static void recv_handler(struct work_struct *work)
 {
 	struct sk_buff *skb;
-	struct capi20_appl *ap = (struct capi20_appl *) _ap;
+	struct capi20_appl *ap =
+		container_of(work, struct capi20_appl, recv_work);
 
 	if ((!ap) || (ap->release_in_progress))
 		return;
@@ -527,7 +529,7 @@ u16 capi20_register(struct capi20_appl *ap)
 	ap->callback = NULL;
 	init_MUTEX(&ap->recv_sem);
 	skb_queue_head_init(&ap->recv_queue);
-	INIT_WORK(&ap->recv_work, recv_handler, (void *)ap);
+	INIT_WORK(&ap->recv_work, recv_handler);
 	ap->release_in_progress = 0;
 
 	write_unlock_irqrestore(&application_lock, flags);
diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c
index bec59010bc66..3b19caeba258 100644
--- a/drivers/isdn/hisax/amd7930_fn.c
+++ b/drivers/isdn/hisax/amd7930_fn.c
@@ -232,9 +232,10 @@ Amd7930_new_ph(struct IsdnCardState *cs)
 
 
 static void
-Amd7930_bh(struct IsdnCardState *cs)
+Amd7930_bh(struct work_struct *work)
 {
-
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
         struct PStack *stptr;
 
 	if (!cs)
@@ -789,7 +790,7 @@ Amd7930_init(struct IsdnCardState *cs)
 void __devinit
 setup_Amd7930(struct IsdnCardState *cs)
 {
-        INIT_WORK(&cs->tqueue, (void *)(void *) Amd7930_bh, cs);
+        INIT_WORK(&cs->tqueue, Amd7930_bh);
 	cs->dbusytimer.function = (void *) dbusy_timer_handler;
 	cs->dbusytimer.data = (long) cs;
 	init_timer(&cs->dbusytimer);
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index 785b08554fca..cede72cdbb31 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1137,7 +1137,6 @@ static int checkcard(int cardnr, char *id, int *busy_flag, struct module *lockow
 	cs->tx_skb = NULL;
 	cs->tx_cnt = 0;
 	cs->event = 0;
-	cs->tqueue.data = cs;
 
 	skb_queue_head_init(&cs->rq);
 	skb_queue_head_init(&cs->sq);
@@ -1554,7 +1553,7 @@ static void hisax_b_l2l1(struct PStack *st, int pr, void *arg);
 static int hisax_cardmsg(struct IsdnCardState *cs, int mt, void *arg);
 static int hisax_bc_setstack(struct PStack *st, struct BCState *bcs);
 static void hisax_bc_close(struct BCState *bcs);
-static void hisax_bh(struct IsdnCardState *cs);
+static void hisax_bh(struct work_struct *work);
 static void EChannel_proc_rcv(struct hisax_d_if *d_if);
 
 int hisax_register(struct hisax_d_if *hisax_d_if, struct hisax_b_if *b_if[],
@@ -1586,7 +1585,7 @@ int hisax_register(struct hisax_d_if *hisax_d_if, struct hisax_b_if *b_if[],
 	hisax_d_if->cs = cs;
 	cs->hw.hisax_d_if = hisax_d_if;
 	cs->cardmsg = hisax_cardmsg;
-	INIT_WORK(&cs->tqueue, (void *)(void *)hisax_bh, cs);
+	INIT_WORK(&cs->tqueue, hisax_bh);
 	cs->channel[0].d_st->l2.l2l1 = hisax_d_l2l1;
 	for (i = 0; i < 2; i++) {
 		cs->bcs[i].BC_SetStack = hisax_bc_setstack;
@@ -1618,8 +1617,10 @@ static void hisax_sched_event(struct IsdnCardState *cs, int event)
 	schedule_work(&cs->tqueue);
 }
 
-static void hisax_bh(struct IsdnCardState *cs)
+static void hisax_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	struct PStack *st;
 	int pr;
 
diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c
index d852c9d998b2..de9b1a4d6bac 100644
--- a/drivers/isdn/hisax/hfc4s8s_l1.c
+++ b/drivers/isdn/hisax/hfc4s8s_l1.c
@@ -1083,8 +1083,9 @@ tx_b_frame(struct hfc4s8s_btype *bch)
 /* bottom half handler for interrupt */
 /*************************************/
 static void
-hfc4s8s_bh(hfc4s8s_hw * hw)
+hfc4s8s_bh(struct work_struct *work)
 {
+	hfc4s8s_hw *hw = container_of(work, hfc4s8s_hw, tqueue);
 	u_char b;
 	struct hfc4s8s_l1 *l1p;
 	volatile u_char *fifo_stat;
@@ -1550,7 +1551,7 @@ setup_instance(hfc4s8s_hw * hw)
 		goto out;
 	}
 
-	INIT_WORK(&hw->tqueue, (void *) (void *) hfc4s8s_bh, hw);
+	INIT_WORK(&hw->tqueue, hfc4s8s_bh);
 
 	if (request_irq
 	    (hw->irq, hfc4s8s_interrupt, IRQF_SHARED, hw->card_name, hw)) {
diff --git a/drivers/isdn/hisax/hfc_2bds0.c b/drivers/isdn/hisax/hfc_2bds0.c
index 6360e8214720..8d9864453a23 100644
--- a/drivers/isdn/hisax/hfc_2bds0.c
+++ b/drivers/isdn/hisax/hfc_2bds0.c
@@ -549,10 +549,11 @@ setstack_2b(struct PStack *st, struct BCState *bcs)
 }
 
 static void
-hfcd_bh(struct IsdnCardState *cs)
+hfcd_bh(struct work_struct *work)
 {
-	if (!cs)
-		return;
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
+
 	if (test_and_clear_bit(D_L1STATECHANGE, &cs->event)) {
 		switch (cs->dc.hfcd.ph_state) {
 			case (0):
@@ -1072,5 +1073,5 @@ set_cs_func(struct IsdnCardState *cs)
 	cs->dbusytimer.function = (void *) hfc_dbusy_timer;
 	cs->dbusytimer.data = (long) cs;
 	init_timer(&cs->dbusytimer);
-	INIT_WORK(&cs->tqueue, (void *)(void *) hfcd_bh, cs);
+	INIT_WORK(&cs->tqueue, hfcd_bh);
 }
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 93f60b563515..5db0a85b827f 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -1506,8 +1506,10 @@ setstack_2b(struct PStack *st, struct BCState *bcs)
 /* handle L1 state changes */
 /***************************/
 static void
-hfcpci_bh(struct IsdnCardState *cs)
+hfcpci_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	u_long	flags;
 //      struct PStack *stptr;
 
@@ -1722,7 +1724,7 @@ setup_hfcpci(struct IsdnCard *card)
 		Write_hfc(cs, HFCPCI_INT_M2, cs->hw.hfcpci.int_m2);
 		/* At this point the needed PCI config is done */
 		/* fifos are still not enabled */
-		INIT_WORK(&cs->tqueue, (void *)(void *) hfcpci_bh, cs);
+		INIT_WORK(&cs->tqueue,  hfcpci_bh);
 		cs->setstack_d = setstack_hfcpci;
 		cs->BC_Send_Data = &hfcpci_send_data;
 		cs->readisac = NULL;
diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c
index 954d1536db1f..4fd09d21a27f 100644
--- a/drivers/isdn/hisax/hfc_sx.c
+++ b/drivers/isdn/hisax/hfc_sx.c
@@ -1251,8 +1251,10 @@ setstack_2b(struct PStack *st, struct BCState *bcs)
 /* handle L1 state changes */
 /***************************/
 static void
-hfcsx_bh(struct IsdnCardState *cs)
+hfcsx_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	u_long flags;
 
 	if (!cs)
@@ -1499,7 +1501,7 @@ setup_hfcsx(struct IsdnCard *card)
 	cs->dbusytimer.function = (void *) hfcsx_dbusy_timer;
 	cs->dbusytimer.data = (long) cs;
 	init_timer(&cs->dbusytimer);
-	INIT_WORK(&cs->tqueue, (void *)(void *) hfcsx_bh, cs);
+	INIT_WORK(&cs->tqueue, hfcsx_bh);
 	cs->readisac = NULL;
 	cs->writeisac = NULL;
 	cs->readisacfifo = NULL;
diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c
index da706925d54d..682cac32f259 100644
--- a/drivers/isdn/hisax/icc.c
+++ b/drivers/isdn/hisax/icc.c
@@ -77,8 +77,10 @@ icc_new_ph(struct IsdnCardState *cs)
 }
 
 static void
-icc_bh(struct IsdnCardState *cs)
+icc_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	struct PStack *stptr;
 	
 	if (!cs)
@@ -674,7 +676,7 @@ clear_pending_icc_ints(struct IsdnCardState *cs)
 void __devinit
 setup_icc(struct IsdnCardState *cs)
 {
-	INIT_WORK(&cs->tqueue, (void *)(void *) icc_bh, cs);
+	INIT_WORK(&cs->tqueue, icc_bh);
 	cs->dbusytimer.function = (void *) dbusy_timer_handler;
 	cs->dbusytimer.data = (long) cs;
 	init_timer(&cs->dbusytimer);
diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c
index 282f349408bc..4e9f23803dae 100644
--- a/drivers/isdn/hisax/isac.c
+++ b/drivers/isdn/hisax/isac.c
@@ -81,8 +81,10 @@ isac_new_ph(struct IsdnCardState *cs)
 }
 
 static void
-isac_bh(struct IsdnCardState *cs)
+isac_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	struct PStack *stptr;
 	
 	if (!cs)
@@ -674,7 +676,7 @@ clear_pending_isac_ints(struct IsdnCardState *cs)
 void __devinit
 setup_isac(struct IsdnCardState *cs)
 {
-	INIT_WORK(&cs->tqueue, (void *)(void *) isac_bh, cs);
+	INIT_WORK(&cs->tqueue, isac_bh);
 	cs->dbusytimer.function = (void *) dbusy_timer_handler;
 	cs->dbusytimer.data = (long) cs;
 	init_timer(&cs->dbusytimer);
diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c
index 674af673ff96..6f1a6583b17d 100644
--- a/drivers/isdn/hisax/isar.c
+++ b/drivers/isdn/hisax/isar.c
@@ -437,8 +437,10 @@ extern void BChannel_bh(struct BCState *);
 #define B_LL_OK		10
 
 static void
-isar_bh(struct BCState *bcs)
+isar_bh(struct work_struct *work)
 {
+	struct BCState *bcs = container_of(work, struct BCState, tqueue);
+
 	BChannel_bh(bcs);
 	if (test_and_clear_bit(B_LL_NOCARRIER, &bcs->event))
 		ll_deliver_faxstat(bcs, ISDN_FAX_CLASS1_NOCARR);
@@ -1580,7 +1582,7 @@ isar_setup(struct IsdnCardState *cs)
 		cs->bcs[i].mode = 0;
 		cs->bcs[i].hw.isar.dpath = i + 1;
 		modeisar(&cs->bcs[i], 0, 0);
-		INIT_WORK(&cs->bcs[i].tqueue, (void *)(void *) isar_bh, &cs->bcs[i]);
+		INIT_WORK(&cs->bcs[i].tqueue, isar_bh);
 	}
 }
 
diff --git a/drivers/isdn/hisax/isdnl1.c b/drivers/isdn/hisax/isdnl1.c
index bab356886483..a14204ec88ee 100644
--- a/drivers/isdn/hisax/isdnl1.c
+++ b/drivers/isdn/hisax/isdnl1.c
@@ -315,8 +315,10 @@ BChannel_proc_ack(struct BCState *bcs)
 }
 
 void
-BChannel_bh(struct BCState *bcs)
+BChannel_bh(struct work_struct *work)
 {
+	struct BCState *bcs = container_of(work, struct BCState, tqueue);
+
 	if (!bcs)
 		return;
 	if (test_and_clear_bit(B_RCVBUFREADY, &bcs->event))
@@ -362,7 +364,7 @@ init_bcstate(struct IsdnCardState *cs, int bc)
 
 	bcs->cs = cs;
 	bcs->channel = bc;
-	INIT_WORK(&bcs->tqueue, (void *)(void *) BChannel_bh, bcs);
+	INIT_WORK(&bcs->tqueue, BChannel_bh);
 	spin_lock_init(&bcs->aclock);
 	bcs->BC_SetStack = NULL;
 	bcs->BC_Close = NULL;
diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c
index 1655341797a9..3aeceaf9769e 100644
--- a/drivers/isdn/hisax/w6692.c
+++ b/drivers/isdn/hisax/w6692.c
@@ -101,8 +101,10 @@ W6692_new_ph(struct IsdnCardState *cs)
 }
 
 static void
-W6692_bh(struct IsdnCardState *cs)
+W6692_bh(struct work_struct *work)
 {
+	struct IsdnCardState *cs =
+		container_of(work, struct IsdnCardState, tqueue);
 	struct PStack *stptr;
 
 	if (!cs)
@@ -1070,7 +1072,7 @@ setup_w6692(struct IsdnCard *card)
 	       id_list[cs->subtyp].card_name, cs->irq,
 	       cs->hw.w6692.iobase);
 
-	INIT_WORK(&cs->tqueue, (void *)(void *) W6692_bh, cs);
+	INIT_WORK(&cs->tqueue, W6692_bh);
 	cs->readW6692 = &ReadW6692;
 	cs->writeW6692 = &WriteW6692;
 	cs->readisacfifo = &ReadISACfifo;
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 1f8d6ae66b41..2e4daebfb7e0 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -984,9 +984,9 @@ void isdn_net_write_super(isdn_net_local *lp, struct sk_buff *skb)
 /*
  * called from tq_immediate
  */
-static void isdn_net_softint(void *private)
+static void isdn_net_softint(struct work_struct *work)
 {
-	isdn_net_local *lp = private;
+	isdn_net_local *lp = container_of(work, isdn_net_local, tqueue);
 	struct sk_buff *skb;
 
 	spin_lock_bh(&lp->xmit_lock);
@@ -2596,7 +2596,7 @@ isdn_net_new(char *name, struct net_device *master)
 	netdev->local->netdev = netdev;
 	netdev->local->next = netdev->local;
 
-	INIT_WORK(&netdev->local->tqueue, (void *)(void *) isdn_net_softint, netdev->local);
+	INIT_WORK(&netdev->local->tqueue, isdn_net_softint);
 	spin_lock_init(&netdev->local->xmit_lock);
 
 	netdev->local->isdn_device = -1;
diff --git a/drivers/isdn/pcbit/drv.c b/drivers/isdn/pcbit/drv.c
index 6ead5e1508b7..1966f3410a13 100644
--- a/drivers/isdn/pcbit/drv.c
+++ b/drivers/isdn/pcbit/drv.c
@@ -68,8 +68,6 @@ static void pcbit_set_msn(struct pcbit_dev *dev, char *list);
 static int pcbit_check_msn(struct pcbit_dev *dev, char *msn);
 
 
-extern void pcbit_deliver(void * data);
-
 int pcbit_init_dev(int board, int mem_base, int irq)
 {
 	struct pcbit_dev *dev;
@@ -129,7 +127,7 @@ int pcbit_init_dev(int board, int mem_base, int irq)
 	memset(dev->b2, 0, sizeof(struct pcbit_chan));
 	dev->b2->id = 1;
 
-	INIT_WORK(&dev->qdelivery, pcbit_deliver, dev);
+	INIT_WORK(&dev->qdelivery, pcbit_deliver);
 
 	/*
 	 *  interrupts
diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c
index 937fd2120381..0c9f6df873fc 100644
--- a/drivers/isdn/pcbit/layer2.c
+++ b/drivers/isdn/pcbit/layer2.c
@@ -67,7 +67,6 @@ extern void pcbit_l3_receive(struct pcbit_dev *dev, ulong msg,
  *  Prototypes
  */
 
-void pcbit_deliver(void *data);
 static void pcbit_transmit(struct pcbit_dev *dev);
 
 static void pcbit_recv_ack(struct pcbit_dev *dev, unsigned char ack);
@@ -299,11 +298,12 @@ pcbit_transmit(struct pcbit_dev *dev)
  */
 
 void
-pcbit_deliver(void *data)
+pcbit_deliver(struct work_struct *work)
 {
 	struct frame_buf *frame;
 	unsigned long flags, msg;
-	struct pcbit_dev *dev = (struct pcbit_dev *) data;
+	struct pcbit_dev *dev =
+		container_of(work, struct pcbit_dev, qdelivery);
 
 	spin_lock_irqsave(&dev->lock, flags);
 
diff --git a/drivers/isdn/pcbit/pcbit.h b/drivers/isdn/pcbit/pcbit.h
index 388bacefd23a..19c18e88ff16 100644
--- a/drivers/isdn/pcbit/pcbit.h
+++ b/drivers/isdn/pcbit/pcbit.h
@@ -166,4 +166,6 @@ struct pcbit_ioctl {
 #define L2_RUNNING  5
 #define L2_ERROR    6
 
+extern void pcbit_deliver(struct work_struct *work);
+
 #endif
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index ade25b3fbb35..4871158aca3e 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -600,7 +600,7 @@ core_initcall(smu_late_init);
  * sysfs visibility
  */
 
-static void smu_expose_childs(void *unused)
+static void smu_expose_childs(struct work_struct *unused)
 {
 	struct device_node *np;
 
@@ -610,7 +610,7 @@ static void smu_expose_childs(void *unused)
 						  &smu->of_dev->dev);
 }
 
-static DECLARE_WORK(smu_expose_childs_work, smu_expose_childs, NULL);
+static DECLARE_WORK(smu_expose_childs_work, smu_expose_childs);
 
 static int smu_platform_probe(struct of_device* dev,
 			      const struct of_device_id *match)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08a40f4e4f60..ed2d4ef27fd8 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -458,11 +458,11 @@ static void dec_pending(struct crypt_io *io, int error)
  * interrupt context.
  */
 static struct workqueue_struct *_kcryptd_workqueue;
-static void kcryptd_do_work(void *data);
+static void kcryptd_do_work(struct work_struct *work);
 
 static void kcryptd_queue_io(struct crypt_io *io)
 {
-	INIT_WORK(&io->work, kcryptd_do_work, io);
+	INIT_WORK(&io->work, kcryptd_do_work);
 	queue_work(_kcryptd_workqueue, &io->work);
 }
 
@@ -618,9 +618,9 @@ static void process_read_endio(struct crypt_io *io)
 	dec_pending(io, crypt_convert(cc, &ctx));
 }
 
-static void kcryptd_do_work(void *data)
+static void kcryptd_do_work(struct work_struct *work)
 {
-	struct crypt_io *io = data;
+	struct crypt_io *io = container_of(work, struct crypt_io, work);
 
 	if (io->post_process)
 		process_read_endio(io);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d754e0bc6e90..e77ee6fd1044 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -104,8 +104,8 @@ typedef int (*action_fn) (struct pgpath *pgpath);
 static kmem_cache_t *_mpio_cache;
 
 struct workqueue_struct *kmultipathd;
-static void process_queued_ios(void *data);
-static void trigger_event(void *data);
+static void process_queued_ios(struct work_struct *work);
+static void trigger_event(struct work_struct *work);
 
 
 /*-----------------------------------------------
@@ -173,8 +173,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 		INIT_LIST_HEAD(&m->priority_groups);
 		spin_lock_init(&m->lock);
 		m->queue_io = 1;
-		INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
-		INIT_WORK(&m->trigger_event, trigger_event, m);
+		INIT_WORK(&m->process_queued_ios, process_queued_ios);
+		INIT_WORK(&m->trigger_event, trigger_event);
 		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
 		if (!m->mpio_pool) {
 			kfree(m);
@@ -379,9 +379,10 @@ static void dispatch_queued_ios(struct multipath *m)
 	}
 }
 
-static void process_queued_ios(void *data)
+static void process_queued_ios(struct work_struct *work)
 {
-	struct multipath *m = (struct multipath *) data;
+	struct multipath *m =
+		container_of(work, struct multipath, process_queued_ios);
 	struct hw_handler *hwh = &m->hw_handler;
 	struct pgpath *pgpath = NULL;
 	unsigned init_required = 0, must_queue = 1;
@@ -421,9 +422,10 @@ out:
  * An event is triggered whenever a path is taken out of use.
  * Includes path failure and PG bypass.
  */
-static void trigger_event(void *data)
+static void trigger_event(struct work_struct *work)
 {
-	struct multipath *m = (struct multipath *) data;
+	struct multipath *m =
+		container_of(work, struct multipath, trigger_event);
 
 	dm_table_event(m->ti->table);
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 48a653b3f518..fc8cbb168e3e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -883,7 +883,7 @@ static void do_mirror(struct mirror_set *ms)
 	do_writes(ms, &writes);
 }
 
-static void do_work(void *ignored)
+static void do_work(struct work_struct *ignored)
 {
 	struct mirror_set *ms;
 
@@ -1269,7 +1269,7 @@ static int __init dm_mirror_init(void)
 		dm_dirty_log_exit();
 		return r;
 	}
-	INIT_WORK(&_kmirrord_work, do_work, NULL);
+	INIT_WORK(&_kmirrord_work, do_work);
 
 	r = dm_register_target(&mirror_target);
 	if (r < 0) {
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5281e0094072..91c7aa1fed0e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -40,7 +40,7 @@
 #define SNAPSHOT_PAGES 256
 
 struct workqueue_struct *ksnapd;
-static void flush_queued_bios(void *data);
+static void flush_queued_bios(struct work_struct *work);
 
 struct pending_exception {
 	struct exception e;
@@ -528,7 +528,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	bio_list_init(&s->queued_bios);
-	INIT_WORK(&s->queued_bios_work, flush_queued_bios, s);
+	INIT_WORK(&s->queued_bios_work, flush_queued_bios);
 
 	/* Add snapshot to the list of snapshots for this origin */
 	/* Exceptions aren't triggered till snapshot_resume() is called */
@@ -603,9 +603,10 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
-static void flush_queued_bios(void *data)
+static void flush_queued_bios(struct work_struct *work)
 {
-	struct dm_snapshot *s = (struct dm_snapshot *) data;
+	struct dm_snapshot *s =
+		container_of(work, struct dm_snapshot, queued_bios_work);
 	struct bio *queued_bios;
 	unsigned long flags;
 
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index f1db6eff4857..b3c01496c737 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -417,7 +417,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
 /*
  * kcopyd does this every time it's woken up.
  */
-static void do_work(void *ignored)
+static void do_work(struct work_struct *ignored)
 {
 	/*
 	 * The order that these are called is *very* important.
@@ -628,7 +628,7 @@ static int kcopyd_init(void)
 	}
 
 	kcopyd_clients++;
-	INIT_WORK(&_kcopyd_work, do_work, NULL);
+	INIT_WORK(&_kcopyd_work, do_work);
 	mutex_unlock(&kcopyd_init_lock);
 	return 0;
 }
diff --git a/drivers/media/dvb/b2c2/flexcop-pci.c b/drivers/media/dvb/b2c2/flexcop-pci.c
index 06893243f3d4..6e166801505d 100644
--- a/drivers/media/dvb/b2c2/flexcop-pci.c
+++ b/drivers/media/dvb/b2c2/flexcop-pci.c
@@ -63,7 +63,7 @@ struct flexcop_pci {
 
 	unsigned long last_irq;
 
-	struct work_struct irq_check_work;
+	struct delayed_work irq_check_work;
 
 	struct flexcop_device *fc_dev;
 };
@@ -97,9 +97,10 @@ static int flexcop_pci_write_ibi_reg(struct flexcop_device *fc, flexcop_ibi_regi
 	return 0;
 }
 
-static void flexcop_pci_irq_check_work(void *data)
+static void flexcop_pci_irq_check_work(struct work_struct *work)
 {
-	struct flexcop_pci *fc_pci = data;
+	struct flexcop_pci *fc_pci =
+		container_of(work, struct flexcop_pci, irq_check_work.work);
 	struct flexcop_device *fc = fc_pci->fc_dev;
 
 	flexcop_ibi_value v = fc->read_ibi_reg(fc,sram_dest_reg_714);
@@ -371,7 +372,7 @@ static int flexcop_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 	if ((ret = flexcop_pci_dma_init(fc_pci)) != 0)
 		goto err_fc_exit;
 
-	INIT_WORK(&fc_pci->irq_check_work, flexcop_pci_irq_check_work, fc_pci);
+	INIT_DELAYED_WORK(&fc_pci->irq_check_work, flexcop_pci_irq_check_work);
 
 	return ret;
 
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index ff7d4f56ced3..dd0bcbe140bd 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -127,7 +127,7 @@ struct cinergyt2 {
 
 	struct dvbt_set_parameters_msg param;
 	struct dvbt_get_status_msg status;
-	struct work_struct query_work;
+	struct delayed_work query_work;
 
 	wait_queue_head_t poll_wq;
 	int pending_fe_events;
@@ -141,7 +141,7 @@ struct cinergyt2 {
 #ifdef ENABLE_RC
 	struct input_dev *rc_input_dev;
 	char phys[64];
-	struct work_struct rc_query_work;
+	struct delayed_work rc_query_work;
 	int rc_input_event;
 	u32 rc_last_code;
 	unsigned long last_event_jiffies;
@@ -724,9 +724,10 @@ static struct dvb_device cinergyt2_fe_template = {
 
 #ifdef ENABLE_RC
 
-static void cinergyt2_query_rc (void *data)
+static void cinergyt2_query_rc (struct work_struct *work)
 {
-	struct cinergyt2 *cinergyt2 = data;
+	struct cinergyt2 *cinergyt2 =
+		container_of(work, struct cinergyt2, rc_query_work.work);
 	char buf[1] = { CINERGYT2_EP1_GET_RC_EVENTS };
 	struct cinergyt2_rc_event rc_events[12];
 	int n, len, i;
@@ -807,7 +808,7 @@ static int cinergyt2_register_rc(struct cinergyt2 *cinergyt2)
 	strlcat(cinergyt2->phys, "/input0", sizeof(cinergyt2->phys));
 	cinergyt2->rc_input_event = KEY_MAX;
 	cinergyt2->rc_last_code = ~0;
-	INIT_WORK(&cinergyt2->rc_query_work, cinergyt2_query_rc, cinergyt2);
+	INIT_DELAYED_WORK(&cinergyt2->rc_query_work, cinergyt2_query_rc);
 
 	input_dev->name = DRIVER_NAME " remote control";
 	input_dev->phys = cinergyt2->phys;
@@ -848,9 +849,10 @@ static inline void cinergyt2_resume_rc(struct cinergyt2 *cinergyt2) { }
 
 #endif /* ENABLE_RC */
 
-static void cinergyt2_query (void *data)
+static void cinergyt2_query (struct work_struct *work)
 {
-	struct cinergyt2 *cinergyt2 = (struct cinergyt2 *) data;
+	struct cinergyt2 *cinergyt2 =
+		container_of(work, struct cinergyt2, query_work.work);
 	char cmd [] = { CINERGYT2_EP1_GET_TUNER_STATUS };
 	struct dvbt_get_status_msg *s = &cinergyt2->status;
 	uint8_t lock_bits;
@@ -894,7 +896,7 @@ static int cinergyt2_probe (struct usb_interface *intf,
 
 	mutex_init(&cinergyt2->sem);
 	init_waitqueue_head (&cinergyt2->poll_wq);
-	INIT_WORK(&cinergyt2->query_work, cinergyt2_query, cinergyt2);
+	INIT_DELAYED_WORK(&cinergyt2->query_work, cinergyt2_query);
 
 	cinergyt2->udev = interface_to_usbdev(intf);
 	cinergyt2->param.cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS;
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 8859ab74f0fe..ebf4dc5190f6 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -127,6 +127,7 @@ struct dvb_net_priv {
 	int in_use;
 	struct net_device_stats stats;
 	u16 pid;
+	struct net_device *net;
 	struct dvb_net *host;
 	struct dmx_demux *demux;
 	struct dmx_section_feed *secfeed;
@@ -1123,10 +1124,11 @@ static int dvb_set_mc_filter (struct net_device *dev, struct dev_mc_list *mc)
 }
 
 
-static void wq_set_multicast_list (void *data)
+static void wq_set_multicast_list (struct work_struct *work)
 {
-	struct net_device *dev = data;
-	struct dvb_net_priv *priv = dev->priv;
+	struct dvb_net_priv *priv =
+		container_of(work, struct dvb_net_priv, set_multicast_list_wq);
+	struct net_device *dev = priv->net;
 
 	dvb_net_feed_stop(dev);
 	priv->rx_mode = RX_MODE_UNI;
@@ -1167,9 +1169,11 @@ static void dvb_net_set_multicast_list (struct net_device *dev)
 }
 
 
-static void wq_restart_net_feed (void *data)
+static void wq_restart_net_feed (struct work_struct *work)
 {
-	struct net_device *dev = data;
+	struct dvb_net_priv *priv =
+		container_of(work, struct dvb_net_priv, restart_net_feed_wq);
+	struct net_device *dev = priv->net;
 
 	if (netif_running(dev)) {
 		dvb_net_feed_stop(dev);
@@ -1276,6 +1280,7 @@ static int dvb_net_add_if(struct dvb_net *dvbnet, u16 pid, u8 feedtype)
 	dvbnet->device[if_num] = net;
 
 	priv = net->priv;
+	priv->net = net;
 	priv->demux = dvbnet->demux;
 	priv->pid = pid;
 	priv->rx_mode = RX_MODE_UNI;
@@ -1284,8 +1289,8 @@ static int dvb_net_add_if(struct dvb_net *dvbnet, u16 pid, u8 feedtype)
 	priv->feedtype = feedtype;
 	reset_ule(priv);
 
-	INIT_WORK(&priv->set_multicast_list_wq, wq_set_multicast_list, net);
-	INIT_WORK(&priv->restart_net_feed_wq, wq_restart_net_feed, net);
+	INIT_WORK(&priv->set_multicast_list_wq, wq_set_multicast_list);
+	INIT_WORK(&priv->restart_net_feed_wq, wq_restart_net_feed);
 	mutex_init(&priv->mutex);
 
 	net->base_addr = pid;
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index 0a3a0b6c2350..794e4471561c 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -13,9 +13,10 @@
  *
  * TODO: Fix the repeat rate of the input device.
  */
-static void dvb_usb_read_remote_control(void *data)
+static void dvb_usb_read_remote_control(struct work_struct *work)
 {
-	struct dvb_usb_device *d = data;
+	struct dvb_usb_device *d =
+		container_of(work, struct dvb_usb_device, rc_query_work.work);
 	u32 event;
 	int state;
 
@@ -128,7 +129,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d)
 
 	input_register_device(d->rc_input_dev);
 
-	INIT_WORK(&d->rc_query_work, dvb_usb_read_remote_control, d);
+	INIT_DELAYED_WORK(&d->rc_query_work, dvb_usb_read_remote_control);
 
 	info("schedule remote query interval to %d msecs.", d->props.rc_interval);
 	schedule_delayed_work(&d->rc_query_work,msecs_to_jiffies(d->props.rc_interval));
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb.h b/drivers/media/dvb/dvb-usb/dvb-usb.h
index 376c45a8e779..0d721731a524 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb.h
+++ b/drivers/media/dvb/dvb-usb/dvb-usb.h
@@ -369,7 +369,7 @@ struct dvb_usb_device {
 	/* remote control */
 	struct input_dev *rc_input_dev;
 	char rc_phys[64];
-	struct work_struct rc_query_work;
+	struct delayed_work rc_query_work;
 	u32 last_event;
 	int last_state;
 
diff --git a/drivers/media/video/cpia_pp.c b/drivers/media/video/cpia_pp.c
index 41f4b8d17559..b12cec94f4cc 100644
--- a/drivers/media/video/cpia_pp.c
+++ b/drivers/media/video/cpia_pp.c
@@ -82,6 +82,8 @@ struct pp_cam_entry {
 	struct pardevice *pdev;
 	struct parport *port;
 	struct work_struct cb_task;
+	void (*cb_func)(void *cbdata);
+	void *cb_data;
 	int open_count;
 	wait_queue_head_t wq_stream;
 	/* image state flags */
@@ -130,6 +132,20 @@ static void cpia_parport_disable_irq( struct parport *port ) {
 #define PARPORT_CHUNK_SIZE	PAGE_SIZE
 
 
+static void cpia_pp_run_callback(struct work_struct *work)
+{
+	void (*cb_func)(void *cbdata);
+	void *cb_data;
+	struct pp_cam_entry *cam;
+
+	cam = container_of(work, struct pp_cam_entry, cb_task);
+	cb_func = cam->cb_func;
+	cb_data = cam->cb_data;
+	work_release(work);
+
+	cb_func(cb_data);
+}
+
 /****************************************************************************
  *
  *  CPiA-specific  low-level parport functions for nibble uploads
@@ -664,7 +680,9 @@ static int cpia_pp_registerCallback(void *privdata, void (*cb)(void *cbdata), vo
 	int retval = 0;
 
 	if(cam->port->irq != PARPORT_IRQ_NONE) {
-		INIT_WORK(&cam->cb_task, cb, cbdata);
+		cam->cb_func = cb;
+		cam->cb_data = cbdata;
+		INIT_WORK_NAR(&cam->cb_task, cpia_pp_run_callback);
 	} else {
 		retval = -1;
 	}
diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c
index 57e1c024a547..e60a0a52e4b2 100644
--- a/drivers/media/video/cx88/cx88-input.c
+++ b/drivers/media/video/cx88/cx88-input.c
@@ -145,9 +145,9 @@ static void ir_timer(unsigned long data)
 	schedule_work(&ir->work);
 }
 
-static void cx88_ir_work(void *data)
+static void cx88_ir_work(struct work_struct *work)
 {
-	struct cx88_IR *ir = data;
+	struct cx88_IR *ir = container_of(work, struct cx88_IR, work);
 	unsigned long timeout;
 
 	cx88_ir_handle_key(ir);
@@ -308,7 +308,7 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci)
 	core->ir = ir;
 
 	if (ir->polling) {
-		INIT_WORK(&ir->work, cx88_ir_work, ir);
+		INIT_WORK(&ir->work, cx88_ir_work);
 		init_timer(&ir->timer);
 		ir->timer.function = ir_timer;
 		ir->timer.data = (unsigned long)ir;
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 1457b1602221..ab87e7bfe84f 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -268,9 +268,9 @@ static void ir_timer(unsigned long data)
 	schedule_work(&ir->work);
 }
 
-static void ir_work(void *data)
+static void ir_work(struct work_struct *work)
 {
-	struct IR_i2c *ir = data;
+	struct IR_i2c *ir = container_of(work, struct IR_i2c, work);
 	ir_key_poll(ir);
 	mod_timer(&ir->timer, jiffies+HZ/10);
 }
@@ -400,7 +400,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	       ir->input->name,ir->input->phys,adap->name);
 
 	/* start polling via eventd */
-	INIT_WORK(&ir->work, ir_work, ir);
+	INIT_WORK(&ir->work, ir_work);
 	init_timer(&ir->timer);
 	ir->timer.function = ir_timer;
 	ir->timer.data     = (unsigned long)ir;
diff --git a/drivers/media/video/pvrusb2/pvrusb2-context.c b/drivers/media/video/pvrusb2/pvrusb2-context.c
index f129f316d20e..cf129746205d 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-context.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-context.c
@@ -45,16 +45,21 @@ static void pvr2_context_trigger_poll(struct pvr2_context *mp)
 }
 
 
-static void pvr2_context_poll(struct pvr2_context *mp)
+static void pvr2_context_poll(struct work_struct *work)
 {
+	struct pvr2_context *mp =
+		container_of(work, struct pvr2_context, workpoll);
 	pvr2_context_enter(mp); do {
 		pvr2_hdw_poll(mp->hdw);
 	} while (0); pvr2_context_exit(mp);
 }
 
 
-static void pvr2_context_setup(struct pvr2_context *mp)
+static void pvr2_context_setup(struct work_struct *work)
 {
+	struct pvr2_context *mp =
+		container_of(work, struct pvr2_context, workinit);
+
 	pvr2_context_enter(mp); do {
 		if (!pvr2_hdw_dev_ok(mp->hdw)) break;
 		pvr2_hdw_setup(mp->hdw);
@@ -92,8 +97,8 @@ struct pvr2_context *pvr2_context_create(
 	}
 
 	mp->workqueue = create_singlethread_workqueue("pvrusb2");
-	INIT_WORK(&mp->workinit,(void (*)(void*))pvr2_context_setup,mp);
-	INIT_WORK(&mp->workpoll,(void (*)(void*))pvr2_context_poll,mp);
+	INIT_WORK(&mp->workinit, pvr2_context_setup);
+	INIT_WORK(&mp->workpoll, pvr2_context_poll);
 	queue_work(mp->workqueue,&mp->workinit);
  done:
 	return mp;
diff --git a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c
index a81285ca7d5b..8ba05c214ca7 100644
--- a/drivers/media/video/saa6588.c
+++ b/drivers/media/video/saa6588.c
@@ -322,9 +322,9 @@ static void saa6588_timer(unsigned long data)
 	schedule_work(&s->work);
 }
 
-static void saa6588_work(void *data)
+static void saa6588_work(struct work_struct *work)
 {
-	struct saa6588 *s = (struct saa6588 *)data;
+	struct saa6588 *s = container_of(work, struct saa6588, work);
 
 	saa6588_i2c_poll(s);
 	mod_timer(&s->timer, jiffies + msecs_to_jiffies(20));
@@ -417,7 +417,7 @@ static int saa6588_attach(struct i2c_adapter *adap, int addr, int kind)
 	saa6588_configure(s);
 
 	/* start polling via eventd */
-	INIT_WORK(&s->work, saa6588_work, s);
+	INIT_WORK(&s->work, saa6588_work);
 	init_timer(&s->timer);
 	s->timer.function = saa6588_timer;
 	s->timer.data = (unsigned long)s;
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 65d044086ce9..daaae870a2c4 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -343,9 +343,10 @@ static struct video_device saa7134_empress_template =
 	.minor	       = -1,
 };
 
-static void empress_signal_update(void* data)
+static void empress_signal_update(struct work_struct *work)
 {
-	struct saa7134_dev* dev = (struct saa7134_dev*) data;
+	struct saa7134_dev* dev =
+		container_of(work, struct saa7134_dev, empress_workqueue);
 
 	if (dev->nosignal) {
 		dprintk("no video signal\n");
@@ -378,7 +379,7 @@ static int empress_init(struct saa7134_dev *dev)
 		 "%s empress (%s)", dev->name,
 		 saa7134_boards[dev->board].name);
 
-	INIT_WORK(&dev->empress_workqueue, empress_signal_update, (void*) dev);
+	INIT_WORK(&dev->empress_workqueue, empress_signal_update);
 
 	err = video_register_device(dev->empress_dev,VFL_TYPE_GRABBER,
 				    empress_nr[dev->nr]);
@@ -399,7 +400,7 @@ static int empress_init(struct saa7134_dev *dev)
 			    sizeof(struct saa7134_buf),
 			    dev);
 
-	empress_signal_update(dev);
+	empress_signal_update(&dev->empress_workqueue);
 	return 0;
 }
 
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 1dd491773150..ef2b55e19910 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1018,9 +1018,10 @@ mptfc_init_host_attr(MPT_ADAPTER *ioc,int portnum)
 }
 
 static void
-mptfc_setup_reset(void *arg)
+mptfc_setup_reset(struct work_struct *work)
 {
-	MPT_ADAPTER		*ioc = (MPT_ADAPTER *)arg;
+	MPT_ADAPTER		*ioc =
+		container_of(work, MPT_ADAPTER, fc_setup_reset_work);
 	u64			pn;
 	struct mptfc_rport_info *ri;
 
@@ -1043,9 +1044,10 @@ mptfc_setup_reset(void *arg)
 }
 
 static void
-mptfc_rescan_devices(void *arg)
+mptfc_rescan_devices(struct work_struct *work)
 {
-	MPT_ADAPTER		*ioc = (MPT_ADAPTER *)arg;
+	MPT_ADAPTER		*ioc =
+		container_of(work, MPT_ADAPTER, fc_rescan_work);
 	int			ii;
 	u64			pn;
 	struct mptfc_rport_info *ri;
@@ -1154,8 +1156,8 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         }
 
 	spin_lock_init(&ioc->fc_rescan_work_lock);
-	INIT_WORK(&ioc->fc_rescan_work, mptfc_rescan_devices,(void *)ioc);
-	INIT_WORK(&ioc->fc_setup_reset_work, mptfc_setup_reset, (void *)ioc);
+	INIT_WORK(&ioc->fc_rescan_work, mptfc_rescan_devices);
+	INIT_WORK(&ioc->fc_setup_reset_work, mptfc_setup_reset);
 
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 314c3a27585d..b7c4407c5e3f 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -111,7 +111,8 @@ struct mpt_lan_priv {
 	u32 total_received;
 	struct net_device_stats stats;	/* Per device statistics */
 
-	struct work_struct post_buckets_task;
+	struct delayed_work post_buckets_task;
+	struct net_device *dev;
 	unsigned long post_buckets_active;
 };
 
@@ -132,7 +133,7 @@ static int  lan_reply (MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf,
 static int  mpt_lan_open(struct net_device *dev);
 static int  mpt_lan_reset(struct net_device *dev);
 static int  mpt_lan_close(struct net_device *dev);
-static void mpt_lan_post_receive_buckets(void *dev_id);
+static void mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv);
 static void mpt_lan_wake_post_buckets_task(struct net_device *dev,
 					   int priority);
 static int  mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg);
@@ -345,7 +346,7 @@ mpt_lan_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 			priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = i;
 		spin_unlock_irqrestore(&priv->rxfidx_lock, flags);
 	} else {
-		mpt_lan_post_receive_buckets(dev);
+		mpt_lan_post_receive_buckets(priv);
 		netif_wake_queue(dev);
 	}
 
@@ -441,7 +442,7 @@ mpt_lan_open(struct net_device *dev)
 
 	dlprintk((KERN_INFO MYNAM "/lo: Finished initializing RcvCtl\n"));
 
-	mpt_lan_post_receive_buckets(dev);
+	mpt_lan_post_receive_buckets(priv);
 	printk(KERN_INFO MYNAM ": %s/%s: interface up & active\n",
 			IOC_AND_NETDEV_NAMES_s_s(dev));
 
@@ -854,7 +855,7 @@ mpt_lan_wake_post_buckets_task(struct net_device *dev, int priority)
 	
 	if (test_and_set_bit(0, &priv->post_buckets_active) == 0) {
 		if (priority) {
-			schedule_work(&priv->post_buckets_task);
+			schedule_delayed_work(&priv->post_buckets_task, 0);
 		} else {
 			schedule_delayed_work(&priv->post_buckets_task, 1);
 			dioprintk((KERN_INFO MYNAM ": post_buckets queued on "
@@ -1188,10 +1189,9 @@ mpt_lan_receive_post_reply(struct net_device *dev,
 /* Simple SGE's only at the moment */
 
 static void
-mpt_lan_post_receive_buckets(void *dev_id)
+mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv)
 {
-	struct net_device *dev = dev_id;
-	struct mpt_lan_priv *priv = dev->priv;
+	struct net_device *dev = priv->dev;
 	MPT_ADAPTER *mpt_dev = priv->mpt_dev;
 	MPT_FRAME_HDR *mf;
 	LANReceivePostRequest_t *pRecvReq;
@@ -1335,6 +1335,13 @@ out:
 	clear_bit(0, &priv->post_buckets_active);
 }
 
+static void
+mpt_lan_post_receive_buckets_work(struct work_struct *work)
+{
+	mpt_lan_post_receive_buckets(container_of(work, struct mpt_lan_priv,
+						  post_buckets_task.work));
+}
+
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 static struct net_device *
 mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
@@ -1350,11 +1357,13 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
 
 	priv = netdev_priv(dev);
 
+	priv->dev = dev;
 	priv->mpt_dev = mpt_dev;
 	priv->pnum = pnum;
 
-	memset(&priv->post_buckets_task, 0, sizeof(struct work_struct));
-	INIT_WORK(&priv->post_buckets_task, mpt_lan_post_receive_buckets, dev);
+	memset(&priv->post_buckets_task, 0, sizeof(priv->post_buckets_task));
+	INIT_DELAYED_WORK(&priv->post_buckets_task,
+			  mpt_lan_post_receive_buckets_work);
 	priv->post_buckets_active = 0;
 
 	dlprintk((KERN_INFO MYNAM "@%d: bucketlen = %d\n",
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index b752a479f6db..4f0c530e47b0 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2006,9 +2006,10 @@ __mptsas_discovery_work(MPT_ADAPTER *ioc)
  *(Mutex LOCKED)
  */
 static void
-mptsas_discovery_work(void * arg)
+mptsas_discovery_work(struct work_struct *work)
 {
-	struct mptsas_discovery_event *ev = arg;
+	struct mptsas_discovery_event *ev =
+		container_of(work, struct mptsas_discovery_event, work);
 	MPT_ADAPTER *ioc = ev->ioc;
 
 	mutex_lock(&ioc->sas_discovery_mutex);
@@ -2068,9 +2069,9 @@ mptsas_find_phyinfo_by_target(MPT_ADAPTER *ioc, u32 id)
  * Work queue thread to clear the persitency table
  */
 static void
-mptsas_persist_clear_table(void * arg)
+mptsas_persist_clear_table(struct work_struct *work)
 {
-	MPT_ADAPTER *ioc = (MPT_ADAPTER *)arg;
+	MPT_ADAPTER *ioc = container_of(work, MPT_ADAPTER, sas_persist_task);
 
 	mptbase_sas_persist_operation(ioc, MPI_SAS_OP_CLEAR_NOT_PRESENT);
 }
@@ -2093,9 +2094,10 @@ mptsas_reprobe_target(struct scsi_target *starget, int uld_attach)
  * Work queue thread to handle SAS hotplug events
  */
 static void
-mptsas_hotplug_work(void *arg)
+mptsas_hotplug_work(struct work_struct *work)
 {
-	struct mptsas_hotplug_event *ev = arg;
+	struct mptsas_hotplug_event *ev =
+		container_of(work, struct mptsas_hotplug_event, work);
 	MPT_ADAPTER *ioc = ev->ioc;
 	struct mptsas_phyinfo *phy_info;
 	struct sas_rphy *rphy;
@@ -2341,7 +2343,7 @@ mptsas_send_sas_event(MPT_ADAPTER *ioc,
 			break;
 		}
 
-		INIT_WORK(&ev->work, mptsas_hotplug_work, ev);
+		INIT_WORK(&ev->work, mptsas_hotplug_work);
 		ev->ioc = ioc;
 		ev->handle = le16_to_cpu(sas_event_data->DevHandle);
 		ev->parent_handle =
@@ -2366,7 +2368,7 @@ mptsas_send_sas_event(MPT_ADAPTER *ioc,
 	 * Persistent table is full.
 	 */
 		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table, (void *)ioc);
+		    mptsas_persist_clear_table);
 		schedule_work(&ioc->sas_persist_task);
 		break;
 	case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA:
@@ -2395,7 +2397,7 @@ mptsas_send_raid_event(MPT_ADAPTER *ioc,
 		return;
 	}
 
-	INIT_WORK(&ev->work, mptsas_hotplug_work, ev);
+	INIT_WORK(&ev->work, mptsas_hotplug_work);
 	ev->ioc = ioc;
 	ev->id = raid_event_data->VolumeID;
 	ev->event_type = MPTSAS_IGNORE_EVENT;
@@ -2474,7 +2476,7 @@ mptsas_send_discovery_event(MPT_ADAPTER *ioc,
 	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 	if (!ev)
 		return;
-	INIT_WORK(&ev->work, mptsas_discovery_work, ev);
+	INIT_WORK(&ev->work, mptsas_discovery_work);
 	ev->ioc = ioc;
 	schedule_work(&ev->work);
 };
@@ -2511,8 +2513,7 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 		break;
 	case MPI_EVENT_PERSISTENT_TABLE_FULL:
 		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table,
-		    (void *)ioc);
+		    mptsas_persist_clear_table);
 		schedule_work(&ioc->sas_persist_task);
 		break;
 	 case MPI_EVENT_SAS_DISCOVERY:
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index e4cc3dd5fc9f..f422c0d0621c 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -646,9 +646,10 @@ struct work_queue_wrapper {
 	int			disk;
 };
 
-static void mpt_work_wrapper(void *data)
+static void mpt_work_wrapper(struct work_struct *work)
 {
-	struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
+	struct work_queue_wrapper *wqw =
+		container_of(work, struct work_queue_wrapper, work);
 	struct _MPT_SCSI_HOST *hd = wqw->hd;
 	struct Scsi_Host *shost = hd->ioc->sh;
 	struct scsi_device *sdev;
@@ -695,7 +696,7 @@ static void mpt_dv_raid(struct _MPT_SCSI_HOST *hd, int disk)
 			   disk);
 		return;
 	}
-	INIT_WORK(&wqw->work, mpt_work_wrapper, wqw);
+	INIT_WORK(&wqw->work, mpt_work_wrapper);
 	wqw->hd = hd;
 	wqw->disk = disk;
 
@@ -784,9 +785,10 @@ MODULE_DEVICE_TABLE(pci, mptspi_pci_table);
  * renegotiate for a given target
  */
 static void
-mptspi_dv_renegotiate_work(void *data)
+mptspi_dv_renegotiate_work(struct work_struct *work)
 {
-	struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
+	struct work_queue_wrapper *wqw =
+		container_of(work, struct work_queue_wrapper, work);
 	struct _MPT_SCSI_HOST *hd = wqw->hd;
 	struct scsi_device *sdev;
 
@@ -804,7 +806,7 @@ mptspi_dv_renegotiate(struct _MPT_SCSI_HOST *hd)
 	if (!wqw)
 		return;
 
-	INIT_WORK(&wqw->work, mptspi_dv_renegotiate_work, wqw);
+	INIT_WORK(&wqw->work, mptspi_dv_renegotiate_work);
 	wqw->hd = hd;
 
 	schedule_work(&wqw->work);
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index 64130227574f..7fc7399bd2ec 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -232,7 +232,7 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m)
 			break;
 		}
 
-		INIT_WORK(&evt->work, (void (*)(void *))drv->event, evt);
+		INIT_WORK(&evt->work, drv->event);
 		queue_work(drv->event_queue, &evt->work);
 		return 1;
 	}
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index a2350640384b..9e529d8dd5cb 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -371,8 +371,10 @@ static int i2o_exec_remove(struct device *dev)
  *	new LCT and if the buffer for the LCT was to small sends a LCT NOTIFY
  *	again, otherwise send LCT NOTIFY to get informed on next LCT change.
  */
-static void i2o_exec_lct_modified(struct i2o_exec_lct_notify_work *work)
+static void i2o_exec_lct_modified(struct work_struct *_work)
 {
+	struct i2o_exec_lct_notify_work *work =
+		container_of(_work, struct i2o_exec_lct_notify_work, work);
 	u32 change_ind = 0;
 	struct i2o_controller *c = work->c;
 
@@ -439,8 +441,7 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m,
 
 		work->c = c;
 
-		INIT_WORK(&work->work, (void (*)(void *))i2o_exec_lct_modified,
-			  work);
+		INIT_WORK(&work->work, i2o_exec_lct_modified);
 		queue_work(i2o_exec_driver.event_queue, &work->work);
 		return 1;
 	}
@@ -460,13 +461,15 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m,
 
 /**
  *	i2o_exec_event - Event handling function
- *	@evt: Event which occurs
+ *	@work: Work item in occurring event
  *
  *	Handles events send by the Executive device. At the moment does not do
  *	anything useful.
  */
-static void i2o_exec_event(struct i2o_event *evt)
+static void i2o_exec_event(struct work_struct *work)
 {
+	struct i2o_event *evt = container_of(work, struct i2o_event, work);
+
 	if (likely(evt->i2o_dev))
 		osm_debug("Event received from device: %d\n",
 			  evt->i2o_dev->lct_data.tid);
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index eaba81bf2eca..70ae00253321 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -419,16 +419,18 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 
 /**
  *	i2o_block_delayed_request_fn - delayed request queue function
- *	delayed_request: the delayed request with the queue to start
+ *	@work: the delayed request with the queue to start
  *
  *	If the request queue is stopped for a disk, and there is no open
  *	request, a new event is created, which calls this function to start
  *	the queue after I2O_BLOCK_REQUEST_TIME. Otherwise the queue will never
  *	be started again.
  */
-static void i2o_block_delayed_request_fn(void *delayed_request)
+static void i2o_block_delayed_request_fn(struct work_struct *work)
 {
-	struct i2o_block_delayed_request *dreq = delayed_request;
+	struct i2o_block_delayed_request *dreq =
+		container_of(work, struct i2o_block_delayed_request,
+			     work.work);
 	struct request_queue *q = dreq->queue;
 	unsigned long flags;
 
@@ -538,8 +540,9 @@ static int i2o_block_reply(struct i2o_controller *c, u32 m,
 	return 1;
 };
 
-static void i2o_block_event(struct i2o_event *evt)
+static void i2o_block_event(struct work_struct *work)
 {
+	struct i2o_event *evt = container_of(work, struct i2o_event, work);
 	osm_debug("event received\n");
 	kfree(evt);
 };
@@ -938,8 +941,8 @@ static void i2o_block_request_fn(struct request_queue *q)
 				continue;
 
 			dreq->queue = q;
-			INIT_WORK(&dreq->work, i2o_block_delayed_request_fn,
-				  dreq);
+			INIT_DELAYED_WORK(&dreq->work,
+					  i2o_block_delayed_request_fn);
 
 			if (!queue_delayed_work(i2o_block_driver.event_queue,
 						&dreq->work,
diff --git a/drivers/message/i2o/i2o_block.h b/drivers/message/i2o/i2o_block.h
index 4fdaa5bda412..d9fdc95b440d 100644
--- a/drivers/message/i2o/i2o_block.h
+++ b/drivers/message/i2o/i2o_block.h
@@ -96,7 +96,7 @@ struct i2o_block_request {
 
 /* I2O Block device delayed request */
 struct i2o_block_delayed_request {
-	struct work_struct work;
+	struct delayed_work work;
 	struct request_queue *queue;
 };
 
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index 1ba8754e9383..2ab7add78f94 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -33,9 +33,10 @@ static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock)
 	spin_unlock_irqrestore(&fm->lock, flags);
 }
 
-static void tifm_7xx1_remove_media(void *adapter)
+static void tifm_7xx1_remove_media(struct work_struct *work)
 {
-	struct tifm_adapter *fm = adapter;
+	struct tifm_adapter *fm =
+		container_of(work, struct tifm_adapter, media_remover);
 	unsigned long flags;
 	int cnt;
 	struct tifm_dev *sock;
@@ -169,9 +170,10 @@ tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num)
 	return base_addr + ((sock_num + 1) << 10);
 }
 
-static void tifm_7xx1_insert_media(void *adapter)
+static void tifm_7xx1_insert_media(struct work_struct *work)
 {
-	struct tifm_adapter *fm = adapter;
+	struct tifm_adapter *fm =
+		container_of(work, struct tifm_adapter, media_inserter);
 	unsigned long flags;
 	tifm_media_id media_id;
 	char *card_name = "xx";
@@ -261,7 +263,7 @@ static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state)
 	spin_unlock_irqrestore(&fm->lock, flags);
 	flush_workqueue(fm->wq);
 
-	tifm_7xx1_remove_media(fm);
+	tifm_7xx1_remove_media(&fm->media_remover);
 
 	pci_set_power_state(dev, PCI_D3hot);
         pci_disable_device(dev);
@@ -328,8 +330,8 @@ static int tifm_7xx1_probe(struct pci_dev *dev,
 	if (!fm->sockets)
 		goto err_out_free;
 
-	INIT_WORK(&fm->media_inserter, tifm_7xx1_insert_media, fm);
-	INIT_WORK(&fm->media_remover, tifm_7xx1_remove_media, fm);
+	INIT_WORK(&fm->media_inserter, tifm_7xx1_insert_media);
+	INIT_WORK(&fm->media_remover, tifm_7xx1_remove_media);
 	fm->eject = tifm_7xx1_eject;
 	pci_set_drvdata(dev, fm);
 
@@ -384,7 +386,7 @@ static void tifm_7xx1_remove(struct pci_dev *dev)
 
 	flush_workqueue(fm->wq);
 
-	tifm_7xx1_remove_media(fm);
+	tifm_7xx1_remove_media(&fm->media_remover);
 
 	writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
 	free_irq(dev->irq, fm);
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 766bc54406e5..21fd39e4a20f 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -1165,18 +1165,16 @@ static void mmc_setup(struct mmc_host *host)
  */
 void mmc_detect_change(struct mmc_host *host, unsigned long delay)
 {
-	if (delay)
-		mmc_schedule_delayed_work(&host->detect, delay);
-	else
-		mmc_schedule_work(&host->detect);
+	mmc_schedule_delayed_work(&host->detect, delay);
 }
 
 EXPORT_SYMBOL(mmc_detect_change);
 
 
-static void mmc_rescan(void *data)
+static void mmc_rescan(struct work_struct *work)
 {
-	struct mmc_host *host = data;
+	struct mmc_host *host =
+		container_of(work, struct mmc_host, detect.work);
 	struct list_head *l, *n;
 	unsigned char power_mode;
 
@@ -1259,7 +1257,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 		spin_lock_init(&host->lock);
 		init_waitqueue_head(&host->wq);
 		INIT_LIST_HEAD(&host->cards);
-		INIT_WORK(&host->detect, mmc_rescan, host);
+		INIT_DELAYED_WORK(&host->detect, mmc_rescan);
 
 		/*
 		 * By default, hosts do not support SGIO or large requests.
@@ -1357,7 +1355,7 @@ EXPORT_SYMBOL(mmc_suspend_host);
  */
 int mmc_resume_host(struct mmc_host *host)
 {
-	mmc_rescan(host);
+	mmc_rescan(&host->detect.work);
 
 	return 0;
 }
diff --git a/drivers/mmc/mmc.h b/drivers/mmc/mmc.h
index cd5e0ab3d84b..149affe0b686 100644
--- a/drivers/mmc/mmc.h
+++ b/drivers/mmc/mmc.h
@@ -20,6 +20,6 @@ void mmc_remove_host_sysfs(struct mmc_host *host);
 void mmc_free_host_sysfs(struct mmc_host *host);
 
 int mmc_schedule_work(struct work_struct *work);
-int mmc_schedule_delayed_work(struct work_struct *work, unsigned long delay);
+int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay);
 void mmc_flush_scheduled_work(void);
 #endif
diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c
index 10cc9734eaa0..fd9a5fc6db7b 100644
--- a/drivers/mmc/mmc_sysfs.c
+++ b/drivers/mmc/mmc_sysfs.c
@@ -320,18 +320,10 @@ void mmc_free_host_sysfs(struct mmc_host *host)
 
 static struct workqueue_struct *workqueue;
 
-/*
- * Internal function. Schedule work in the MMC work queue.
- */
-int mmc_schedule_work(struct work_struct *work)
-{
-	return queue_work(workqueue, work);
-}
-
 /*
  * Internal function. Schedule delayed work in the MMC work queue.
  */
-int mmc_schedule_delayed_work(struct work_struct *work, unsigned long delay)
+int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay)
 {
 	return queue_delayed_work(workqueue, work, delay);
 }
diff --git a/drivers/mmc/tifm_sd.c b/drivers/mmc/tifm_sd.c
index 0fdc55b08a6d..e846499a004c 100644
--- a/drivers/mmc/tifm_sd.c
+++ b/drivers/mmc/tifm_sd.c
@@ -99,7 +99,7 @@ struct tifm_sd {
 
 	struct mmc_request    *req;
 	struct work_struct    cmd_handler;
-	struct work_struct    abort_handler;
+	struct delayed_work   abort_handler;
 	wait_queue_head_t     can_eject;
 
 	size_t                written_blocks;
@@ -496,9 +496,9 @@ err_out:
 	mmc_request_done(mmc, mrq);
 }
 
-static void tifm_sd_end_cmd(void *data)
+static void tifm_sd_end_cmd(struct work_struct *work)
 {
-	struct tifm_sd *host = data;
+	struct tifm_sd *host = container_of(work, struct tifm_sd, cmd_handler);
 	struct tifm_dev *sock = host->dev;
 	struct mmc_host *mmc = tifm_get_drvdata(sock);
 	struct mmc_request *mrq;
@@ -608,9 +608,9 @@ err_out:
 	mmc_request_done(mmc, mrq);
 }
 
-static void tifm_sd_end_cmd_nodma(void *data)
+static void tifm_sd_end_cmd_nodma(struct work_struct *work)
 {
-	struct tifm_sd *host = (struct tifm_sd*)data;
+	struct tifm_sd *host = container_of(work, struct tifm_sd, cmd_handler);
 	struct tifm_dev *sock = host->dev;
 	struct mmc_host *mmc = tifm_get_drvdata(sock);
 	struct mmc_request *mrq;
@@ -661,11 +661,14 @@ static void tifm_sd_end_cmd_nodma(void *data)
 	mmc_request_done(mmc, mrq);
 }
 
-static void tifm_sd_abort(void *data)
+static void tifm_sd_abort(struct work_struct *work)
 {
+	struct tifm_sd *host =
+		container_of(work, struct tifm_sd, abort_handler.work);
+
 	printk(KERN_ERR DRIVER_NAME
 		": card failed to respond for a long period of time");
-	tifm_eject(((struct tifm_sd*)data)->dev);
+	tifm_eject(host->dev);
 }
 
 static void tifm_sd_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -762,9 +765,9 @@ static struct mmc_host_ops tifm_sd_ops = {
 	.get_ro  = tifm_sd_ro
 };
 
-static void tifm_sd_register_host(void *data)
+static void tifm_sd_register_host(struct work_struct *work)
 {
-	struct tifm_sd *host = (struct tifm_sd*)data;
+	struct tifm_sd *host = container_of(work, struct tifm_sd, cmd_handler);
 	struct tifm_dev *sock = host->dev;
 	struct mmc_host *mmc = tifm_get_drvdata(sock);
 	unsigned long flags;
@@ -772,8 +775,7 @@ static void tifm_sd_register_host(void *data)
 	spin_lock_irqsave(&sock->lock, flags);
 	host->flags |= HOST_REG;
 	PREPARE_WORK(&host->cmd_handler,
-			no_dma ? tifm_sd_end_cmd_nodma : tifm_sd_end_cmd,
-			data);
+			no_dma ? tifm_sd_end_cmd_nodma : tifm_sd_end_cmd);
 	spin_unlock_irqrestore(&sock->lock, flags);
 	dev_dbg(&sock->dev, "adding host\n");
 	mmc_add_host(mmc);
@@ -799,8 +801,8 @@ static int tifm_sd_probe(struct tifm_dev *sock)
 	host->dev = sock;
 	host->clk_div = 61;
 	init_waitqueue_head(&host->can_eject);
-	INIT_WORK(&host->cmd_handler, tifm_sd_register_host, host);
-	INIT_WORK(&host->abort_handler, tifm_sd_abort, host);
+	INIT_WORK(&host->cmd_handler, tifm_sd_register_host);
+	INIT_DELAYED_WORK(&host->abort_handler, tifm_sd_abort);
 
 	tifm_set_drvdata(sock, mmc);
 	sock->signal_irq = tifm_sd_signal_irq;
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index d02ed51abfcc..931028f672de 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -594,7 +594,7 @@ struct rtl8139_private {
 	u32 rx_config;
 	struct rtl_extra_stats xstats;
 
-	struct work_struct thread;
+	struct delayed_work thread;
 
 	struct mii_if_info mii;
 	unsigned int regs_len;
@@ -636,8 +636,8 @@ static struct net_device_stats *rtl8139_get_stats (struct net_device *dev);
 static void rtl8139_set_rx_mode (struct net_device *dev);
 static void __set_rx_mode (struct net_device *dev);
 static void rtl8139_hw_start (struct net_device *dev);
-static void rtl8139_thread (void *_data);
-static void rtl8139_tx_timeout_task(void *_data);
+static void rtl8139_thread (struct work_struct *work);
+static void rtl8139_tx_timeout_task(struct work_struct *work);
 static const struct ethtool_ops rtl8139_ethtool_ops;
 
 /* write MMIO register, with flush */
@@ -1010,7 +1010,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 		(debug < 0 ? RTL8139_DEF_MSG_ENABLE : ((1 << debug) - 1));
 	spin_lock_init (&tp->lock);
 	spin_lock_init (&tp->rx_lock);
-	INIT_WORK(&tp->thread, rtl8139_thread, dev);
+	INIT_DELAYED_WORK(&tp->thread, rtl8139_thread);
 	tp->mii.dev = dev;
 	tp->mii.mdio_read = mdio_read;
 	tp->mii.mdio_write = mdio_write;
@@ -1596,15 +1596,16 @@ static inline void rtl8139_thread_iter (struct net_device *dev,
 		 RTL_R8 (Config1));
 }
 
-static void rtl8139_thread (void *_data)
+static void rtl8139_thread (struct work_struct *work)
 {
-	struct net_device *dev = _data;
-	struct rtl8139_private *tp = netdev_priv(dev);
+	struct rtl8139_private *tp =
+		container_of(work, struct rtl8139_private, thread.work);
+	struct net_device *dev = tp->mii.dev;
 	unsigned long thr_delay = next_tick;
 
 	if (tp->watchdog_fired) {
 		tp->watchdog_fired = 0;
-		rtl8139_tx_timeout_task(_data);
+		rtl8139_tx_timeout_task(work);
 	} else if (rtnl_trylock()) {
 		rtl8139_thread_iter (dev, tp, tp->mmio_addr);
 		rtnl_unlock ();
@@ -1646,10 +1647,11 @@ static inline void rtl8139_tx_clear (struct rtl8139_private *tp)
 	/* XXX account for unsent Tx packets in tp->stats.tx_dropped */
 }
 
-static void rtl8139_tx_timeout_task (void *_data)
+static void rtl8139_tx_timeout_task (struct work_struct *work)
 {
-	struct net_device *dev = _data;
-	struct rtl8139_private *tp = netdev_priv(dev);
+	struct rtl8139_private *tp =
+		container_of(work, struct rtl8139_private, thread.work);
+	struct net_device *dev = tp->mii.dev;
 	void __iomem *ioaddr = tp->mmio_addr;
 	int i;
 	u8 tmp8;
@@ -1695,7 +1697,7 @@ static void rtl8139_tx_timeout (struct net_device *dev)
 	struct rtl8139_private *tp = netdev_priv(dev);
 
 	if (!tp->have_thread) {
-		INIT_WORK(&tp->thread, rtl8139_tx_timeout_task, dev);
+		INIT_DELAYED_WORK(&tp->thread, rtl8139_tx_timeout_task);
 		schedule_delayed_work(&tp->thread, next_tick);
 	} else
 		tp->watchdog_fired = 1;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 01b76d3aa42f..b12cc4596b8d 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4339,9 +4339,9 @@ bnx2_open(struct net_device *dev)
 }
 
 static void
-bnx2_reset_task(void *data)
+bnx2_reset_task(struct work_struct *work)
 {
-	struct bnx2 *bp = data;
+	struct bnx2 *bp = container_of(work, struct bnx2, reset_task);
 
 	if (!netif_running(bp->dev))
 		return;
@@ -5630,7 +5630,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 	bp->pdev = pdev;
 
 	spin_lock_init(&bp->phy_lock);
-	INIT_WORK(&bp->reset_task, bnx2_reset_task, bp);
+	INIT_WORK(&bp->reset_task, bnx2_reset_task);
 
 	dev->base_addr = dev->mem_start = pci_resource_start(pdev, 0);
 	mem_len = MB_GET_CID_ADDR(17);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 521c5b71023c..fe08f3845491 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -4066,9 +4066,9 @@ static int cas_alloc_rxds(struct cas *cp)
 	return 0;
 }
 
-static void cas_reset_task(void *data)
+static void cas_reset_task(struct work_struct *work)
 {
-	struct cas *cp = (struct cas *) data;
+	struct cas *cp = container_of(work, struct cas, reset_task);
 #if 0
 	int pending = atomic_read(&cp->reset_task_pending);
 #else
@@ -5006,7 +5006,7 @@ static int __devinit cas_init_one(struct pci_dev *pdev,
 	atomic_set(&cp->reset_task_pending_spare, 0);
 	atomic_set(&cp->reset_task_pending_mtu, 0);
 #endif
-	INIT_WORK(&cp->reset_task, cas_reset_task, cp);
+	INIT_WORK(&cp->reset_task, cas_reset_task);
 
 	/* Default link parameters */
 	if (link_mode >= 0 && link_mode <= 6)
diff --git a/drivers/net/chelsio/common.h b/drivers/net/chelsio/common.h
index 5d9dd14427c5..8b1bedbce0d5 100644
--- a/drivers/net/chelsio/common.h
+++ b/drivers/net/chelsio/common.h
@@ -209,7 +209,7 @@ struct adapter {
 	struct peespi *espi;
 
 	struct port_info port[MAX_NPORTS];
-	struct work_struct stats_update_task;
+	struct delayed_work stats_update_task;
 	struct timer_list stats_update_timer;
 
 	struct semaphore mib_mutex;
diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c
index ad7ff9641a7e..f607cc6a276b 100644
--- a/drivers/net/chelsio/cxgb2.c
+++ b/drivers/net/chelsio/cxgb2.c
@@ -927,10 +927,11 @@ static void t1_netpoll(struct net_device *dev)
  * Periodic accumulation of MAC statistics.  This is used only if the MAC
  * does not have any other way to prevent stats counter overflow.
  */
-static void mac_stats_task(void *data)
+static void mac_stats_task(struct work_struct *work)
 {
 	int i;
-	struct adapter *adapter = data;
+	struct adapter *adapter =
+		container_of(work, struct adapter, stats_update_task.work);
 
 	for_each_port(adapter, i) {
 		struct port_info *p = &adapter->port[i];
@@ -951,9 +952,10 @@ static void mac_stats_task(void *data)
 /*
  * Processes elmer0 external interrupts in process context.
  */
-static void ext_intr_task(void *data)
+static void ext_intr_task(struct work_struct *work)
 {
-	struct adapter *adapter = data;
+	struct adapter *adapter =
+		container_of(work, struct adapter, ext_intr_handler_task);
 
 	elmer0_ext_intr_handler(adapter);
 
@@ -1087,9 +1089,9 @@ static int __devinit init_one(struct pci_dev *pdev,
 			spin_lock_init(&adapter->async_lock);
 
 			INIT_WORK(&adapter->ext_intr_handler_task,
-				  ext_intr_task, adapter);
-			INIT_WORK(&adapter->stats_update_task, mac_stats_task,
-				  adapter);
+				  ext_intr_task);
+			INIT_DELAYED_WORK(&adapter->stats_update_task,
+					  mac_stats_task);
 #ifdef work_struct
 			init_timer(&adapter->stats_update_timer);
 			adapter->stats_update_timer.function = mac_stats_timer;
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 19ab3441269c..e7737d02bb05 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2102,9 +2102,10 @@ static void e100_tx_timeout(struct net_device *netdev)
 	schedule_work(&nic->tx_timeout_task);
 }
 
-static void e100_tx_timeout_task(struct net_device *netdev)
+static void e100_tx_timeout_task(struct work_struct *work)
 {
-	struct nic *nic = netdev_priv(netdev);
+	struct nic *nic = container_of(work, struct nic, tx_timeout_task);
+	struct net_device *netdev = nic->netdev;
 
 	DPRINTK(TX_ERR, DEBUG, "scb.status=0x%02X\n",
 		readb(&nic->csr->scb.status));
@@ -2637,8 +2638,7 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	nic->blink_timer.function = e100_blink_led;
 	nic->blink_timer.data = (unsigned long)nic;
 
-	INIT_WORK(&nic->tx_timeout_task,
-		(void (*)(void *))e100_tx_timeout_task, netdev);
+	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
 
 	if((err = e100_alloc(nic))) {
 		DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 6ad696101418..83fa32f72398 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -2224,11 +2224,12 @@ static int ehea_stop(struct net_device *dev)
 	return ret;
 }
 
-static void ehea_reset_port(void *data)
+static void ehea_reset_port(struct work_struct *work)
 {
 	int ret;
-	struct net_device *dev = data;
-	struct ehea_port *port = netdev_priv(dev);
+	struct ehea_port *port =
+		container_of(work, struct ehea_port, reset_task);
+	struct net_device *dev = port->netdev;
 
 	port->resets++;
 	down(&port->port_lock);
@@ -2379,7 +2380,7 @@ static int ehea_setup_single_port(struct ehea_port *port,
 	dev->tx_timeout = &ehea_tx_watchdog;
 	dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT;
 
-	INIT_WORK(&port->reset_task, ehea_reset_port, dev);
+	INIT_WORK(&port->reset_task, ehea_reset_port);
 
 	ehea_set_ethtool_ops(dev);
 
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 1ed9cccd3c11..3c33d6f6a6a6 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -168,8 +168,9 @@ struct baycom_state {
 	int magic;
 
         struct pardevice *pdev;
+	struct net_device *dev;
 	unsigned int work_running;
-	struct work_struct run_work;
+	struct delayed_work run_work;
 	unsigned int modem;
 	unsigned int bitrate;
 	unsigned char stat;
@@ -659,16 +660,18 @@ static int receive(struct net_device *dev, int cnt)
 #define GETTICK(x)
 #endif /* __i386__ */
 
-static void epp_bh(struct net_device *dev)
+static void epp_bh(struct work_struct *work)
 {
+	struct net_device *dev;
 	struct baycom_state *bc;
 	struct parport *pp;
 	unsigned char stat;
 	unsigned char tmp[2];
 	unsigned int time1 = 0, time2 = 0, time3 = 0;
 	int cnt, cnt2;
-	
-	bc = netdev_priv(dev);
+
+	bc = container_of(work, struct baycom_state, run_work.work);
+	dev = bc->dev;
 	if (!bc->work_running)
 		return;
 	baycom_int_freq(bc);
@@ -889,7 +892,7 @@ static int epp_open(struct net_device *dev)
                 return -EBUSY;
         }
         dev->irq = /*pp->irq*/ 0;
-	INIT_WORK(&bc->run_work, (void *)(void *)epp_bh, dev);
+	INIT_DELAYED_WORK(&bc->run_work, epp_bh);
 	bc->work_running = 1;
 	bc->modem = EPP_CONVENTIONAL;
 	if (eppconfig(bc))
@@ -1213,6 +1216,7 @@ static void __init baycom_epp_dev_setup(struct net_device *dev)
 	/*
 	 * initialize part of the baycom_state struct
 	 */
+	bc->dev = dev;
 	bc->magic = BAYCOM_MAGIC;
 	bc->cfg.fclk = 19666600;
 	bc->cfg.bps = 9600;
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index b32c52ed19d7..f0c61f3b2a82 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -560,9 +560,9 @@ static inline int mcs_find_endpoints(struct mcs_cb *mcs,
 	return ret;
 }
 
-static void mcs_speed_work(void *arg)
+static void mcs_speed_work(struct work_struct *work)
 {
-	struct mcs_cb *mcs = arg;
+	struct mcs_cb *mcs = container_of(work, struct mcs_cb, work);
 	struct net_device *netdev = mcs->netdev;
 
 	mcs_speed_change(mcs);
@@ -927,7 +927,7 @@ static int mcs_probe(struct usb_interface *intf,
 	irda_qos_bits_to_value(&mcs->qos);
 
 	/* Speed change work initialisation*/
-	INIT_WORK(&mcs->work, mcs_speed_work, mcs);
+	INIT_WORK(&mcs->work, mcs_speed_work);
 
 	/* Override the network functions we need to use */
 	ndev->hard_start_xmit = mcs_hard_xmit;
diff --git a/drivers/net/irda/sir-dev.h b/drivers/net/irda/sir-dev.h
index 9fa294a546d6..2a57bc67ce35 100644
--- a/drivers/net/irda/sir-dev.h
+++ b/drivers/net/irda/sir-dev.h
@@ -22,7 +22,7 @@
 
 struct sir_fsm {
 	struct semaphore	sem;
-	struct work_struct      work;
+	struct delayed_work	work;
 	unsigned		state, substate;
 	int			param;
 	int			result;
diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c
index 3b5854d10c17..17b0c3ab6201 100644
--- a/drivers/net/irda/sir_dev.c
+++ b/drivers/net/irda/sir_dev.c
@@ -100,9 +100,9 @@ static int sirdev_tx_complete_fsm(struct sir_dev *dev)
  * Both must be unlocked/restarted on completion - but only on final exit.
  */
 
-static void sirdev_config_fsm(void *data)
+static void sirdev_config_fsm(struct work_struct *work)
 {
-	struct sir_dev *dev = data;
+	struct sir_dev *dev = container_of(work, struct sir_dev, fsm.work.work);
 	struct sir_fsm *fsm = &dev->fsm;
 	int next_state;
 	int ret = -1;
@@ -309,8 +309,8 @@ int sirdev_schedule_request(struct sir_dev *dev, int initial_state, unsigned par
 	fsm->param = param;
 	fsm->result = 0;
 
-	INIT_WORK(&fsm->work, sirdev_config_fsm, dev);
-	queue_work(irda_sir_wq, &fsm->work);
+	INIT_DELAYED_WORK(&fsm->work, sirdev_config_fsm);
+	queue_delayed_work(irda_sir_wq, &fsm->work, 0);
 	return 0;
 }
 
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index 2284e2ce1692..d6f4f185bf37 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -166,7 +166,7 @@ struct veth_msg {
 
 struct veth_lpar_connection {
 	HvLpIndex remote_lp;
-	struct work_struct statemachine_wq;
+	struct delayed_work statemachine_wq;
 	struct veth_msg *msgs;
 	int num_events;
 	struct veth_cap_data local_caps;
@@ -456,7 +456,7 @@ static struct kobj_type veth_port_ktype = {
 
 static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
 {
-	schedule_work(&cnx->statemachine_wq);
+	schedule_delayed_work(&cnx->statemachine_wq, 0);
 }
 
 static void veth_take_cap(struct veth_lpar_connection *cnx,
@@ -638,9 +638,11 @@ static int veth_process_caps(struct veth_lpar_connection *cnx)
 }
 
 /* FIXME: The gotos here are a bit dubious */
-static void veth_statemachine(void *p)
+static void veth_statemachine(struct work_struct *work)
 {
-	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
+	struct veth_lpar_connection *cnx =
+		container_of(work, struct veth_lpar_connection,
+			     statemachine_wq.work);
 	int rlp = cnx->remote_lp;
 	int rc;
 
@@ -827,7 +829,7 @@ static int veth_init_connection(u8 rlp)
 
 	cnx->remote_lp = rlp;
 	spin_lock_init(&cnx->lock);
-	INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);
+	INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine);
 
 	init_timer(&cnx->ack_timer);
 	cnx->ack_timer.function = veth_timed_ack;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index e09f575a3a38..d1ebb91ed278 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -106,7 +106,7 @@ static boolean_t ixgb_clean_rx_irq(struct ixgb_adapter *adapter);
 static void ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter);
 void ixgb_set_ethtool_ops(struct net_device *netdev);
 static void ixgb_tx_timeout(struct net_device *dev);
-static void ixgb_tx_timeout_task(struct net_device *dev);
+static void ixgb_tx_timeout_task(struct work_struct *work);
 static void ixgb_vlan_rx_register(struct net_device *netdev,
 				  struct vlan_group *grp);
 static void ixgb_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid);
@@ -489,8 +489,7 @@ ixgb_probe(struct pci_dev *pdev,
 	adapter->watchdog_timer.function = &ixgb_watchdog;
 	adapter->watchdog_timer.data = (unsigned long)adapter;
 
-	INIT_WORK(&adapter->tx_timeout_task,
-		  (void (*)(void *))ixgb_tx_timeout_task, netdev);
+	INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task);
 
 	strcpy(netdev->name, "eth%d");
 	if((err = register_netdev(netdev)))
@@ -1493,9 +1492,10 @@ ixgb_tx_timeout(struct net_device *netdev)
 }
 
 static void
-ixgb_tx_timeout_task(struct net_device *netdev)
+ixgb_tx_timeout_task(struct work_struct *work)
 {
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
+	struct ixgb_adapter *adapter =
+		container_of(work, struct ixgb_adapter, tx_timeout_task);
 
 	adapter->tx_timeout_count++;
 	ixgb_down(adapter, TRUE);
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 806081b59733..98703e086ee7 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2615,9 +2615,10 @@ static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp)
  * This watchdog is used to check whether the board has suffered
  * from a parity error and needs to be recovered.
  */
-static void myri10ge_watchdog(void *arg)
+static void myri10ge_watchdog(struct work_struct *work)
 {
-	struct myri10ge_priv *mgp = arg;
+	struct myri10ge_priv *mgp =
+		container_of(work, struct myri10ge_priv, watchdog_work);
 	u32 reboot;
 	int status;
 	u16 cmd, vendor;
@@ -2887,7 +2888,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		    (unsigned long)mgp);
 
 	SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
-	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog, mgp);
+	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
 	status = register_netdev(netdev);
 	if (status != 0) {
 		dev_err(&pdev->dev, "register_netdev failed: %d\n", status);
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index b0127c71a5b6..312e0e331712 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -427,6 +427,7 @@ struct ns83820 {
 	u8			__iomem *base;
 
 	struct pci_dev		*pci_dev;
+	struct net_device	*ndev;
 
 #ifdef NS83820_VLAN_ACCEL_SUPPORT
 	struct vlan_group	*vlgrp;
@@ -631,10 +632,10 @@ static void fastcall rx_refill_atomic(struct net_device *ndev)
 }
 
 /* REFILL */
-static inline void queue_refill(void *_dev)
+static inline void queue_refill(struct work_struct *work)
 {
-	struct net_device *ndev = _dev;
-	struct ns83820 *dev = PRIV(ndev);
+	struct ns83820 *dev = container_of(work, struct ns83820, tq_refill);
+	struct net_device *ndev = dev->ndev;
 
 	rx_refill(ndev, GFP_KERNEL);
 	if (dev->rx_info.up)
@@ -1841,6 +1842,7 @@ static int __devinit ns83820_init_one(struct pci_dev *pci_dev, const struct pci_
 
 	ndev = alloc_etherdev(sizeof(struct ns83820));
 	dev = PRIV(ndev);
+	dev->ndev = ndev;
 	err = -ENOMEM;
 	if (!dev)
 		goto out;
@@ -1853,7 +1855,7 @@ static int __devinit ns83820_init_one(struct pci_dev *pci_dev, const struct pci_
 	SET_MODULE_OWNER(ndev);
 	SET_NETDEV_DEV(ndev, &pci_dev->dev);
 
-	INIT_WORK(&dev->tq_refill, queue_refill, ndev);
+	INIT_WORK(&dev->tq_refill, queue_refill);
 	tasklet_init(&dev->rx_tasklet, rx_action, (unsigned long)ndev);
 
 	err = pci_enable_device(pci_dev);
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index f3914f58d67f..5de8850f2323 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -332,6 +332,7 @@ static irqreturn_t xirc2ps_interrupt(int irq, void *dev_id);
  */
 
 typedef struct local_info_t {
+	struct net_device	*dev;
 	struct pcmcia_device	*p_dev;
     dev_node_t node;
     struct net_device_stats stats;
@@ -353,7 +354,7 @@ typedef struct local_info_t {
  */
 static int do_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void do_tx_timeout(struct net_device *dev);
-static void xirc2ps_tx_timeout_task(void *data);
+static void xirc2ps_tx_timeout_task(struct work_struct *work);
 static struct net_device_stats *do_get_stats(struct net_device *dev);
 static void set_addresses(struct net_device *dev);
 static void set_multicast_list(struct net_device *dev);
@@ -567,6 +568,7 @@ xirc2ps_probe(struct pcmcia_device *link)
     if (!dev)
 	    return -ENOMEM;
     local = netdev_priv(dev);
+    local->dev = dev;
     local->p_dev = link;
     link->priv = dev;
 
@@ -591,7 +593,7 @@ xirc2ps_probe(struct pcmcia_device *link)
 #ifdef HAVE_TX_TIMEOUT
     dev->tx_timeout = do_tx_timeout;
     dev->watchdog_timeo = TX_TIMEOUT;
-    INIT_WORK(&local->tx_timeout_task, xirc2ps_tx_timeout_task, dev);
+    INIT_WORK(&local->tx_timeout_task, xirc2ps_tx_timeout_task);
 #endif
 
     return xirc2ps_config(link);
@@ -1344,9 +1346,11 @@ xirc2ps_interrupt(int irq, void *dev_id)
 /*====================================================================*/
 
 static void
-xirc2ps_tx_timeout_task(void *data)
+xirc2ps_tx_timeout_task(struct work_struct *work)
 {
-    struct net_device *dev = data;
+	local_info_t *local =
+		container_of(work, local_info_t, tx_timeout_task);
+	struct net_device *dev = local->dev;
     /* reset the card */
     do_reset(dev,1);
     dev->trans_start = jiffies;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 3af9fcf76c81..a443976d5dcf 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -394,7 +394,7 @@ out_unlock:
 EXPORT_SYMBOL(phy_start_aneg);
 
 
-static void phy_change(void *data);
+static void phy_change(struct work_struct *work);
 static void phy_timer(unsigned long data);
 
 /* phy_start_machine:
@@ -549,7 +549,7 @@ int phy_start_interrupts(struct phy_device *phydev)
 {
 	int err = 0;
 
-	INIT_WORK(&phydev->phy_queue, phy_change, phydev);
+	INIT_WORK(&phydev->phy_queue, phy_change);
 
 	if (request_irq(phydev->irq, phy_interrupt,
 				IRQF_SHARED,
@@ -585,10 +585,11 @@ EXPORT_SYMBOL(phy_stop_interrupts);
 
 
 /* Scheduled by the phy_interrupt/timer to handle PHY changes */
-static void phy_change(void *data)
+static void phy_change(struct work_struct *work)
 {
 	int err;
-	struct phy_device *phydev = data;
+	struct phy_device *phydev =
+		container_of(work, struct phy_device, phy_queue);
 
 	err = phy_disable_interrupts(phydev);
 
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 71afb274498f..6bb085f54437 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -138,9 +138,9 @@ static const unsigned int net_debug = NET_DEBUG;
 #define PLIP_NIBBLE_WAIT        3000
 
 /* Bottom halves */
-static void plip_kick_bh(struct net_device *dev);
-static void plip_bh(struct net_device *dev);
-static void plip_timer_bh(struct net_device *dev);
+static void plip_kick_bh(struct work_struct *work);
+static void plip_bh(struct work_struct *work);
+static void plip_timer_bh(struct work_struct *work);
 
 /* Interrupt handler */
 static void plip_interrupt(int irq, void *dev_id);
@@ -207,9 +207,10 @@ struct plip_local {
 
 struct net_local {
 	struct net_device_stats enet_stats;
+	struct net_device *dev;
 	struct work_struct immediate;
-	struct work_struct deferred;
-	struct work_struct timer;
+	struct delayed_work deferred;
+	struct delayed_work timer;
 	struct plip_local snd_data;
 	struct plip_local rcv_data;
 	struct pardevice *pardev;
@@ -306,11 +307,11 @@ plip_init_netdev(struct net_device *dev)
 	nl->nibble	= PLIP_NIBBLE_WAIT;
 
 	/* Initialize task queue structures */
-	INIT_WORK(&nl->immediate, (void (*)(void *))plip_bh, dev);
-	INIT_WORK(&nl->deferred, (void (*)(void *))plip_kick_bh, dev);
+	INIT_WORK(&nl->immediate, plip_bh);
+	INIT_DELAYED_WORK(&nl->deferred, plip_kick_bh);
 
 	if (dev->irq == -1)
-		INIT_WORK(&nl->timer, (void (*)(void *))plip_timer_bh, dev);
+		INIT_DELAYED_WORK(&nl->timer, plip_timer_bh);
 
 	spin_lock_init(&nl->lock);
 }
@@ -319,9 +320,10 @@ plip_init_netdev(struct net_device *dev)
    This routine is kicked by do_timer().
    Request `plip_bh' to be invoked. */
 static void
-plip_kick_bh(struct net_device *dev)
+plip_kick_bh(struct work_struct *work)
 {
-	struct net_local *nl = netdev_priv(dev);
+	struct net_local *nl =
+		container_of(work, struct net_local, deferred.work);
 
 	if (nl->is_deferred)
 		schedule_work(&nl->immediate);
@@ -362,9 +364,9 @@ static const plip_func connection_state_table[] =
 
 /* Bottom half handler of PLIP. */
 static void
-plip_bh(struct net_device *dev)
+plip_bh(struct work_struct *work)
 {
-	struct net_local *nl = netdev_priv(dev);
+	struct net_local *nl = container_of(work, struct net_local, immediate);
 	struct plip_local *snd = &nl->snd_data;
 	struct plip_local *rcv = &nl->rcv_data;
 	plip_func f;
@@ -372,20 +374,21 @@ plip_bh(struct net_device *dev)
 
 	nl->is_deferred = 0;
 	f = connection_state_table[nl->connection];
-	if ((r = (*f)(dev, nl, snd, rcv)) != OK
-	    && (r = plip_bh_timeout_error(dev, nl, snd, rcv, r)) != OK) {
+	if ((r = (*f)(nl->dev, nl, snd, rcv)) != OK
+	    && (r = plip_bh_timeout_error(nl->dev, nl, snd, rcv, r)) != OK) {
 		nl->is_deferred = 1;
 		schedule_delayed_work(&nl->deferred, 1);
 	}
 }
 
 static void
-plip_timer_bh(struct net_device *dev)
+plip_timer_bh(struct work_struct *work)
 {
-	struct net_local *nl = netdev_priv(dev);
+	struct net_local *nl =
+		container_of(work, struct net_local, timer.work);
 
 	if (!(atomic_read (&nl->kill_timer))) {
-		plip_interrupt (-1, dev);
+		plip_interrupt (-1, nl->dev);
 
 		schedule_delayed_work(&nl->timer, 1);
 	}
@@ -1284,6 +1287,7 @@ static void plip_attach (struct parport *port)
 		}
 
 		nl = netdev_priv(dev);
+		nl->dev = dev;
 		nl->pardev = parport_register_device(port, name, plip_preempt,
 						 plip_wakeup, plip_interrupt,
 						 0, dev);
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index ec640f6229ae..d79d141a601d 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -2008,7 +2008,7 @@ static irqreturn_t ql3xxx_isr(int irq, void *dev_id)
 			       "%s: Another function issued a reset to the "
 			       "chip. ISR value = %x.\n", ndev->name, value);
 		}
-		queue_work(qdev->workqueue, &qdev->reset_work);
+		queue_delayed_work(qdev->workqueue, &qdev->reset_work, 0);
 		spin_unlock(&qdev->adapter_lock);
 	} else if (value & ISP_IMR_DISABLE_CMPL_INT) {
 		ql_disable_interrupts(qdev);
@@ -3182,11 +3182,13 @@ static void ql3xxx_tx_timeout(struct net_device *ndev)
 	/*
 	 * Wake up the worker to process this event.
 	 */
-	queue_work(qdev->workqueue, &qdev->tx_timeout_work);
+	queue_delayed_work(qdev->workqueue, &qdev->tx_timeout_work, 0);
 }
 
-static void ql_reset_work(struct ql3_adapter *qdev)
+static void ql_reset_work(struct work_struct *work)
 {
+	struct ql3_adapter *qdev =
+		container_of(work, struct ql3_adapter, reset_work.work);
 	struct net_device *ndev = qdev->ndev;
 	u32 value;
 	struct ql_tx_buf_cb *tx_cb;
@@ -3278,9 +3280,12 @@ static void ql_reset_work(struct ql3_adapter *qdev)
 	}
 }
 
-static void ql_tx_timeout_work(struct ql3_adapter *qdev)
+static void ql_tx_timeout_work(struct work_struct *work)
 {
-	ql_cycle_adapter(qdev,QL_DO_RESET);
+	struct ql3_adapter *qdev =
+		container_of(work, struct ql3_adapter, tx_timeout_work.work);
+
+	ql_cycle_adapter(qdev, QL_DO_RESET);
 }
 
 static void ql_get_board_info(struct ql3_adapter *qdev)
@@ -3459,9 +3464,8 @@ static int __devinit ql3xxx_probe(struct pci_dev *pdev,
 	netif_stop_queue(ndev);
 
 	qdev->workqueue = create_singlethread_workqueue(ndev->name);
-	INIT_WORK(&qdev->reset_work, (void (*)(void *))ql_reset_work, qdev);
-	INIT_WORK(&qdev->tx_timeout_work,
-		  (void (*)(void *))ql_tx_timeout_work, qdev);
+	INIT_DELAYED_WORK(&qdev->reset_work, ql_reset_work);
+	INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work);
 
 	init_timer(&qdev->adapter_timer);
 	qdev->adapter_timer.function = ql3xxx_timer;
diff --git a/drivers/net/qla3xxx.h b/drivers/net/qla3xxx.h
index 65da2c0bfda6..ea94de7fd071 100644
--- a/drivers/net/qla3xxx.h
+++ b/drivers/net/qla3xxx.h
@@ -1186,8 +1186,8 @@ struct ql3_adapter {
 	u32 numPorts;
 	struct net_device_stats stats;
 	struct workqueue_struct *workqueue;
-	struct work_struct reset_work;
-	struct work_struct tx_timeout_work;
+	struct delayed_work reset_work;
+	struct delayed_work tx_timeout_work;
 	u32 max_frame_size;
 };
 
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 27f90b2139c0..1f9663a70823 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -424,6 +424,7 @@ struct ring_info {
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;	/* Index of PCI device */
+	struct net_device *dev;
 	struct net_device_stats stats;	/* statistics of net device */
 	spinlock_t lock;		/* spin lock flag */
 	u32 msg_enable;
@@ -455,7 +456,7 @@ struct rtl8169_private {
 	void (*phy_reset_enable)(void __iomem *);
 	unsigned int (*phy_reset_pending)(void __iomem *);
 	unsigned int (*link_ok)(void __iomem *);
-	struct work_struct task;
+	struct delayed_work task;
 	unsigned wol_enabled : 1;
 };
 
@@ -1492,6 +1493,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	tp = netdev_priv(dev);
+	tp->dev = dev;
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
 
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
@@ -1764,7 +1766,7 @@ static int rtl8169_open(struct net_device *dev)
 	if (retval < 0)
 		goto err_free_rx;
 
-	INIT_WORK(&tp->task, NULL, dev);
+	INIT_DELAYED_WORK(&tp->task, NULL);
 
 	rtl8169_hw_start(dev);
 
@@ -2087,11 +2089,11 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
 	tp->cur_tx = tp->dirty_tx = 0;
 }
 
-static void rtl8169_schedule_work(struct net_device *dev, void (*task)(void *))
+static void rtl8169_schedule_work(struct net_device *dev, work_func_t task)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	PREPARE_WORK(&tp->task, task, dev);
+	PREPARE_DELAYED_WORK(&tp->task, task);
 	schedule_delayed_work(&tp->task, 4);
 }
 
@@ -2110,9 +2112,11 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
 	netif_poll_enable(dev);
 }
 
-static void rtl8169_reinit_task(void *_data)
+static void rtl8169_reinit_task(struct work_struct *work)
 {
-	struct net_device *dev = _data;
+	struct rtl8169_private *tp =
+		container_of(work, struct rtl8169_private, task.work);
+	struct net_device *dev = tp->dev;
 	int ret;
 
 	if (netif_running(dev)) {
@@ -2135,10 +2139,11 @@ static void rtl8169_reinit_task(void *_data)
 	}
 }
 
-static void rtl8169_reset_task(void *_data)
+static void rtl8169_reset_task(struct work_struct *work)
 {
-	struct net_device *dev = _data;
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp =
+		container_of(work, struct rtl8169_private, task.work);
+	struct net_device *dev = tp->dev;
 
 	if (!netif_running(dev))
 		return;
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 33569ec9dbfc..250cdbeefdfd 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -5872,9 +5872,9 @@ static void s2io_tasklet(unsigned long dev_addr)
  * Description: Sets the link status for the adapter
  */
 
-static void s2io_set_link(unsigned long data)
+static void s2io_set_link(struct work_struct *work)
 {
-	nic_t *nic = (nic_t *) data;
+	nic_t *nic = container_of(work, nic_t, set_link_task);
 	struct net_device *dev = nic->dev;
 	XENA_dev_config_t __iomem *bar0 = nic->bar0;
 	register u64 val64;
@@ -6379,10 +6379,10 @@ static int s2io_card_up(nic_t * sp)
  * spin lock.
  */
 
-static void s2io_restart_nic(unsigned long data)
+static void s2io_restart_nic(struct work_struct *work)
 {
-	struct net_device *dev = (struct net_device *) data;
-	nic_t *sp = dev->priv;
+	nic_t *sp = container_of(work, nic_t, rst_timer_task);
+	struct net_device *dev = sp->dev;
 
 	s2io_card_down(sp);
 	if (s2io_card_up(sp)) {
@@ -6992,10 +6992,8 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
 	dev->tx_timeout = &s2io_tx_watchdog;
 	dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
-	INIT_WORK(&sp->rst_timer_task,
-		  (void (*)(void *)) s2io_restart_nic, dev);
-	INIT_WORK(&sp->set_link_task,
-		  (void (*)(void *)) s2io_set_link, sp);
+	INIT_WORK(&sp->rst_timer_task, s2io_restart_nic);
+	INIT_WORK(&sp->set_link_task, s2io_set_link);
 
 	pci_save_state(sp->pdev);
 
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index 12b719f4d00f..3b0bafd273c8 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -1000,7 +1000,7 @@ s2io_msix_fifo_handle(int irq, void *dev_id);
 static irqreturn_t s2io_isr(int irq, void *dev_id);
 static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag);
 static const struct ethtool_ops netdev_ethtool_ops;
-static void s2io_set_link(unsigned long data);
+static void s2io_set_link(struct work_struct *work);
 static int s2io_set_swapper(nic_t * sp);
 static void s2io_card_down(nic_t *nic);
 static int s2io_card_up(nic_t *nic);
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index aaba458584fb..b70ed79d4121 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -280,6 +280,7 @@ enum sis190_feature {
 struct sis190_private {
 	void __iomem *mmio_addr;
 	struct pci_dev *pci_dev;
+	struct net_device *dev;
 	struct net_device_stats stats;
 	spinlock_t lock;
 	u32 rx_buf_sz;
@@ -897,10 +898,11 @@ static void sis190_hw_start(struct net_device *dev)
 	netif_start_queue(dev);
 }
 
-static void sis190_phy_task(void * data)
+static void sis190_phy_task(struct work_struct *work)
 {
-	struct net_device *dev = data;
-	struct sis190_private *tp = netdev_priv(dev);
+	struct sis190_private *tp =
+		container_of(work, struct sis190_private, phy_task);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->mmio_addr;
 	int phy_id = tp->mii_if.phy_id;
 	u16 val;
@@ -1047,7 +1049,7 @@ static int sis190_open(struct net_device *dev)
 	if (rc < 0)
 		goto err_free_rx_1;
 
-	INIT_WORK(&tp->phy_task, sis190_phy_task, dev);
+	INIT_WORK(&tp->phy_task, sis190_phy_task);
 
 	sis190_request_timer(dev);
 
@@ -1436,6 +1438,7 @@ static struct net_device * __devinit sis190_init_board(struct pci_dev *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	tp = netdev_priv(dev);
+	tp->dev = dev;
 	tp->msg_enable = netif_msg_init(debug.msg_enable, SIS190_MSG_DEFAULT);
 
 	rc = pci_enable_device(pdev);
@@ -1798,7 +1801,7 @@ static int __devinit sis190_init_one(struct pci_dev *pdev,
 
 	sis190_init_rxfilter(dev);
 
-	INIT_WORK(&tp->phy_task, sis190_phy_task, dev);
+	INIT_WORK(&tp->phy_task, sis190_phy_task);
 
 	dev->open = sis190_open;
 	dev->stop = sis190_close;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index b2949035f66a..3b67614372a7 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -1327,10 +1327,11 @@ static void xm_check_link(struct net_device *dev)
  * Since internal PHY is wired to a level triggered pin, can't
  * get an interrupt when carrier is detected.
  */
-static void xm_link_timer(void *arg)
+static void xm_link_timer(struct work_struct *work)
 {
-	struct net_device *dev = arg;
-	struct skge_port *skge = netdev_priv(arg);
+	struct skge_port *skge =
+		container_of(work, struct skge_port, link_thread.work);
+	struct net_device *dev = skge->netdev;
  	struct skge_hw *hw = skge->hw;
 	int port = skge->port;
 
@@ -3072,9 +3073,9 @@ static void skge_error_irq(struct skge_hw *hw)
  * because accessing phy registers requires spin wait which might
  * cause excess interrupt latency.
  */
-static void skge_extirq(void *arg)
+static void skge_extirq(struct work_struct *work)
 {
-	struct skge_hw *hw = arg;
+	struct skge_hw *hw = container_of(work, struct skge_hw, phy_work);
 	int port;
 
 	mutex_lock(&hw->phy_mutex);
@@ -3456,7 +3457,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 	skge->port = port;
 
 	/* Only used for Genesis XMAC */
-	INIT_WORK(&skge->link_thread, xm_link_timer, dev);
+	INIT_DELAYED_WORK(&skge->link_thread, xm_link_timer);
 
 	if (hw->chip_id != CHIP_ID_GENESIS) {
 		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
@@ -3543,7 +3544,7 @@ static int __devinit skge_probe(struct pci_dev *pdev,
 
 	hw->pdev = pdev;
 	mutex_init(&hw->phy_mutex);
-	INIT_WORK(&hw->phy_work, skge_extirq, hw);
+	INIT_WORK(&hw->phy_work, skge_extirq);
 	spin_lock_init(&hw->hw_lock);
 
 	hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000);
diff --git a/drivers/net/skge.h b/drivers/net/skge.h
index 537c0aaa1db8..23e5275d920c 100644
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -2456,7 +2456,7 @@ struct skge_port {
 
 	struct net_device_stats net_stats;
 
-	struct work_struct   link_thread;
+	struct delayed_work  link_thread;
 	enum pause_control   flow_control;
 	enum pause_status    flow_status;
 	u8		     rx_csum;
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 418138dd6c68..f88fcac0e46a 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1945,10 +1945,11 @@ spider_net_stop(struct net_device *netdev)
  * called as task when tx hangs, resets interface (if interface is up)
  */
 static void
-spider_net_tx_timeout_task(void *data)
+spider_net_tx_timeout_task(struct work_struct *work)
 {
-	struct net_device *netdev = data;
-	struct spider_net_card *card = netdev_priv(netdev);
+	struct spider_net_card *card =
+		container_of(work, struct spider_net_card, tx_timeout_task);
+	struct net_device *netdev = card->netdev;
 
 	if (!(netdev->flags & IFF_UP))
 		goto out;
@@ -2122,7 +2123,7 @@ spider_net_alloc_card(void)
 	card = netdev_priv(netdev);
 	card->netdev = netdev;
 	card->msg_enable = SPIDER_NET_DEFAULT_MSG;
-	INIT_WORK(&card->tx_timeout_task, spider_net_tx_timeout_task, netdev);
+	INIT_WORK(&card->tx_timeout_task, spider_net_tx_timeout_task);
 	init_waitqueue_head(&card->waitq);
 	atomic_set(&card->tx_timeout_task_counter, 0);
 
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e7ad20..004d651681ad 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -2281,9 +2281,9 @@ static void gem_do_stop(struct net_device *dev, int wol)
 	}
 }
 
-static void gem_reset_task(void *data)
+static void gem_reset_task(struct work_struct *work)
 {
-	struct gem *gp = (struct gem *) data;
+	struct gem *gp = container_of(work, struct gem, reset_task);
 
 	mutex_lock(&gp->pm_mutex);
 
@@ -3043,7 +3043,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
 	gp->link_timer.function = gem_link_timer;
 	gp->link_timer.data = (unsigned long) gp;
 
-	INIT_WORK(&gp->reset_task, gem_reset_task, gp);
+	INIT_WORK(&gp->reset_task, gem_reset_task);
 
 	gp->lstate = link_down;
 	gp->timer_ticks = 0;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index c20bb998e0e5..d9123c9adc1e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3654,9 +3654,9 @@ static void tg3_poll_controller(struct net_device *dev)
 }
 #endif
 
-static void tg3_reset_task(void *_data)
+static void tg3_reset_task(struct work_struct *work)
 {
-	struct tg3 *tp = _data;
+	struct tg3 *tp = container_of(work, struct tg3, reset_task);
 	unsigned int restart_timer;
 
 	tg3_full_lock(tp, 0);
@@ -11734,7 +11734,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 #endif
 	spin_lock_init(&tp->lock);
 	spin_lock_init(&tp->indirect_lock);
-	INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
+	INIT_WORK(&tp->reset_task, tg3_reset_task);
 
 	tp->regs = ioremap_nocache(tg3reg_base, tg3reg_len);
 	if (tp->regs == 0UL) {
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index e14f5a00f65a..f85f00251123 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -296,6 +296,7 @@ static void	TLan_SetMulticastList( struct net_device *);
 static int	TLan_ioctl( struct net_device *dev, struct ifreq *rq, int cmd);
 static int      TLan_probe1( struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent);
 static void	TLan_tx_timeout( struct net_device *dev);
+static void	TLan_tx_timeout_work(struct work_struct *work);
 static int 	tlan_init_one( struct pci_dev *pdev, const struct pci_device_id *ent);
 
 static u32	TLan_HandleInvalid( struct net_device *, u16 );
@@ -562,6 +563,7 @@ static int __devinit TLan_probe1(struct pci_dev *pdev,
 	priv = netdev_priv(dev);
 
 	priv->pciDev = pdev;
+	priv->dev = dev;
 
 	/* Is this a PCI device? */
 	if (pdev) {
@@ -634,7 +636,7 @@ static int __devinit TLan_probe1(struct pci_dev *pdev,
 
 	/* This will be used when we get an adapter error from
 	 * within our irq handler */
-	INIT_WORK(&priv->tlan_tqueue, (void *)(void*)TLan_tx_timeout, dev);
+	INIT_WORK(&priv->tlan_tqueue, TLan_tx_timeout_work);
 
 	spin_lock_init(&priv->lock);
 
@@ -1040,6 +1042,25 @@ static void TLan_tx_timeout(struct net_device *dev)
 }
 
 
+	/***************************************************************
+	 * 	TLan_tx_timeout_work
+	 *
+	 * 	Returns: nothing
+	 *
+	 * 	Params:
+	 * 		work	work item of device which timed out
+	 *
+	 **************************************************************/
+
+static void TLan_tx_timeout_work(struct work_struct *work)
+{
+	TLanPrivateInfo	*priv =
+		container_of(work, TLanPrivateInfo, tlan_tqueue);
+
+	TLan_tx_timeout(priv->dev);
+}
+
+
 
 	/***************************************************************
 	 *	TLan_StartTx
diff --git a/drivers/net/tlan.h b/drivers/net/tlan.h
index a44e2f2ef62a..41ce0b665937 100644
--- a/drivers/net/tlan.h
+++ b/drivers/net/tlan.h
@@ -170,6 +170,7 @@ typedef u8 TLanBuffer[TLAN_MAX_FRAME_SIZE];
 typedef struct tlan_private_tag {
 	struct net_device       *nextDevice;
 	struct pci_dev		*pciDev;
+	struct net_device       *dev;
 	void			*dmaStorage;
 	dma_addr_t		dmaStorageDMA;
 	unsigned int		dmaSize;
diff --git a/drivers/net/tulip/21142.c b/drivers/net/tulip/21142.c
index fa3a2bb105ad..942b839ccc5b 100644
--- a/drivers/net/tulip/21142.c
+++ b/drivers/net/tulip/21142.c
@@ -26,10 +26,11 @@ static u16 t21142_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, };
 
 /* Handle the 21143 uniquely: do autoselect with NWay, not the EEPROM list
    of available transceivers.  */
-void t21142_media_task(void *data)
+void t21142_media_task(struct work_struct *work)
 {
-	struct net_device *dev = data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp =
+		container_of(work, struct tulip_private, media_work);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->base_addr;
 	int csr12 = ioread32(ioaddr + CSR12);
 	int next_tick = 60*HZ;
diff --git a/drivers/net/tulip/timer.c b/drivers/net/tulip/timer.c
index 066e5d6bcbd8..df326fe1cc8f 100644
--- a/drivers/net/tulip/timer.c
+++ b/drivers/net/tulip/timer.c
@@ -18,10 +18,11 @@
 #include "tulip.h"
 
 
-void tulip_media_task(void *data)
+void tulip_media_task(struct work_struct *work)
 {
-	struct net_device *dev = data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp =
+		container_of(work, struct tulip_private, media_work);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->base_addr;
 	u32 csr12 = ioread32(ioaddr + CSR12);
 	int next_tick = 2*HZ;
diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h
index ad107f45c7b1..25f25da76917 100644
--- a/drivers/net/tulip/tulip.h
+++ b/drivers/net/tulip/tulip.h
@@ -44,7 +44,7 @@ struct tulip_chip_table {
 	int valid_intrs;	/* CSR7 interrupt enable settings */
 	int flags;
 	void (*media_timer) (unsigned long);
-	void (*media_task) (void *);
+	work_func_t media_task;
 };
 
 
@@ -392,6 +392,7 @@ struct tulip_private {
 	int csr12_shadow;
 	int pad0;		/* Used for 8-byte alignment */
 	struct work_struct media_work;
+	struct net_device *dev;
 };
 
 
@@ -406,7 +407,7 @@ struct eeprom_fixup {
 
 /* 21142.c */
 extern u16 t21142_csr14[];
-void t21142_media_task(void *data);
+void t21142_media_task(struct work_struct *work);
 void t21142_start_nway(struct net_device *dev);
 void t21142_lnk_change(struct net_device *dev, int csr5);
 
@@ -444,7 +445,7 @@ void pnic_lnk_change(struct net_device *dev, int csr5);
 void pnic_timer(unsigned long data);
 
 /* timer.c */
-void tulip_media_task(void *data);
+void tulip_media_task(struct work_struct *work);
 void mxic_timer(unsigned long data);
 void comet_timer(unsigned long data);
 
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 0aee618f883c..5a35354aa523 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -1367,6 +1367,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	 * it is zeroed and aligned in alloc_etherdev
 	 */
 	tp = netdev_priv(dev);
+	tp->dev = dev;
 
 	tp->rx_ring = pci_alloc_consistent(pdev,
 					   sizeof(struct tulip_rx_desc) * RX_RING_SIZE +
@@ -1389,7 +1390,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	tp->timer.data = (unsigned long)dev;
 	tp->timer.function = tulip_tbl[tp->chip_id].media_timer;
 
-	INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task, dev);
+	INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task);
 
 	dev->base_addr = (unsigned long)ioaddr;
 
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index 931cbdf6d791..b2a23aed4428 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -125,8 +125,8 @@ static int cpc_tty_write_room(struct tty_struct *tty);
 static int cpc_tty_chars_in_buffer(struct tty_struct *tty);
 static void cpc_tty_flush_buffer(struct tty_struct *tty);
 static void cpc_tty_hangup(struct tty_struct *tty);
-static void cpc_tty_rx_work(void *data);
-static void cpc_tty_tx_work(void *data);
+static void cpc_tty_rx_work(struct work_struct *work);
+static void cpc_tty_tx_work(struct work_struct *work);
 static int cpc_tty_send_to_card(pc300dev_t *dev,void *buf, int len);
 static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx);
 static void cpc_tty_signal_off(pc300dev_t *pc300dev, unsigned char);
@@ -261,8 +261,8 @@ void cpc_tty_init(pc300dev_t *pc300dev)
 	cpc_tty->tty_minor = port + CPC_TTY_MINOR_START;
 	cpc_tty->pc300dev = pc300dev; 
 
-	INIT_WORK(&cpc_tty->tty_tx_work, cpc_tty_tx_work, (void *)cpc_tty);
-	INIT_WORK(&cpc_tty->tty_rx_work, cpc_tty_rx_work, (void *)port);
+	INIT_WORK(&cpc_tty->tty_tx_work, cpc_tty_tx_work);
+	INIT_WORK(&cpc_tty->tty_rx_work, cpc_tty_rx_work);
 	
 	cpc_tty->buf_rx.first = cpc_tty->buf_rx.last = NULL;
 
@@ -659,21 +659,23 @@ static void cpc_tty_hangup(struct tty_struct *tty)
  * o call the line disc. read
  * o free memory
  */
-static void cpc_tty_rx_work(void * data)
+static void cpc_tty_rx_work(struct work_struct *work)
 {
+	st_cpc_tty_area *cpc_tty;
 	unsigned long port;
 	int i, j;
-	st_cpc_tty_area *cpc_tty; 
 	volatile st_cpc_rx_buf *buf;
 	char flags=0,flg_rx=1; 
 	struct tty_ldisc *ld;
 
 	if (cpc_tty_cnt == 0) return;
-
 	
 	for (i=0; (i < 4) && flg_rx ; i++) {
 		flg_rx = 0;
-		port = (unsigned long)data;
+
+		cpc_tty = container_of(work, st_cpc_tty_area, tty_rx_work);
+		port = cpc_tty - cpc_tty_area;
+
 		for (j=0; j < CPC_TTY_NPORTS; j++) {
 			cpc_tty = &cpc_tty_area[port];
 		
@@ -882,9 +884,10 @@ void cpc_tty_receive(pc300dev_t *pc300dev)
  * o if need call line discipline wakeup
  * o call wake_up_interruptible
  */
-static void cpc_tty_tx_work(void *data)
+static void cpc_tty_tx_work(struct work_struct *work)
 {
-	st_cpc_tty_area *cpc_tty = (st_cpc_tty_area *) data; 
+	st_cpc_tty_area *cpc_tty =
+		container_of(work, st_cpc_tty_area, tty_tx_work);
 	struct tty_struct *tty; 
 
 	CPC_TTY_DBG("%s: cpc_tty_tx_work init\n",cpc_tty->name);
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx.h b/drivers/net/wireless/bcm43xx/bcm43xx.h
index d6a8bf09878e..fbc0c087f53c 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx.h
+++ b/drivers/net/wireless/bcm43xx/bcm43xx.h
@@ -787,7 +787,7 @@ struct bcm43xx_private {
 	struct tasklet_struct isr_tasklet;
 
 	/* Periodic tasks */
-	struct work_struct periodic_work;
+	struct delayed_work periodic_work;
 	unsigned int periodic_state;
 
 	struct work_struct restart_work;
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_main.c b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
index a1b783813d8e..728a9b789fdf 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_main.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
@@ -3177,9 +3177,10 @@ static int estimate_periodic_work_badness(unsigned int state)
 	return badness;
 }
 
-static void bcm43xx_periodic_work_handler(void *d)
+static void bcm43xx_periodic_work_handler(struct work_struct *work)
 {
-	struct bcm43xx_private *bcm = d;
+	struct bcm43xx_private *bcm =
+		container_of(work, struct bcm43xx_private, periodic_work.work);
 	struct net_device *net_dev = bcm->net_dev;
 	unsigned long flags;
 	u32 savedirqs = 0;
@@ -3242,11 +3243,11 @@ void bcm43xx_periodic_tasks_delete(struct bcm43xx_private *bcm)
 
 void bcm43xx_periodic_tasks_setup(struct bcm43xx_private *bcm)
 {
-	struct work_struct *work = &(bcm->periodic_work);
+	struct delayed_work *work = &bcm->periodic_work;
 
 	assert(bcm43xx_status(bcm) == BCM43xx_STAT_INITIALIZED);
-	INIT_WORK(work, bcm43xx_periodic_work_handler, bcm);
-	schedule_work(work);
+	INIT_DELAYED_WORK(work, bcm43xx_periodic_work_handler);
+	schedule_delayed_work(work, 0);
 }
 
 static void bcm43xx_security_init(struct bcm43xx_private *bcm)
@@ -3598,7 +3599,7 @@ static int bcm43xx_init_board(struct bcm43xx_private *bcm)
 	bcm43xx_periodic_tasks_setup(bcm);
 
 	/*FIXME: This should be handled by softmac instead. */
-	schedule_work(&bcm->softmac->associnfo.work);
+	schedule_delayed_work(&bcm->softmac->associnfo.work, 0);
 
 out:
 	mutex_unlock(&(bcm)->mutex);
@@ -4149,9 +4150,10 @@ static void __devexit bcm43xx_remove_one(struct pci_dev *pdev)
 /* Hard-reset the chip. Do not call this directly.
  * Use bcm43xx_controller_restart()
  */
-static void bcm43xx_chip_reset(void *_bcm)
+static void bcm43xx_chip_reset(struct work_struct *work)
 {
-	struct bcm43xx_private *bcm = _bcm;
+	struct bcm43xx_private *bcm =
+		container_of(work, struct bcm43xx_private, restart_work);
 	struct bcm43xx_phyinfo *phy;
 	int err = -ENODEV;
 
@@ -4178,7 +4180,7 @@ void bcm43xx_controller_restart(struct bcm43xx_private *bcm, const char *reason)
 	if (bcm43xx_status(bcm) != BCM43xx_STAT_INITIALIZED)
 		return;
 	printk(KERN_ERR PFX "Controller RESET (%s) ...\n", reason);
-	INIT_WORK(&bcm->restart_work, bcm43xx_chip_reset, bcm);
+	INIT_WORK(&bcm->restart_work, bcm43xx_chip_reset);
 	schedule_work(&bcm->restart_work);
 }
 
diff --git a/drivers/net/wireless/hostap/hostap.h b/drivers/net/wireless/hostap/hostap.h
index e663518bd570..e89c890d16fd 100644
--- a/drivers/net/wireless/hostap/hostap.h
+++ b/drivers/net/wireless/hostap/hostap.h
@@ -35,7 +35,7 @@ int hostap_80211_get_hdrlen(u16 fc);
 struct net_device_stats *hostap_get_stats(struct net_device *dev);
 void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 		      int main_dev);
-void hostap_set_multicast_list_queue(void *data);
+void hostap_set_multicast_list_queue(struct work_struct *work);
 int hostap_set_hostapd(local_info_t *local, int val, int rtnl_locked);
 int hostap_set_hostapd_sta(local_info_t *local, int val, int rtnl_locked);
 void hostap_cleanup(local_info_t *local);
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index ba13125024cb..08bc57a4b895 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -49,10 +49,10 @@ MODULE_PARM_DESC(autom_ap_wds, "Add WDS connections to other APs "
 static struct sta_info* ap_get_sta(struct ap_data *ap, u8 *sta);
 static void hostap_event_expired_sta(struct net_device *dev,
 				     struct sta_info *sta);
-static void handle_add_proc_queue(void *data);
+static void handle_add_proc_queue(struct work_struct *work);
 
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
-static void handle_wds_oper_queue(void *data);
+static void handle_wds_oper_queue(struct work_struct *work);
 static void prism2_send_mgmt(struct net_device *dev,
 			     u16 type_subtype, char *body,
 			     int body_len, u8 *addr, u16 tx_cb_idx);
@@ -807,7 +807,7 @@ void hostap_init_data(local_info_t *local)
 	INIT_LIST_HEAD(&ap->sta_list);
 
 	/* Initialize task queue structure for AP management */
-	INIT_WORK(&local->ap->add_sta_proc_queue, handle_add_proc_queue, ap);
+	INIT_WORK(&local->ap->add_sta_proc_queue, handle_add_proc_queue);
 
 	ap->tx_callback_idx =
 		hostap_tx_callback_register(local, hostap_ap_tx_cb, ap);
@@ -815,7 +815,7 @@ void hostap_init_data(local_info_t *local)
 		printk(KERN_WARNING "%s: failed to register TX callback for "
 		       "AP\n", local->dev->name);
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
-	INIT_WORK(&local->ap->wds_oper_queue, handle_wds_oper_queue, local);
+	INIT_WORK(&local->ap->wds_oper_queue, handle_wds_oper_queue);
 
 	ap->tx_callback_auth =
 		hostap_tx_callback_register(local, hostap_ap_tx_cb_auth, ap);
@@ -1062,9 +1062,10 @@ static int prism2_sta_proc_read(char *page, char **start, off_t off,
 }
 
 
-static void handle_add_proc_queue(void *data)
+static void handle_add_proc_queue(struct work_struct *work)
 {
-	struct ap_data *ap = (struct ap_data *) data;
+	struct ap_data *ap = container_of(work, struct ap_data,
+					  add_sta_proc_queue);
 	struct sta_info *sta;
 	char name[20];
 	struct add_sta_proc_data *entry, *prev;
@@ -1952,9 +1953,11 @@ static void handle_pspoll(local_info_t *local,
 
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
 
-static void handle_wds_oper_queue(void *data)
+static void handle_wds_oper_queue(struct work_struct *work)
 {
-	local_info_t *local = data;
+	struct ap_data *ap = container_of(work, struct ap_data,
+					  wds_oper_queue);
+	local_info_t *local = ap->local;
 	struct wds_oper_data *entry, *prev;
 
 	spin_lock_bh(&local->lock);
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index ed00ebb6e7f4..c19e68636a1c 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -1645,9 +1645,9 @@ static void prism2_schedule_reset(local_info_t *local)
 
 /* Called only as scheduled task after noticing card timeout in interrupt
  * context */
-static void handle_reset_queue(void *data)
+static void handle_reset_queue(struct work_struct *work)
 {
-	local_info_t *local = (local_info_t *) data;
+	local_info_t *local = container_of(work, local_info_t, reset_queue);
 
 	printk(KERN_DEBUG "%s: scheduled card reset\n", local->dev->name);
 	prism2_hw_reset(local->dev);
@@ -2896,9 +2896,10 @@ static void hostap_passive_scan(unsigned long data)
 
 /* Called only as a scheduled task when communications quality values should
  * be updated. */
-static void handle_comms_qual_update(void *data)
+static void handle_comms_qual_update(struct work_struct *work)
 {
-	local_info_t *local = data;
+	local_info_t *local =
+		container_of(work, local_info_t, comms_qual_update);
 	prism2_update_comms_qual(local->dev);
 }
 
@@ -3050,9 +3051,9 @@ static int prism2_set_tim(struct net_device *dev, int aid, int set)
 }
 
 
-static void handle_set_tim_queue(void *data)
+static void handle_set_tim_queue(struct work_struct *work)
 {
-	local_info_t *local = (local_info_t *) data;
+	local_info_t *local = container_of(work, local_info_t, set_tim_queue);
 	struct set_tim_data *entry;
 	u16 val;
 
@@ -3209,15 +3210,15 @@ prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
 	local->scan_channel_mask = 0xffff;
 
 	/* Initialize task queue structures */
-	INIT_WORK(&local->reset_queue, handle_reset_queue, local);
+	INIT_WORK(&local->reset_queue, handle_reset_queue);
 	INIT_WORK(&local->set_multicast_list_queue,
-		  hostap_set_multicast_list_queue, local->dev);
+		  hostap_set_multicast_list_queue);
 
-	INIT_WORK(&local->set_tim_queue, handle_set_tim_queue, local);
+	INIT_WORK(&local->set_tim_queue, handle_set_tim_queue);
 	INIT_LIST_HEAD(&local->set_tim_list);
 	spin_lock_init(&local->set_tim_lock);
 
-	INIT_WORK(&local->comms_qual_update, handle_comms_qual_update, local);
+	INIT_WORK(&local->comms_qual_update, handle_comms_qual_update);
 
 	/* Initialize tasklets for handling hardware IRQ related operations
 	 * outside hw IRQ handler */
diff --git a/drivers/net/wireless/hostap/hostap_info.c b/drivers/net/wireless/hostap/hostap_info.c
index 50f72d831cf4..5fd2b1ad7f5e 100644
--- a/drivers/net/wireless/hostap/hostap_info.c
+++ b/drivers/net/wireless/hostap/hostap_info.c
@@ -474,9 +474,9 @@ static void handle_info_queue_scanresults(local_info_t *local)
 
 /* Called only as scheduled task after receiving info frames (used to avoid
  * pending too much time in HW IRQ handler). */
-static void handle_info_queue(void *data)
+static void handle_info_queue(struct work_struct *work)
 {
-	local_info_t *local = (local_info_t *) data;
+	local_info_t *local = container_of(work, local_info_t, info_queue);
 
 	if (test_and_clear_bit(PRISM2_INFO_PENDING_LINKSTATUS,
 			       &local->pending_info))
@@ -493,7 +493,7 @@ void hostap_info_init(local_info_t *local)
 {
 	skb_queue_head_init(&local->info_list);
 #ifndef PRISM2_NO_STATION_MODES
-	INIT_WORK(&local->info_queue, handle_info_queue, local);
+	INIT_WORK(&local->info_queue, handle_info_queue);
 #endif /* PRISM2_NO_STATION_MODES */
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 53374fcba77e..0796be9d9e77 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -767,14 +767,14 @@ static int prism2_set_mac_address(struct net_device *dev, void *p)
 
 /* TODO: to be further implemented as soon as Prism2 fully supports
  *       GroupAddresses and correct documentation is available */
-void hostap_set_multicast_list_queue(void *data)
+void hostap_set_multicast_list_queue(struct work_struct *work)
 {
-	struct net_device *dev = (struct net_device *) data;
+	local_info_t *local =
+		container_of(work, local_info_t, set_multicast_list_queue);
+	struct net_device *dev = local->dev;
 	struct hostap_interface *iface;
-	local_info_t *local;
 
 	iface = netdev_priv(dev);
-	local = iface->local;
 	if (hostap_set_word(dev, HFA384X_RID_PROMISCUOUSMODE,
 			    local->is_promisc)) {
 		printk(KERN_INFO "%s: %sabling promiscuous mode failed\n",
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 4e4eaa2a99ca..0f554373a60d 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -316,7 +316,7 @@ static void ipw2100_release_firmware(struct ipw2100_priv *priv,
 				     struct ipw2100_fw *fw);
 static int ipw2100_ucode_download(struct ipw2100_priv *priv,
 				  struct ipw2100_fw *fw);
-static void ipw2100_wx_event_work(struct ipw2100_priv *priv);
+static void ipw2100_wx_event_work(struct work_struct *work);
 static struct iw_statistics *ipw2100_wx_wireless_stats(struct net_device *dev);
 static struct iw_handler_def ipw2100_wx_handler_def;
 
@@ -679,7 +679,8 @@ static void schedule_reset(struct ipw2100_priv *priv)
 			queue_delayed_work(priv->workqueue, &priv->reset_work,
 					   priv->reset_backoff * HZ);
 		else
-			queue_work(priv->workqueue, &priv->reset_work);
+			queue_delayed_work(priv->workqueue, &priv->reset_work,
+					   0);
 
 		if (priv->reset_backoff < MAX_RESET_BACKOFF)
 			priv->reset_backoff++;
@@ -1873,8 +1874,10 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 	netif_stop_queue(priv->net_dev);
 }
 
-static void ipw2100_reset_adapter(struct ipw2100_priv *priv)
+static void ipw2100_reset_adapter(struct work_struct *work)
 {
+	struct ipw2100_priv *priv =
+		container_of(work, struct ipw2100_priv, reset_work.work);
 	unsigned long flags;
 	union iwreq_data wrqu = {
 		.ap_addr = {
@@ -2071,9 +2074,9 @@ static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status)
 		return;
 
 	if (priv->status & STATUS_SECURITY_UPDATED)
-		queue_work(priv->workqueue, &priv->security_work);
+		queue_delayed_work(priv->workqueue, &priv->security_work, 0);
 
-	queue_work(priv->workqueue, &priv->wx_event_work);
+	queue_delayed_work(priv->workqueue, &priv->wx_event_work, 0);
 }
 
 static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
@@ -5524,8 +5527,11 @@ static int ipw2100_configure_security(struct ipw2100_priv *priv, int batch_mode)
 	return err;
 }
 
-static void ipw2100_security_work(struct ipw2100_priv *priv)
+static void ipw2100_security_work(struct work_struct *work)
 {
+	struct ipw2100_priv *priv =
+		container_of(work, struct ipw2100_priv, security_work.work);
+
 	/* If we happen to have reconnected before we get a chance to
 	 * process this, then update the security settings--which causes
 	 * a disassociation to occur */
@@ -5748,7 +5754,7 @@ static int ipw2100_set_address(struct net_device *dev, void *p)
 
 	priv->reset_backoff = 0;
 	mutex_unlock(&priv->action_mutex);
-	ipw2100_reset_adapter(priv);
+	ipw2100_reset_adapter(&priv->reset_work.work);
 	return 0;
 
       done:
@@ -5923,9 +5929,10 @@ static const struct ethtool_ops ipw2100_ethtool_ops = {
 	.get_drvinfo = ipw_ethtool_get_drvinfo,
 };
 
-static void ipw2100_hang_check(void *adapter)
+static void ipw2100_hang_check(struct work_struct *work)
 {
-	struct ipw2100_priv *priv = adapter;
+	struct ipw2100_priv *priv =
+		container_of(work, struct ipw2100_priv, hang_check.work);
 	unsigned long flags;
 	u32 rtc = 0xa5a5a5a5;
 	u32 len = sizeof(rtc);
@@ -5965,9 +5972,10 @@ static void ipw2100_hang_check(void *adapter)
 	spin_unlock_irqrestore(&priv->low_lock, flags);
 }
 
-static void ipw2100_rf_kill(void *adapter)
+static void ipw2100_rf_kill(struct work_struct *work)
 {
-	struct ipw2100_priv *priv = adapter;
+	struct ipw2100_priv *priv =
+		container_of(work, struct ipw2100_priv, rf_kill.work);
 	unsigned long flags;
 
 	spin_lock_irqsave(&priv->low_lock, flags);
@@ -6117,14 +6125,11 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
 
 	priv->workqueue = create_workqueue(DRV_NAME);
 
-	INIT_WORK(&priv->reset_work,
-		  (void (*)(void *))ipw2100_reset_adapter, priv);
-	INIT_WORK(&priv->security_work,
-		  (void (*)(void *))ipw2100_security_work, priv);
-	INIT_WORK(&priv->wx_event_work,
-		  (void (*)(void *))ipw2100_wx_event_work, priv);
-	INIT_WORK(&priv->hang_check, ipw2100_hang_check, priv);
-	INIT_WORK(&priv->rf_kill, ipw2100_rf_kill, priv);
+	INIT_DELAYED_WORK(&priv->reset_work, ipw2100_reset_adapter);
+	INIT_DELAYED_WORK(&priv->security_work, ipw2100_security_work);
+	INIT_DELAYED_WORK(&priv->wx_event_work, ipw2100_wx_event_work);
+	INIT_DELAYED_WORK(&priv->hang_check, ipw2100_hang_check);
+	INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill);
 
 	tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
 		     ipw2100_irq_tasklet, (unsigned long)priv);
@@ -8290,8 +8295,10 @@ static struct iw_handler_def ipw2100_wx_handler_def = {
 	.get_wireless_stats = ipw2100_wx_wireless_stats,
 };
 
-static void ipw2100_wx_event_work(struct ipw2100_priv *priv)
+static void ipw2100_wx_event_work(struct work_struct *work)
 {
+	struct ipw2100_priv *priv =
+		container_of(work, struct ipw2100_priv, wx_event_work.work);
 	union iwreq_data wrqu;
 	int len = ETH_ALEN;
 
diff --git a/drivers/net/wireless/ipw2100.h b/drivers/net/wireless/ipw2100.h
index 55b7227198df..de7d384d38af 100644
--- a/drivers/net/wireless/ipw2100.h
+++ b/drivers/net/wireless/ipw2100.h
@@ -583,11 +583,11 @@ struct ipw2100_priv {
 	struct tasklet_struct irq_tasklet;
 
 	struct workqueue_struct *workqueue;
-	struct work_struct reset_work;
-	struct work_struct security_work;
-	struct work_struct wx_event_work;
-	struct work_struct hang_check;
-	struct work_struct rf_kill;
+	struct delayed_work reset_work;
+	struct delayed_work security_work;
+	struct delayed_work wx_event_work;
+	struct delayed_work hang_check;
+	struct delayed_work rf_kill;
 
 	u32 interrupts;
 	int tx_interrupts;
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 1f742814a01c..587a0918fa52 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -187,9 +187,9 @@ static struct ipw_rx_queue *ipw_rx_queue_alloc(struct ipw_priv *);
 static void ipw_rx_queue_free(struct ipw_priv *, struct ipw_rx_queue *);
 static void ipw_rx_queue_replenish(void *);
 static int ipw_up(struct ipw_priv *);
-static void ipw_bg_up(void *);
+static void ipw_bg_up(struct work_struct *work);
 static void ipw_down(struct ipw_priv *);
-static void ipw_bg_down(void *);
+static void ipw_bg_down(struct work_struct *work);
 static int ipw_config(struct ipw_priv *);
 static int init_supported_rates(struct ipw_priv *priv,
 				struct ipw_supported_rates *prates);
@@ -862,11 +862,12 @@ static void ipw_led_link_on(struct ipw_priv *priv)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipw_bg_led_link_on(void *data)
+static void ipw_bg_led_link_on(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, led_link_on.work);
 	mutex_lock(&priv->mutex);
-	ipw_led_link_on(data);
+	ipw_led_link_on(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -906,11 +907,12 @@ static void ipw_led_link_off(struct ipw_priv *priv)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipw_bg_led_link_off(void *data)
+static void ipw_bg_led_link_off(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, led_link_off.work);
 	mutex_lock(&priv->mutex);
-	ipw_led_link_off(data);
+	ipw_led_link_off(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -985,11 +987,12 @@ static void ipw_led_activity_off(struct ipw_priv *priv)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipw_bg_led_activity_off(void *data)
+static void ipw_bg_led_activity_off(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, led_act_off.work);
 	mutex_lock(&priv->mutex);
-	ipw_led_activity_off(data);
+	ipw_led_activity_off(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -2228,11 +2231,12 @@ static void ipw_adapter_restart(void *adapter)
 	}
 }
 
-static void ipw_bg_adapter_restart(void *data)
+static void ipw_bg_adapter_restart(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, adapter_restart);
 	mutex_lock(&priv->mutex);
-	ipw_adapter_restart(data);
+	ipw_adapter_restart(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -2249,11 +2253,12 @@ static void ipw_scan_check(void *data)
 	}
 }
 
-static void ipw_bg_scan_check(void *data)
+static void ipw_bg_scan_check(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, scan_check.work);
 	mutex_lock(&priv->mutex);
-	ipw_scan_check(data);
+	ipw_scan_check(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -3831,17 +3836,19 @@ static int ipw_disassociate(void *data)
 	return 1;
 }
 
-static void ipw_bg_disassociate(void *data)
+static void ipw_bg_disassociate(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, disassociate);
 	mutex_lock(&priv->mutex);
-	ipw_disassociate(data);
+	ipw_disassociate(priv);
 	mutex_unlock(&priv->mutex);
 }
 
-static void ipw_system_config(void *data)
+static void ipw_system_config(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, system_config);
 
 #ifdef CONFIG_IPW2200_PROMISCUOUS
 	if (priv->prom_net_dev && netif_running(priv->prom_net_dev)) {
@@ -4208,11 +4215,12 @@ static void ipw_gather_stats(struct ipw_priv *priv)
 			   IPW_STATS_INTERVAL);
 }
 
-static void ipw_bg_gather_stats(void *data)
+static void ipw_bg_gather_stats(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, gather_stats.work);
 	mutex_lock(&priv->mutex);
-	ipw_gather_stats(data);
+	ipw_gather_stats(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -4268,8 +4276,8 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
 		if (!(priv->status & STATUS_ROAMING)) {
 			priv->status |= STATUS_ROAMING;
 			if (!(priv->status & STATUS_SCANNING))
-				queue_work(priv->workqueue,
-					   &priv->request_scan);
+				queue_delayed_work(priv->workqueue,
+						   &priv->request_scan, 0);
 		}
 		return;
 	}
@@ -4607,8 +4615,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 #ifdef CONFIG_IPW2200_MONITOR
 			if (priv->ieee->iw_mode == IW_MODE_MONITOR) {
 				priv->status |= STATUS_SCAN_FORCED;
-				queue_work(priv->workqueue,
-					   &priv->request_scan);
+				queue_delayed_work(priv->workqueue,
+						   &priv->request_scan, 0);
 				break;
 			}
 			priv->status &= ~STATUS_SCAN_FORCED;
@@ -4631,8 +4639,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					/* Don't schedule if we aborted the scan */
 					priv->status &= ~STATUS_ROAMING;
 			} else if (priv->status & STATUS_SCAN_PENDING)
-				queue_work(priv->workqueue,
-					   &priv->request_scan);
+				queue_delayed_work(priv->workqueue,
+						   &priv->request_scan, 0);
 			else if (priv->config & CFG_BACKGROUND_SCAN
 				 && priv->status & STATUS_ASSOCIATED)
 				queue_delayed_work(priv->workqueue,
@@ -5055,11 +5063,12 @@ static void ipw_rx_queue_replenish(void *data)
 	ipw_rx_queue_restock(priv);
 }
 
-static void ipw_bg_rx_queue_replenish(void *data)
+static void ipw_bg_rx_queue_replenish(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, rx_replenish);
 	mutex_lock(&priv->mutex);
-	ipw_rx_queue_replenish(data);
+	ipw_rx_queue_replenish(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -5489,9 +5498,10 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	return 1;
 }
 
-static void ipw_merge_adhoc_network(void *data)
+static void ipw_merge_adhoc_network(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, merge_networks);
 	struct ieee80211_network *network = NULL;
 	struct ipw_network_match match = {
 		.network = priv->assoc_network
@@ -5948,11 +5958,12 @@ static void ipw_adhoc_check(void *data)
 			   priv->assoc_request.beacon_interval);
 }
 
-static void ipw_bg_adhoc_check(void *data)
+static void ipw_bg_adhoc_check(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, adhoc_check.work);
 	mutex_lock(&priv->mutex);
-	ipw_adhoc_check(data);
+	ipw_adhoc_check(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -6299,19 +6310,26 @@ done:
 	return err;
 }
 
-static int ipw_request_passive_scan(struct ipw_priv *priv) {
-  	return ipw_request_scan_helper(priv, IW_SCAN_TYPE_PASSIVE);
+static void ipw_request_passive_scan(struct work_struct *work)
+{
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, request_passive_scan);
+  	ipw_request_scan_helper(priv, IW_SCAN_TYPE_PASSIVE);
 }
 
-static int ipw_request_scan(struct ipw_priv *priv) {
-	return ipw_request_scan_helper(priv, IW_SCAN_TYPE_ACTIVE);
+static void ipw_request_scan(struct work_struct *work)
+{
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, request_scan.work);
+	ipw_request_scan_helper(priv, IW_SCAN_TYPE_ACTIVE);
 }
 
-static void ipw_bg_abort_scan(void *data)
+static void ipw_bg_abort_scan(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, abort_scan);
 	mutex_lock(&priv->mutex);
-	ipw_abort_scan(data);
+	ipw_abort_scan(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -7084,9 +7102,10 @@ static int ipw_qos_set_tx_queue_command(struct ipw_priv *priv,
 /*
 * background support to run QoS activate functionality
 */
-static void ipw_bg_qos_activate(void *data)
+static void ipw_bg_qos_activate(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, qos_activate);
 
 	if (priv == NULL)
 		return;
@@ -7394,11 +7413,12 @@ static void ipw_roam(void *data)
 	priv->status &= ~STATUS_ROAMING;
 }
 
-static void ipw_bg_roam(void *data)
+static void ipw_bg_roam(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, roam);
 	mutex_lock(&priv->mutex);
-	ipw_roam(data);
+	ipw_roam(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -7479,8 +7499,8 @@ static int ipw_associate(void *data)
 						   &priv->request_scan,
 						   SCAN_INTERVAL);
 			else
-				queue_work(priv->workqueue,
-					   &priv->request_scan);
+				queue_delayed_work(priv->workqueue,
+						   &priv->request_scan, 0);
 		}
 
 		return 0;
@@ -7491,11 +7511,12 @@ static int ipw_associate(void *data)
 	return 1;
 }
 
-static void ipw_bg_associate(void *data)
+static void ipw_bg_associate(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, associate);
 	mutex_lock(&priv->mutex);
-	ipw_associate(data);
+	ipw_associate(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -9410,7 +9431,7 @@ static int ipw_wx_set_scan(struct net_device *dev,
 
 	IPW_DEBUG_WX("Start scan\n");
 
-	queue_work(priv->workqueue, &priv->request_scan);
+	queue_delayed_work(priv->workqueue, &priv->request_scan, 0);
 
 	return 0;
 }
@@ -10547,11 +10568,12 @@ static void ipw_rf_kill(void *adapter)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipw_bg_rf_kill(void *data)
+static void ipw_bg_rf_kill(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, rf_kill.work);
 	mutex_lock(&priv->mutex);
-	ipw_rf_kill(data);
+	ipw_rf_kill(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -10582,11 +10604,12 @@ static void ipw_link_up(struct ipw_priv *priv)
 		queue_delayed_work(priv->workqueue, &priv->request_scan, HZ);
 }
 
-static void ipw_bg_link_up(void *data)
+static void ipw_bg_link_up(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, link_up);
 	mutex_lock(&priv->mutex);
-	ipw_link_up(data);
+	ipw_link_up(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -10606,15 +10629,16 @@ static void ipw_link_down(struct ipw_priv *priv)
 
 	if (!(priv->status & STATUS_EXIT_PENDING)) {
 		/* Queue up another scan... */
-		queue_work(priv->workqueue, &priv->request_scan);
+		queue_delayed_work(priv->workqueue, &priv->request_scan, 0);
 	}
 }
 
-static void ipw_bg_link_down(void *data)
+static void ipw_bg_link_down(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, link_down);
 	mutex_lock(&priv->mutex);
-	ipw_link_down(data);
+	ipw_link_down(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -10626,38 +10650,30 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv)
 	init_waitqueue_head(&priv->wait_command_queue);
 	init_waitqueue_head(&priv->wait_state);
 
-	INIT_WORK(&priv->adhoc_check, ipw_bg_adhoc_check, priv);
-	INIT_WORK(&priv->associate, ipw_bg_associate, priv);
-	INIT_WORK(&priv->disassociate, ipw_bg_disassociate, priv);
-	INIT_WORK(&priv->system_config, ipw_system_config, priv);
-	INIT_WORK(&priv->rx_replenish, ipw_bg_rx_queue_replenish, priv);
-	INIT_WORK(&priv->adapter_restart, ipw_bg_adapter_restart, priv);
-	INIT_WORK(&priv->rf_kill, ipw_bg_rf_kill, priv);
-	INIT_WORK(&priv->up, (void (*)(void *))ipw_bg_up, priv);
-	INIT_WORK(&priv->down, (void (*)(void *))ipw_bg_down, priv);
-	INIT_WORK(&priv->request_scan,
-		  (void (*)(void *))ipw_request_scan, priv);
-	INIT_WORK(&priv->request_passive_scan,
-		  (void (*)(void *))ipw_request_passive_scan, priv);
-	INIT_WORK(&priv->gather_stats,
-		  (void (*)(void *))ipw_bg_gather_stats, priv);
-	INIT_WORK(&priv->abort_scan, (void (*)(void *))ipw_bg_abort_scan, priv);
-	INIT_WORK(&priv->roam, ipw_bg_roam, priv);
-	INIT_WORK(&priv->scan_check, ipw_bg_scan_check, priv);
-	INIT_WORK(&priv->link_up, (void (*)(void *))ipw_bg_link_up, priv);
-	INIT_WORK(&priv->link_down, (void (*)(void *))ipw_bg_link_down, priv);
-	INIT_WORK(&priv->led_link_on, (void (*)(void *))ipw_bg_led_link_on,
-		  priv);
-	INIT_WORK(&priv->led_link_off, (void (*)(void *))ipw_bg_led_link_off,
-		  priv);
-	INIT_WORK(&priv->led_act_off, (void (*)(void *))ipw_bg_led_activity_off,
-		  priv);
-	INIT_WORK(&priv->merge_networks,
-		  (void (*)(void *))ipw_merge_adhoc_network, priv);
+	INIT_DELAYED_WORK(&priv->adhoc_check, ipw_bg_adhoc_check);
+	INIT_WORK(&priv->associate, ipw_bg_associate);
+	INIT_WORK(&priv->disassociate, ipw_bg_disassociate);
+	INIT_WORK(&priv->system_config, ipw_system_config);
+	INIT_WORK(&priv->rx_replenish, ipw_bg_rx_queue_replenish);
+	INIT_WORK(&priv->adapter_restart, ipw_bg_adapter_restart);
+	INIT_DELAYED_WORK(&priv->rf_kill, ipw_bg_rf_kill);
+	INIT_WORK(&priv->up, ipw_bg_up);
+	INIT_WORK(&priv->down, ipw_bg_down);
+	INIT_DELAYED_WORK(&priv->request_scan, ipw_request_scan);
+	INIT_WORK(&priv->request_passive_scan, ipw_request_passive_scan);
+	INIT_DELAYED_WORK(&priv->gather_stats, ipw_bg_gather_stats);
+	INIT_WORK(&priv->abort_scan, ipw_bg_abort_scan);
+	INIT_WORK(&priv->roam, ipw_bg_roam);
+	INIT_DELAYED_WORK(&priv->scan_check, ipw_bg_scan_check);
+	INIT_WORK(&priv->link_up, ipw_bg_link_up);
+	INIT_WORK(&priv->link_down, ipw_bg_link_down);
+	INIT_DELAYED_WORK(&priv->led_link_on, ipw_bg_led_link_on);
+	INIT_DELAYED_WORK(&priv->led_link_off, ipw_bg_led_link_off);
+	INIT_DELAYED_WORK(&priv->led_act_off, ipw_bg_led_activity_off);
+	INIT_WORK(&priv->merge_networks, ipw_merge_adhoc_network);
 
 #ifdef CONFIG_IPW2200_QOS
-	INIT_WORK(&priv->qos_activate, (void (*)(void *))ipw_bg_qos_activate,
-		  priv);
+	INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate);
 #endif				/* CONFIG_IPW2200_QOS */
 
 	tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
@@ -11190,7 +11206,8 @@ static int ipw_up(struct ipw_priv *priv)
 
 			/* If configure to try and auto-associate, kick
 			 * off a scan. */
-			queue_work(priv->workqueue, &priv->request_scan);
+			queue_delayed_work(priv->workqueue,
+					   &priv->request_scan, 0);
 
 			return 0;
 		}
@@ -11211,11 +11228,12 @@ static int ipw_up(struct ipw_priv *priv)
 	return -EIO;
 }
 
-static void ipw_bg_up(void *data)
+static void ipw_bg_up(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, up);
 	mutex_lock(&priv->mutex);
-	ipw_up(data);
+	ipw_up(priv);
 	mutex_unlock(&priv->mutex);
 }
 
@@ -11282,11 +11300,12 @@ static void ipw_down(struct ipw_priv *priv)
 	ipw_led_radio_off(priv);
 }
 
-static void ipw_bg_down(void *data)
+static void ipw_bg_down(struct work_struct *work)
 {
-	struct ipw_priv *priv = data;
+	struct ipw_priv *priv =
+		container_of(work, struct ipw_priv, down);
 	mutex_lock(&priv->mutex);
-	ipw_down(data);
+	ipw_down(priv);
 	mutex_unlock(&priv->mutex);
 }
 
diff --git a/drivers/net/wireless/ipw2200.h b/drivers/net/wireless/ipw2200.h
index dad5eedefbf1..626a240a87d8 100644
--- a/drivers/net/wireless/ipw2200.h
+++ b/drivers/net/wireless/ipw2200.h
@@ -1290,21 +1290,21 @@ struct ipw_priv {
 
 	struct workqueue_struct *workqueue;
 
-	struct work_struct adhoc_check;
+	struct delayed_work adhoc_check;
 	struct work_struct associate;
 	struct work_struct disassociate;
 	struct work_struct system_config;
 	struct work_struct rx_replenish;
-	struct work_struct request_scan;
+	struct delayed_work request_scan;
   	struct work_struct request_passive_scan;
 	struct work_struct adapter_restart;
-	struct work_struct rf_kill;
+	struct delayed_work rf_kill;
 	struct work_struct up;
 	struct work_struct down;
-	struct work_struct gather_stats;
+	struct delayed_work gather_stats;
 	struct work_struct abort_scan;
 	struct work_struct roam;
-	struct work_struct scan_check;
+	struct delayed_work scan_check;
 	struct work_struct link_up;
 	struct work_struct link_down;
 
@@ -1319,9 +1319,9 @@ struct ipw_priv {
 	u32 led_ofdm_on;
 	u32 led_ofdm_off;
 
-	struct work_struct led_link_on;
-	struct work_struct led_link_off;
-	struct work_struct led_act_off;
+	struct delayed_work led_link_on;
+	struct delayed_work led_link_off;
+	struct delayed_work led_act_off;
 	struct work_struct merge_networks;
 
 	struct ipw_cmd_log *cmdlog;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 336cabac13b3..936c888e03e1 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -980,9 +980,11 @@ static void print_linkstatus(struct net_device *dev, u16 status)
 }
 
 /* Search scan results for requested BSSID, join it if found */
-static void orinoco_join_ap(struct net_device *dev)
+static void orinoco_join_ap(struct work_struct *work)
 {
-	struct orinoco_private *priv = netdev_priv(dev);
+	struct orinoco_private *priv =
+		container_of(work, struct orinoco_private, join_work);
+	struct net_device *dev = priv->ndev;
 	struct hermes *hw = &priv->hw;
 	int err;
 	unsigned long flags;
@@ -1055,9 +1057,11 @@ static void orinoco_join_ap(struct net_device *dev)
 }
 
 /* Send new BSSID to userspace */
-static void orinoco_send_wevents(struct net_device *dev)
+static void orinoco_send_wevents(struct work_struct *work)
 {
-	struct orinoco_private *priv = netdev_priv(dev);
+	struct orinoco_private *priv =
+		container_of(work, struct orinoco_private, wevent_work);
+	struct net_device *dev = priv->ndev;
 	struct hermes *hw = &priv->hw;
 	union iwreq_data wrqu;
 	int err;
@@ -1864,9 +1868,11 @@ __orinoco_set_multicast_list(struct net_device *dev)
 
 /* This must be called from user context, without locks held - use
  * schedule_work() */
-static void orinoco_reset(struct net_device *dev)
+static void orinoco_reset(struct work_struct *work)
 {
-	struct orinoco_private *priv = netdev_priv(dev);
+	struct orinoco_private *priv =
+		container_of(work, struct orinoco_private, reset_work);
+	struct net_device *dev = priv->ndev;
 	struct hermes *hw = &priv->hw;
 	int err;
 	unsigned long flags;
@@ -2434,9 +2440,9 @@ struct net_device *alloc_orinocodev(int sizeof_card,
 	priv->hw_unavailable = 1; /* orinoco_init() must clear this
 				   * before anything else touches the
 				   * hardware */
-	INIT_WORK(&priv->reset_work, (void (*)(void *))orinoco_reset, dev);
-	INIT_WORK(&priv->join_work, (void (*)(void *))orinoco_join_ap, dev);
-	INIT_WORK(&priv->wevent_work, (void (*)(void *))orinoco_send_wevents, dev);
+	INIT_WORK(&priv->reset_work, orinoco_reset);
+	INIT_WORK(&priv->join_work, orinoco_join_ap);
+	INIT_WORK(&priv->wevent_work, orinoco_send_wevents);
 
 	netif_carrier_off(dev);
 	priv->last_linkstatus = 0xffff;
@@ -3608,7 +3614,7 @@ static int orinoco_ioctl_reset(struct net_device *dev,
 		printk(KERN_DEBUG "%s: Forcing reset!\n", dev->name);
 
 		/* Firmware reset */
-		orinoco_reset(dev);
+		orinoco_reset(&priv->reset_work);
 	} else {
 		printk(KERN_DEBUG "%s: Force scheduling reset!\n", dev->name);
 
@@ -4154,7 +4160,7 @@ static int orinoco_ioctl_commit(struct net_device *dev,
 		return 0;
 
 	if (priv->broken_disableport) {
-		orinoco_reset(dev);
+		orinoco_reset(&priv->reset_work);
 		return 0;
 	}
 
diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c
index 286325ca3293..e7700b4257eb 100644
--- a/drivers/net/wireless/prism54/isl_ioctl.c
+++ b/drivers/net/wireless/prism54/isl_ioctl.c
@@ -158,8 +158,9 @@ prism54_mib_init(islpci_private *priv)
  * schedule_work(), thus we can as well use sleeping semaphore
  * locking */
 void
-prism54_update_stats(islpci_private *priv)
+prism54_update_stats(struct work_struct *work)
 {
+	islpci_private *priv = container_of(work, islpci_private, stats_work);
 	char *data;
 	int j;
 	struct obj_bss bss, *bss2;
@@ -2494,9 +2495,10 @@ prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid,
  * interrupt context, no locks held.
  */
 void
-prism54_process_trap(void *data)
+prism54_process_trap(struct work_struct *work)
 {
-	struct islpci_mgmtframe *frame = data;
+	struct islpci_mgmtframe *frame =
+		container_of(work, struct islpci_mgmtframe, ws);
 	struct net_device *ndev = frame->ndev;
 	enum oid_num_t n = mgt_oidtonum(frame->header->oid);
 
diff --git a/drivers/net/wireless/prism54/isl_ioctl.h b/drivers/net/wireless/prism54/isl_ioctl.h
index 65f33acd0a42..0802fa64996f 100644
--- a/drivers/net/wireless/prism54/isl_ioctl.h
+++ b/drivers/net/wireless/prism54/isl_ioctl.h
@@ -32,12 +32,12 @@
 void prism54_mib_init(islpci_private *);
 
 struct iw_statistics *prism54_get_wireless_stats(struct net_device *);
-void prism54_update_stats(islpci_private *);
+void prism54_update_stats(struct work_struct *);
 
 void prism54_acl_init(struct islpci_acl *);
 void prism54_acl_clean(struct islpci_acl *);
 
-void prism54_process_trap(void *);
+void prism54_process_trap(struct work_struct *);
 
 void prism54_wpa_bss_ie_init(islpci_private *priv);
 void prism54_wpa_bss_ie_clean(islpci_private *priv);
diff --git a/drivers/net/wireless/prism54/islpci_dev.c b/drivers/net/wireless/prism54/islpci_dev.c
index ec1c00f19eb3..e35fcb2543c4 100644
--- a/drivers/net/wireless/prism54/islpci_dev.c
+++ b/drivers/net/wireless/prism54/islpci_dev.c
@@ -861,11 +861,10 @@ islpci_setup(struct pci_dev *pdev)
 	priv->state_off = 1;
 
 	/* initialize workqueue's */
-	INIT_WORK(&priv->stats_work,
-		  (void (*)(void *)) prism54_update_stats, priv);
+	INIT_WORK(&priv->stats_work, prism54_update_stats);
 	priv->stats_timestamp = 0;
 
-	INIT_WORK(&priv->reset_task, islpci_do_reset_and_wake, priv);
+	INIT_WORK(&priv->reset_task, islpci_do_reset_and_wake);
 	priv->reset_task_pending = 0;
 
 	/* allocate various memory areas */
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index a8261d8454dd..103a37877733 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -482,9 +482,9 @@ islpci_eth_receive(islpci_private *priv)
 }
 
 void
-islpci_do_reset_and_wake(void *data)
+islpci_do_reset_and_wake(struct work_struct *work)
 {
-	islpci_private *priv = (islpci_private *) data;
+	islpci_private *priv = container_of(work, islpci_private, reset_task);
 	islpci_reset(priv, 1);
 	netif_wake_queue(priv->ndev);
 	priv->reset_task_pending = 0;
diff --git a/drivers/net/wireless/prism54/islpci_eth.h b/drivers/net/wireless/prism54/islpci_eth.h
index bc9d7a60b8d6..99d37eda9f01 100644
--- a/drivers/net/wireless/prism54/islpci_eth.h
+++ b/drivers/net/wireless/prism54/islpci_eth.h
@@ -68,6 +68,6 @@ void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *);
 int islpci_eth_transmit(struct sk_buff *, struct net_device *);
 int islpci_eth_receive(islpci_private *);
 void islpci_eth_tx_timeout(struct net_device *);
-void islpci_do_reset_and_wake(void *data);
+void islpci_do_reset_and_wake(struct work_struct *);
 
 #endif				/* _ISL_GEN_H */
diff --git a/drivers/net/wireless/prism54/islpci_mgt.c b/drivers/net/wireless/prism54/islpci_mgt.c
index 2e061a80b294..656ec9fa7128 100644
--- a/drivers/net/wireless/prism54/islpci_mgt.c
+++ b/drivers/net/wireless/prism54/islpci_mgt.c
@@ -387,7 +387,7 @@ islpci_mgt_receive(struct net_device *ndev)
 
 			/* Create work to handle trap out of interrupt
 			 * context. */
-			INIT_WORK(&frame->ws, prism54_process_trap, frame);
+			INIT_WORK(&frame->ws, prism54_process_trap);
 			schedule_work(&frame->ws);
 
 		} else {
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index a7d29bddb298..5e4f4b707375 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -1090,9 +1090,10 @@ void zd_dump_rx_status(const struct rx_status *status)
 
 #define LINK_LED_WORK_DELAY HZ
 
-static void link_led_handler(void *p)
+static void link_led_handler(struct work_struct *work)
 {
-	struct zd_mac *mac = p;
+	struct zd_mac *mac =
+		container_of(work, struct zd_mac, housekeeping.link_led_work.work);
 	struct zd_chip *chip = &mac->chip;
 	struct ieee80211softmac_device *sm = ieee80211_priv(mac->netdev);
 	int is_associated;
@@ -1113,7 +1114,7 @@ static void link_led_handler(void *p)
 
 static void housekeeping_init(struct zd_mac *mac)
 {
-	INIT_WORK(&mac->housekeeping.link_led_work, link_led_handler, mac);
+	INIT_DELAYED_WORK(&mac->housekeeping.link_led_work, link_led_handler);
 }
 
 static void housekeeping_enable(struct zd_mac *mac)
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.h b/drivers/net/wireless/zd1211rw/zd_mac.h
index b8ea3de7924a..7957cac3de25 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.h
+++ b/drivers/net/wireless/zd1211rw/zd_mac.h
@@ -121,7 +121,7 @@ enum mac_flags {
 };
 
 struct housekeeping {
-	struct work_struct link_led_work;
+	struct delayed_work link_led_work;
 };
 
 #define ZD_MAC_STATS_BUFFER_SIZE 16
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index fc4bc9b94c74..a83c3db7d18f 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -29,7 +29,7 @@
 
 struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
 
-static void wq_sync_buffer(void *);
+static void wq_sync_buffer(struct work_struct *work);
 
 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
 static int work_enabled;
@@ -65,7 +65,7 @@ int alloc_cpu_buffers(void)
 		b->sample_received = 0;
 		b->sample_lost_overflow = 0;
 		b->cpu = i;
-		INIT_WORK(&b->work, wq_sync_buffer, b);
+		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
 	}
 	return 0;
 
@@ -282,9 +282,10 @@ void oprofile_add_trace(unsigned long pc)
  * By using schedule_delayed_work_on and then schedule_delayed_work
  * we guarantee this will stay on the correct cpu
  */
-static void wq_sync_buffer(void * data)
+static void wq_sync_buffer(struct work_struct *work)
 {
-	struct oprofile_cpu_buffer * b = data;
+	struct oprofile_cpu_buffer * b =
+		container_of(work, struct oprofile_cpu_buffer, work.work);
 	if (b->cpu != smp_processor_id()) {
 		printk("WQ on CPU%d, prefer CPU%d\n",
 		       smp_processor_id(), b->cpu);
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 09abb80e0570..49900d9e3235 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -43,7 +43,7 @@ struct oprofile_cpu_buffer {
 	unsigned long sample_lost_overflow;
 	unsigned long backtrace_aborted;
 	int cpu;
-	struct work_struct work;
+	struct delayed_work work;
 } ____cacheline_aligned;
 
 extern struct oprofile_cpu_buffer cpu_buffer[];
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index ea2087c34149..50757695844f 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -70,7 +70,7 @@ struct slot {
 	struct hotplug_slot *hotplug_slot;
 	struct list_head	slot_list;
 	char name[SLOT_NAME_SIZE];
-	struct work_struct work;	/* work for button event */
+	struct delayed_work work;	/* work for button event */
 	struct mutex lock;
 };
 
@@ -187,7 +187,7 @@ extern int	shpchp_configure_device(struct slot *p_slot);
 extern int	shpchp_unconfigure_device(struct slot *p_slot);
 extern void	shpchp_remove_ctrl_files(struct controller *ctrl);
 extern void	cleanup_slots(struct controller *ctrl);
-extern void	queue_pushbutton_work(void *data);
+extern void	queue_pushbutton_work(struct work_struct *work);
 
 
 #ifdef CONFIG_ACPI
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 235c18a22393..4eac85b3d90e 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -159,7 +159,7 @@ static int init_slots(struct controller *ctrl)
 			goto error_info;
 
 		slot->number = sun;
-		INIT_WORK(&slot->work, queue_pushbutton_work, slot);
+		INIT_DELAYED_WORK(&slot->work, queue_pushbutton_work);
 
 		/* register this slot with the hotplug pci core */
 		hotplug_slot->private = slot;
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
index c39901dbff20..158ac7836096 100644
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -36,7 +36,7 @@
 #include "../pci.h"
 #include "shpchp.h"
 
-static void interrupt_event_handler(void *data);
+static void interrupt_event_handler(struct work_struct *work);
 static int shpchp_enable_slot(struct slot *p_slot);
 static int shpchp_disable_slot(struct slot *p_slot);
 
@@ -50,7 +50,7 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
 
 	info->event_type = event_type;
 	info->p_slot = p_slot;
-	INIT_WORK(&info->work, interrupt_event_handler, info);
+	INIT_WORK(&info->work, interrupt_event_handler);
 
 	schedule_work(&info->work);
 
@@ -408,9 +408,10 @@ struct pushbutton_work_info {
  * Handles all pending events and exits.
  *
  */
-static void shpchp_pushbutton_thread(void *data)
+static void shpchp_pushbutton_thread(struct work_struct *work)
 {
-	struct pushbutton_work_info *info = data;
+	struct pushbutton_work_info *info =
+		container_of(work, struct pushbutton_work_info, work);
 	struct slot *p_slot = info->p_slot;
 
 	mutex_lock(&p_slot->lock);
@@ -436,9 +437,9 @@ static void shpchp_pushbutton_thread(void *data)
 	kfree(info);
 }
 
-void queue_pushbutton_work(void *data)
+void queue_pushbutton_work(struct work_struct *work)
 {
-	struct slot *p_slot = data;
+	struct slot *p_slot = container_of(work, struct slot, work.work);
 	struct pushbutton_work_info *info;
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -447,7 +448,7 @@ void queue_pushbutton_work(void *data)
 		return;
 	}
 	info->p_slot = p_slot;
-	INIT_WORK(&info->work, shpchp_pushbutton_thread, info);
+	INIT_WORK(&info->work, shpchp_pushbutton_thread);
 
 	mutex_lock(&p_slot->lock);
 	switch (p_slot->state) {
@@ -541,9 +542,9 @@ static void handle_button_press_event(struct slot *p_slot)
 	}
 }
 
-static void interrupt_event_handler(void *data)
+static void interrupt_event_handler(struct work_struct *work)
 {
-	struct event_info *info = data;
+	struct event_info *info = container_of(work, struct event_info, work);
 	struct slot *p_slot = info->p_slot;
 
 	mutex_lock(&p_slot->lock);
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index a20d84d707d9..e469a46a388b 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -698,9 +698,10 @@ static int pcmcia_card_add(struct pcmcia_socket *s)
 }
 
 
-static void pcmcia_delayed_add_pseudo_device(void *data)
+static void pcmcia_delayed_add_pseudo_device(struct work_struct *work)
 {
-	struct pcmcia_socket *s = data;
+	struct pcmcia_socket *s =
+		container_of(work, struct pcmcia_socket, device_add);
 	pcmcia_device_add(s, 0);
 	s->pcmcia_state.device_add_pending = 0;
 }
@@ -1246,7 +1247,7 @@ static int __devinit pcmcia_bus_add_socket(struct class_device *class_dev,
 	init_waitqueue_head(&socket->queue);
 #endif
 	INIT_LIST_HEAD(&socket->devices_list);
-	INIT_WORK(&socket->device_add, pcmcia_delayed_add_pseudo_device, socket);
+	INIT_WORK(&socket->device_add, pcmcia_delayed_add_pseudo_device);
 	memset(&socket->pcmcia_state, 0, sizeof(u8));
 	socket->device_count = 0;
 
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 583789c66cdb..dcf5f86461f7 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -53,9 +53,10 @@ static int rtc_dev_open(struct inode *inode, struct file *file)
  * Routine to poll RTC seconds field for change as often as possible,
  * after first RTC_UIE use timer to reduce polling
  */
-static void rtc_uie_task(void *data)
+static void rtc_uie_task(struct work_struct *work)
 {
-	struct rtc_device *rtc = data;
+	struct rtc_device *rtc =
+		container_of(work, struct rtc_device, uie_task);
 	struct rtc_time tm;
 	int num = 0;
 	int err;
@@ -398,7 +399,7 @@ static int rtc_dev_add_device(struct class_device *class_dev,
 	spin_lock_init(&rtc->irq_lock);
 	init_waitqueue_head(&rtc->irq_queue);
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	INIT_WORK(&rtc->uie_task, rtc_uie_task, rtc);
+	INIT_WORK(&rtc->uie_task, rtc_uie_task);
 	setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
 #endif
 
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index a6aa91072880..bb3cb3360541 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -849,7 +849,7 @@ static int __devinit NCR5380_init(struct Scsi_Host *instance, int flags)
 	hostdata->issue_queue = NULL;
 	hostdata->disconnected_queue = NULL;
 	
-	INIT_WORK(&hostdata->coroutine, NCR5380_main, hostdata);
+	INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
 	
 #ifdef NCR5380_STATS
 	for (i = 0; i < 8; ++i) {
@@ -1016,7 +1016,7 @@ static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 
 	/* Run the coroutine if it isn't already running. */
 	/* Kick off command processing */
-	schedule_work(&hostdata->coroutine);
+	schedule_delayed_work(&hostdata->coroutine, 0);
 	return 0;
 }
 
@@ -1033,9 +1033,10 @@ static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
  *	host lock and called routines may take the isa dma lock.
  */
 
-static void NCR5380_main(void *p)
+static void NCR5380_main(struct work_struct *work)
 {
-	struct NCR5380_hostdata *hostdata = p;
+	struct NCR5380_hostdata *hostdata =
+		container_of(work, struct NCR5380_hostdata, coroutine.work);
 	struct Scsi_Host *instance = hostdata->host;
 	Scsi_Cmnd *tmp, *prev;
 	int done;
@@ -1221,7 +1222,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id)
 		}	/* if BASR_IRQ */
 		spin_unlock_irqrestore(instance->host_lock, flags);
 		if(!done)
-			schedule_work(&hostdata->coroutine);
+			schedule_delayed_work(&hostdata->coroutine, 0);
 	} while (!done);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index 1bc73de496b0..713a108c02ef 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h
@@ -271,7 +271,7 @@ struct NCR5380_hostdata {
 	unsigned long time_expires;		/* in jiffies, set prior to sleeping */
 	int select_time;			/* timer in select for target response */
 	volatile Scsi_Cmnd *selecting;
-	struct work_struct coroutine;		/* our co-routine */
+	struct delayed_work coroutine;		/* our co-routine */
 #ifdef NCR5380_STATS
 	unsigned timebase;			/* Base for time calcs */
 	long time_read[8];			/* time to do reads */
@@ -298,7 +298,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance);
 #ifndef DONT_USE_INTR
 static irqreturn_t NCR5380_intr(int irq, void *dev_id);
 #endif
-static void NCR5380_main(void *ptr);
+static void NCR5380_main(struct work_struct *work);
 static void NCR5380_print_options(struct Scsi_Host *instance);
 #ifdef NDEBUG
 static void NCR5380_print_phase(struct Scsi_Host *instance);
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 306f46b85a55..0cec742d12e9 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -1443,7 +1443,7 @@ static struct work_struct aha152x_tq;
  * Run service completions on the card with interrupts enabled.
  *
  */
-static void run(void)
+static void run(struct work_struct *work)
 {
 	struct aha152x_hostdata *hd;
 
@@ -1499,7 +1499,7 @@ static irqreturn_t intr(int irqno, void *dev_id)
 		HOSTDATA(shpnt)->service=1;
 
 		/* Poke the BH handler */
-		INIT_WORK(&aha152x_tq, (void *) run, NULL);
+		INIT_WORK(&aha152x_tq, run);
 		schedule_work(&aha152x_tq);
 	}
 	DO_UNLOCK(flags);
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index e31f6122106f..0464c182c577 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -36,7 +36,7 @@ typedef struct {
 	int base_hi;		/* Hi Base address for ECP-ISA chipset */
 	int mode;		/* Transfer mode                */
 	struct scsi_cmnd *cur_cmd;	/* Current queued command       */
-	struct work_struct imm_tq;	/* Polling interrupt stuff       */
+	struct delayed_work imm_tq;	/* Polling interrupt stuff       */
 	unsigned long jstart;	/* Jiffies at start             */
 	unsigned failed:1;	/* Failure flag                 */
 	unsigned dp:1;		/* Data phase present           */
@@ -733,9 +733,9 @@ static int imm_completion(struct scsi_cmnd *cmd)
  * the scheduler's task queue to generate a stream of call-backs and
  * complete the request when the drive is ready.
  */
-static void imm_interrupt(void *data)
+static void imm_interrupt(struct work_struct *work)
 {
-	imm_struct *dev = (imm_struct *) data;
+	imm_struct *dev = container_of(work, imm_struct, imm_tq.work);
 	struct scsi_cmnd *cmd = dev->cur_cmd;
 	struct Scsi_Host *host = cmd->device->host;
 	unsigned long flags;
@@ -745,7 +745,6 @@ static void imm_interrupt(void *data)
 		return;
 	}
 	if (imm_engine(dev, cmd)) {
-		INIT_WORK(&dev->imm_tq, imm_interrupt, (void *) dev);
 		schedule_delayed_work(&dev->imm_tq, 1);
 		return;
 	}
@@ -953,8 +952,7 @@ static int imm_queuecommand(struct scsi_cmnd *cmd,
 	cmd->result = DID_ERROR << 16;	/* default return code */
 	cmd->SCp.phase = 0;	/* bus free */
 
-	INIT_WORK(&dev->imm_tq, imm_interrupt, dev);
-	schedule_work(&dev->imm_tq);
+	schedule_delayed_work(&dev->imm_tq, 0);
 
 	imm_pb_claim(dev);
 
@@ -1225,7 +1223,7 @@ static int __imm_attach(struct parport *pb)
 	else
 		ports = 8;
 
-	INIT_WORK(&dev->imm_tq, imm_interrupt, dev);
+	INIT_DELAYED_WORK(&dev->imm_tq, imm_interrupt);
 
 	err = -ENOMEM;
 	host = scsi_host_alloc(&imm_template, sizeof(imm_struct *));
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 2dde821025f3..d51c3e764bb0 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2093,7 +2093,7 @@ static void ipr_release_dump(struct kref *kref)
 
 /**
  * ipr_worker_thread - Worker thread
- * @data:		ioa config struct
+ * @work:		ioa config struct
  *
  * Called at task level from a work thread. This function takes care
  * of adding and removing device from the mid-layer as configuration
@@ -2102,13 +2102,14 @@ static void ipr_release_dump(struct kref *kref)
  * Return value:
  * 	nothing
  **/
-static void ipr_worker_thread(void *data)
+static void ipr_worker_thread(struct work_struct *work)
 {
 	unsigned long lock_flags;
 	struct ipr_resource_entry *res;
 	struct scsi_device *sdev;
 	struct ipr_dump *dump;
-	struct ipr_ioa_cfg *ioa_cfg = data;
+	struct ipr_ioa_cfg *ioa_cfg =
+		container_of(work, struct ipr_ioa_cfg, work_q);
 	u8 bus, target, lun;
 	int did_work;
 
@@ -6926,7 +6927,7 @@ static void __devinit ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
 	INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q);
 	INIT_LIST_HEAD(&ioa_cfg->free_res_q);
 	INIT_LIST_HEAD(&ioa_cfg->used_res_q);
-	INIT_WORK(&ioa_cfg->work_q, ipr_worker_thread, ioa_cfg);
+	INIT_WORK(&ioa_cfg->work_q, ipr_worker_thread);
 	init_waitqueue_head(&ioa_cfg->reset_wait_q);
 	ioa_cfg->sdt_state = INACTIVE;
 	if (ipr_enable_cache)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5d8862189485..e11b23c641e2 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -719,9 +719,10 @@ again:
 	return rc;
 }
 
-static void iscsi_xmitworker(void *data)
+static void iscsi_xmitworker(struct work_struct *work)
 {
-	struct iscsi_conn *conn = data;
+	struct iscsi_conn *conn =
+		container_of(work, struct iscsi_conn, xmitwork);
 	int rc;
 	/*
 	 * serialize Xmit worker on a per-connection basis.
@@ -1512,7 +1513,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
 		goto mgmtqueue_alloc_fail;
 
-	INIT_WORK(&conn->xmitwork, iscsi_xmitworker, conn);
+	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
 	/* allocate login_mtask used for the login/text sequences */
 	spin_lock_bh(&session->lock);
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index d977bd492d8d..fb7df7b75811 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -647,10 +647,12 @@ void sas_unregister_domain_devices(struct asd_sas_port *port)
  * Discover process only interrogates devices in order to discover the
  * domain.
  */
-static void sas_discover_domain(void *data)
+static void sas_discover_domain(struct work_struct *work)
 {
 	int error = 0;
-	struct asd_sas_port *port = data;
+	struct sas_discovery_event *ev =
+		container_of(work, struct sas_discovery_event, work);
+	struct asd_sas_port *port = ev->port;
 
 	sas_begin_event(DISCE_DISCOVER_DOMAIN, &port->disc.disc_event_lock,
 			&port->disc.pending);
@@ -692,10 +694,12 @@ static void sas_discover_domain(void *data)
 		    current->pid, error);
 }
 
-static void sas_revalidate_domain(void *data)
+static void sas_revalidate_domain(struct work_struct *work)
 {
 	int res = 0;
-	struct asd_sas_port *port = data;
+	struct sas_discovery_event *ev =
+		container_of(work, struct sas_discovery_event, work);
+	struct asd_sas_port *port = ev->port;
 
 	sas_begin_event(DISCE_REVALIDATE_DOMAIN, &port->disc.disc_event_lock,
 			&port->disc.pending);
@@ -722,7 +726,7 @@ int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
 	BUG_ON(ev >= DISC_NUM_EVENTS);
 
 	sas_queue_event(ev, &disc->disc_event_lock, &disc->pending,
-			&disc->disc_work[ev], port->ha->core.shost);
+			&disc->disc_work[ev].work, port->ha->core.shost);
 
 	return 0;
 }
@@ -737,13 +741,15 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
 {
 	int i;
 
-	static void (*sas_event_fns[DISC_NUM_EVENTS])(void *) = {
+	static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
 		[DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
 		[DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
 	};
 
 	spin_lock_init(&disc->disc_event_lock);
 	disc->pending = 0;
-	for (i = 0; i < DISC_NUM_EVENTS; i++)
-		INIT_WORK(&disc->disc_work[i], sas_event_fns[i], port);
+	for (i = 0; i < DISC_NUM_EVENTS; i++) {
+		INIT_WORK(&disc->disc_work[i].work, sas_event_fns[i]);
+		disc->disc_work[i].port = port;
+	}
 }
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index 19110ed1c89c..d83392ee6823 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -31,7 +31,7 @@ static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)
 	BUG_ON(event >= HA_NUM_EVENTS);
 
 	sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending,
-			&sas_ha->ha_events[event], sas_ha->core.shost);
+			&sas_ha->ha_events[event].work, sas_ha->core.shost);
 }
 
 static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
@@ -41,7 +41,7 @@ static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
 	BUG_ON(event >= PORT_NUM_EVENTS);
 
 	sas_queue_event(event, &ha->event_lock, &phy->port_events_pending,
-			&phy->port_events[event], ha->core.shost);
+			&phy->port_events[event].work, ha->core.shost);
 }
 
 static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
@@ -51,12 +51,12 @@ static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
 	BUG_ON(event >= PHY_NUM_EVENTS);
 
 	sas_queue_event(event, &ha->event_lock, &phy->phy_events_pending,
-			&phy->phy_events[event], ha->core.shost);
+			&phy->phy_events[event].work, ha->core.shost);
 }
 
 int sas_init_events(struct sas_ha_struct *sas_ha)
 {
-	static void (*sas_ha_event_fns[HA_NUM_EVENTS])(void *) = {
+	static const work_func_t sas_ha_event_fns[HA_NUM_EVENTS] = {
 		[HAE_RESET] = sas_hae_reset,
 	};
 
@@ -64,8 +64,10 @@ int sas_init_events(struct sas_ha_struct *sas_ha)
 
 	spin_lock_init(&sas_ha->event_lock);
 
-	for (i = 0; i < HA_NUM_EVENTS; i++)
-		INIT_WORK(&sas_ha->ha_events[i], sas_ha_event_fns[i], sas_ha);
+	for (i = 0; i < HA_NUM_EVENTS; i++) {
+		INIT_WORK(&sas_ha->ha_events[i].work, sas_ha_event_fns[i]);
+		sas_ha->ha_events[i].ha = sas_ha;
+	}
 
 	sas_ha->notify_ha_event = notify_ha_event;
 	sas_ha->notify_port_event = notify_port_event;
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index c836a237fb79..7b4e9169f44d 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -65,9 +65,11 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
 
 /* ---------- HA events ---------- */
 
-void sas_hae_reset(void *data)
+void sas_hae_reset(struct work_struct *work)
 {
-	struct sas_ha_struct *ha = data;
+	struct sas_ha_event *ev =
+		container_of(work, struct sas_ha_event, work);
+	struct sas_ha_struct *ha = ev->ha;
 
 	sas_begin_event(HAE_RESET, &ha->event_lock,
 			&ha->pending);
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index bffcee474921..137d7e496b6d 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -60,11 +60,11 @@ void sas_shutdown_queue(struct sas_ha_struct *sas_ha);
 
 void sas_deform_port(struct asd_sas_phy *phy);
 
-void sas_porte_bytes_dmaed(void *);
-void sas_porte_broadcast_rcvd(void *);
-void sas_porte_link_reset_err(void *);
-void sas_porte_timer_event(void *);
-void sas_porte_hard_reset(void *);
+void sas_porte_bytes_dmaed(struct work_struct *work);
+void sas_porte_broadcast_rcvd(struct work_struct *work);
+void sas_porte_link_reset_err(struct work_struct *work);
+void sas_porte_timer_event(struct work_struct *work);
+void sas_porte_hard_reset(struct work_struct *work);
 
 int sas_notify_lldd_dev_found(struct domain_device *);
 void sas_notify_lldd_dev_gone(struct domain_device *);
@@ -75,7 +75,7 @@ int sas_smp_get_phy_events(struct sas_phy *phy);
 
 struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
 
-void sas_hae_reset(void *);
+void sas_hae_reset(struct work_struct *work);
 
 static inline void sas_queue_event(int event, spinlock_t *lock,
 				   unsigned long *pending,
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 9340cdbae4a3..b459c4b635b1 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -30,9 +30,11 @@
 
 /* ---------- Phy events ---------- */
 
-static void sas_phye_loss_of_signal(void *data)
+static void sas_phye_loss_of_signal(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock,
 			&phy->phy_events_pending);
@@ -40,18 +42,22 @@ static void sas_phye_loss_of_signal(void *data)
 	sas_deform_port(phy);
 }
 
-static void sas_phye_oob_done(void *data)
+static void sas_phye_oob_done(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PHYE_OOB_DONE, &phy->ha->event_lock,
 			&phy->phy_events_pending);
 	phy->error = 0;
 }
 
-static void sas_phye_oob_error(void *data)
+static void sas_phye_oob_error(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 	struct sas_ha_struct *sas_ha = phy->ha;
 	struct asd_sas_port *port = phy->port;
 	struct sas_internal *i =
@@ -80,9 +86,11 @@ static void sas_phye_oob_error(void *data)
 	}
 }
 
-static void sas_phye_spinup_hold(void *data)
+static void sas_phye_spinup_hold(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 	struct sas_ha_struct *sas_ha = phy->ha;
 	struct sas_internal *i =
 		to_sas_internal(sas_ha->core.shost->transportt);
@@ -100,14 +108,14 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
 {
 	int i;
 
-	static void (*sas_phy_event_fns[PHY_NUM_EVENTS])(void *) = {
+	static const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
 		[PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
 		[PHYE_OOB_DONE] = sas_phye_oob_done,
 		[PHYE_OOB_ERROR] = sas_phye_oob_error,
 		[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
 	};
 
-	static void (*sas_port_event_fns[PORT_NUM_EVENTS])(void *) = {
+	static const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = {
 		[PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
 		[PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
 		[PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
@@ -122,13 +130,18 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
 
 		phy->error = 0;
 		INIT_LIST_HEAD(&phy->port_phy_el);
-		for (k = 0; k < PORT_NUM_EVENTS; k++)
-			INIT_WORK(&phy->port_events[k], sas_port_event_fns[k],
-				  phy);
+		for (k = 0; k < PORT_NUM_EVENTS; k++) {
+			INIT_WORK(&phy->port_events[k].work,
+				  sas_port_event_fns[k]);
+			phy->port_events[k].phy = phy;
+		}
+
+		for (k = 0; k < PHY_NUM_EVENTS; k++) {
+			INIT_WORK(&phy->phy_events[k].work,
+				  sas_phy_event_fns[k]);
+			phy->phy_events[k].phy = phy;
+		}
 
-		for (k = 0; k < PHY_NUM_EVENTS; k++)
-			INIT_WORK(&phy->phy_events[k], sas_phy_event_fns[k],
-				  phy);
 		phy->port = NULL;
 		phy->ha = sas_ha;
 		spin_lock_init(&phy->frame_rcvd_lock);
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 253cdcf306a2..971c37ceecb4 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -181,9 +181,11 @@ void sas_deform_port(struct asd_sas_phy *phy)
 
 /* ---------- SAS port events ---------- */
 
-void sas_porte_bytes_dmaed(void *data)
+void sas_porte_bytes_dmaed(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PORTE_BYTES_DMAED, &phy->ha->event_lock,
 			&phy->port_events_pending);
@@ -191,11 +193,13 @@ void sas_porte_bytes_dmaed(void *data)
 	sas_form_port(phy);
 }
 
-void sas_porte_broadcast_rcvd(void *data)
+void sas_porte_broadcast_rcvd(struct work_struct *work)
 {
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 	unsigned long flags;
 	u32 prim;
-	struct asd_sas_phy *phy = data;
 
 	sas_begin_event(PORTE_BROADCAST_RCVD, &phy->ha->event_lock,
 			&phy->port_events_pending);
@@ -208,9 +212,11 @@ void sas_porte_broadcast_rcvd(void *data)
 	sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN);
 }
 
-void sas_porte_link_reset_err(void *data)
+void sas_porte_link_reset_err(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock,
 			&phy->port_events_pending);
@@ -218,9 +224,11 @@ void sas_porte_link_reset_err(void *data)
 	sas_deform_port(phy);
 }
 
-void sas_porte_timer_event(void *data)
+void sas_porte_timer_event(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock,
 			&phy->port_events_pending);
@@ -228,9 +236,11 @@ void sas_porte_timer_event(void *data)
 	sas_deform_port(phy);
 }
 
-void sas_porte_hard_reset(void *data)
+void sas_porte_hard_reset(struct work_struct *work)
 {
-	struct asd_sas_phy *phy = data;
+	struct asd_sas_event *ev =
+		container_of(work, struct asd_sas_event, work);
+	struct asd_sas_phy *phy = ev->phy;
 
 	sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock,
 			&phy->port_events_pending);
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 89a2a9f11e41..584ba4d6e038 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -31,7 +31,7 @@ typedef struct {
 	int base;		/* Actual port address          */
 	int mode;		/* Transfer mode                */
 	struct scsi_cmnd *cur_cmd;	/* Current queued command       */
-	struct work_struct ppa_tq;	/* Polling interrupt stuff       */
+	struct delayed_work ppa_tq;	/* Polling interrupt stuff       */
 	unsigned long jstart;	/* Jiffies at start             */
 	unsigned long recon_tmo;	/* How many usecs to wait for reconnection (6th bit) */
 	unsigned int failed:1;	/* Failure flag                 */
@@ -627,9 +627,9 @@ static int ppa_completion(struct scsi_cmnd *cmd)
  * the scheduler's task queue to generate a stream of call-backs and
  * complete the request when the drive is ready.
  */
-static void ppa_interrupt(void *data)
+static void ppa_interrupt(struct work_struct *work)
 {
-	ppa_struct *dev = (ppa_struct *) data;
+	ppa_struct *dev = container_of(work, ppa_struct, ppa_tq.work);
 	struct scsi_cmnd *cmd = dev->cur_cmd;
 
 	if (!cmd) {
@@ -637,7 +637,6 @@ static void ppa_interrupt(void *data)
 		return;
 	}
 	if (ppa_engine(dev, cmd)) {
-		dev->ppa_tq.data = (void *) dev;
 		schedule_delayed_work(&dev->ppa_tq, 1);
 		return;
 	}
@@ -822,8 +821,7 @@ static int ppa_queuecommand(struct scsi_cmnd *cmd,
 	cmd->result = DID_ERROR << 16;	/* default return code */
 	cmd->SCp.phase = 0;	/* bus free */
 
-	dev->ppa_tq.data = dev;
-	schedule_work(&dev->ppa_tq);
+	schedule_delayed_work(&dev->ppa_tq, 0);
 
 	ppa_pb_claim(dev);
 
@@ -1086,7 +1084,7 @@ static int __ppa_attach(struct parport *pb)
 	else
 		ports = 8;
 
-	INIT_WORK(&dev->ppa_tq, ppa_interrupt, dev);
+	INIT_DELAYED_WORK(&dev->ppa_tq, ppa_interrupt);
 
 	err = -ENOMEM;
 	host = scsi_host_alloc(&ppa_template, sizeof(ppa_struct *));
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 5b8db6109536..bbbc9d039baa 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1011,9 +1011,10 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha,
  * the mid-level tries to sleep when it reaches the driver threshold
  * "host->can_queue". This can cause a panic if we were in our interrupt code.
  **/
-static void qla4xxx_do_dpc(void *data)
+static void qla4xxx_do_dpc(struct work_struct *work)
 {
-	struct scsi_qla_host *ha = (struct scsi_qla_host *) data;
+	struct scsi_qla_host *ha =
+		container_of(work, struct scsi_qla_host, dpc_work);
 	struct ddb_entry *ddb_entry, *dtemp;
 
 	DEBUG2(printk("scsi%ld: %s: DPC handler waking up.\n",
@@ -1315,7 +1316,7 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
 		ret = -ENODEV;
 		goto probe_failed;
 	}
-	INIT_WORK(&ha->dpc_work, qla4xxx_do_dpc, ha);
+	INIT_WORK(&ha->dpc_work, qla4xxx_do_dpc);
 
 	ret = request_irq(pdev->irq, qla4xxx_intr_handler,
 			  SA_INTERRUPT|SA_SHIRQ, "qla4xxx", ha);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 38c215a78f69..3571ce8934e7 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -241,9 +241,9 @@ fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names)
 #define FC_MGMTSRVR_PORTID		0x00000a
 
 
-static void fc_timeout_deleted_rport(void *data);
-static void fc_timeout_fail_rport_io(void *data);
-static void fc_scsi_scan_rport(void *data);
+static void fc_timeout_deleted_rport(struct work_struct *work);
+static void fc_timeout_fail_rport_io(struct work_struct *work);
+static void fc_scsi_scan_rport(struct work_struct *work);
 
 /*
  * Attribute counts pre object type...
@@ -1613,7 +1613,7 @@ fc_flush_work(struct Scsi_Host *shost)
  * 	1 on success / 0 already queued / < 0 for error
  **/
 static int
-fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work,
+fc_queue_devloss_work(struct Scsi_Host *shost, struct delayed_work *work,
 				unsigned long delay)
 {
 	if (unlikely(!fc_host_devloss_work_q(shost))) {
@@ -1625,9 +1625,6 @@ fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work,
 		return -EINVAL;
 	}
 
-	if (delay == 0)
-		return queue_work(fc_host_devloss_work_q(shost), work);
-
 	return queue_delayed_work(fc_host_devloss_work_q(shost), work, delay);
 }
 
@@ -1712,12 +1709,13 @@ EXPORT_SYMBOL(fc_remove_host);
  * fc_starget_delete - called to delete the scsi decendents of an rport
  *                  (target and all sdevs)
  *
- * @data:	remote port to be operated on.
+ * @work:	remote port to be operated on.
  **/
 static void
-fc_starget_delete(void *data)
+fc_starget_delete(struct work_struct *work)
 {
-	struct fc_rport *rport = (struct fc_rport *)data;
+	struct fc_rport *rport =
+		container_of(work, struct fc_rport, stgt_delete_work);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	unsigned long flags;
 	struct fc_internal *i = to_fc_internal(shost->transportt);
@@ -1751,12 +1749,13 @@ fc_starget_delete(void *data)
 /**
  * fc_rport_final_delete - finish rport termination and delete it.
  *
- * @data:	remote port to be deleted.
+ * @work:	remote port to be deleted.
  **/
 static void
-fc_rport_final_delete(void *data)
+fc_rport_final_delete(struct work_struct *work)
 {
-	struct fc_rport *rport = (struct fc_rport *)data;
+	struct fc_rport *rport =
+		container_of(work, struct fc_rport, rport_delete_work);
 	struct device *dev = &rport->dev;
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
@@ -1770,7 +1769,7 @@ fc_rport_final_delete(void *data)
 
 	/* Delete SCSI target and sdevs */
 	if (rport->scsi_target_id != -1)
-		fc_starget_delete(data);
+		fc_starget_delete(&rport->stgt_delete_work);
 	else if (i->f->dev_loss_tmo_callbk)
 		i->f->dev_loss_tmo_callbk(rport);
 	else if (i->f->terminate_rport_io)
@@ -1829,11 +1828,11 @@ fc_rport_create(struct Scsi_Host *shost, int channel,
 	rport->channel = channel;
 	rport->fast_io_fail_tmo = -1;
 
-	INIT_WORK(&rport->dev_loss_work, fc_timeout_deleted_rport, rport);
-	INIT_WORK(&rport->fail_io_work, fc_timeout_fail_rport_io, rport);
-	INIT_WORK(&rport->scan_work, fc_scsi_scan_rport, rport);
-	INIT_WORK(&rport->stgt_delete_work, fc_starget_delete, rport);
-	INIT_WORK(&rport->rport_delete_work, fc_rport_final_delete, rport);
+	INIT_DELAYED_WORK(&rport->dev_loss_work, fc_timeout_deleted_rport);
+	INIT_DELAYED_WORK(&rport->fail_io_work, fc_timeout_fail_rport_io);
+	INIT_WORK(&rport->scan_work, fc_scsi_scan_rport);
+	INIT_WORK(&rport->stgt_delete_work, fc_starget_delete);
+	INIT_WORK(&rport->rport_delete_work, fc_rport_final_delete);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 
@@ -1963,7 +1962,7 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 			}
 
 			if (match) {
-				struct work_struct *work = 
+				struct delayed_work *work =
 							&rport->dev_loss_work;
 
 				memcpy(&rport->node_name, &ids->node_name,
@@ -2267,12 +2266,13 @@ EXPORT_SYMBOL(fc_remote_port_rolechg);
  *                       was a SCSI target (thus was blocked), and failed
  *                       to return in the alloted time.
  * 
- * @data:	rport target that failed to reappear in the alloted time.
+ * @work:	rport target that failed to reappear in the alloted time.
  **/
 static void
-fc_timeout_deleted_rport(void  *data)
+fc_timeout_deleted_rport(struct work_struct *work)
 {
-	struct fc_rport *rport = (struct fc_rport *)data;
+	struct fc_rport *rport =
+		container_of(work, struct fc_rport, dev_loss_work.work);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
 	unsigned long flags;
@@ -2366,15 +2366,16 @@ fc_timeout_deleted_rport(void  *data)
  * fc_timeout_fail_rport_io - Timeout handler for a fast io failing on a
  *                       disconnected SCSI target.
  *
- * @data:	rport to terminate io on.
+ * @work:	rport to terminate io on.
  *
  * Notes: Only requests the failure of the io, not that all are flushed
  *    prior to returning.
  **/
 static void
-fc_timeout_fail_rport_io(void  *data)
+fc_timeout_fail_rport_io(struct work_struct *work)
 {
-	struct fc_rport *rport = (struct fc_rport *)data;
+	struct fc_rport *rport =
+		container_of(work, struct fc_rport, fail_io_work.work);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 
@@ -2387,12 +2388,13 @@ fc_timeout_fail_rport_io(void  *data)
 /**
  * fc_scsi_scan_rport - called to perform a scsi scan on a remote port.
  *
- * @data:	remote port to be scanned.
+ * @work:	remote port to be scanned.
  **/
 static void
-fc_scsi_scan_rport(void *data)
+fc_scsi_scan_rport(struct work_struct *work)
 {
-	struct fc_rport *rport = (struct fc_rport *)data;
+	struct fc_rport *rport =
+		container_of(work, struct fc_rport, scan_work);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	unsigned long flags;
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 9b25124a989e..9c22f1342715 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -234,9 +234,11 @@ static int iscsi_user_scan(struct Scsi_Host *shost, uint channel,
 	return 0;
 }
 
-static void session_recovery_timedout(void *data)
+static void session_recovery_timedout(struct work_struct *work)
 {
-	struct iscsi_cls_session *session = data;
+	struct iscsi_cls_session *session =
+		container_of(work, struct iscsi_cls_session,
+			     recovery_work.work);
 
 	dev_printk(KERN_INFO, &session->dev, "iscsi: session recovery timed "
 		  "out after %d secs\n", session->recovery_tmo);
@@ -276,7 +278,7 @@ iscsi_alloc_session(struct Scsi_Host *shost,
 
 	session->transport = transport;
 	session->recovery_tmo = 120;
-	INIT_WORK(&session->recovery_work, session_recovery_timedout, session);
+	INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout);
 	INIT_LIST_HEAD(&session->host_list);
 	INIT_LIST_HEAD(&session->sess_list);
 
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 9f070f0d0f2b..3fded4831460 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -964,9 +964,10 @@ struct work_queue_wrapper {
 };
 
 static void
-spi_dv_device_work_wrapper(void *data)
+spi_dv_device_work_wrapper(struct work_struct *work)
 {
-	struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
+	struct work_queue_wrapper *wqw =
+		container_of(work, struct work_queue_wrapper, work);
 	struct scsi_device *sdev = wqw->sdev;
 
 	kfree(wqw);
@@ -1006,7 +1007,7 @@ spi_schedule_dv_device(struct scsi_device *sdev)
 		return;
 	}
 
-	INIT_WORK(&wqw->work, spi_dv_device_work_wrapper, wqw);
+	INIT_WORK(&wqw->work, spi_dv_device_work_wrapper);
 	wqw->sdev = sdev;
 
 	schedule_work(&wqw->work);
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index a23862ef72b2..08c1c57c6128 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -265,9 +265,10 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
  * Drivers can provide word-at-a-time i/o primitives, or provide
  * transfer-at-a-time ones to leverage dma or fifo hardware.
  */
-static void bitbang_work(void *_bitbang)
+static void bitbang_work(struct work_struct *work)
 {
-	struct spi_bitbang	*bitbang = _bitbang;
+	struct spi_bitbang	*bitbang =
+		container_of(work, struct spi_bitbang, work);
 	unsigned long		flags;
 
 	spin_lock_irqsave(&bitbang->lock, flags);
@@ -456,7 +457,7 @@ int spi_bitbang_start(struct spi_bitbang *bitbang)
 	if (!bitbang->master || !bitbang->chipselect)
 		return -EINVAL;
 
-	INIT_WORK(&bitbang->work, bitbang_work, bitbang);
+	INIT_WORK(&bitbang->work, bitbang_work);
 	spin_lock_init(&bitbang->lock);
 	INIT_LIST_HEAD(&bitbang->queue);
 
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index e6565633ba0f..3dfa3e40e148 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -158,7 +158,7 @@ struct cxacru_data {
 	const struct cxacru_modem_type *modem_type;
 
 	int line_status;
-	struct work_struct poll_work;
+	struct delayed_work poll_work;
 
 	/* contol handles */
 	struct mutex cm_serialize;
@@ -347,7 +347,7 @@ static int cxacru_card_status(struct cxacru_data *instance)
 	return 0;
 }
 
-static void cxacru_poll_status(struct cxacru_data *instance);
+static void cxacru_poll_status(struct work_struct *work);
 
 static int cxacru_atm_start(struct usbatm_data *usbatm_instance,
 		struct atm_dev *atm_dev)
@@ -376,12 +376,14 @@ static int cxacru_atm_start(struct usbatm_data *usbatm_instance,
 	}
 
 	/* Start status polling */
-	cxacru_poll_status(instance);
+	cxacru_poll_status(&instance->poll_work.work);
 	return 0;
 }
 
-static void cxacru_poll_status(struct cxacru_data *instance)
+static void cxacru_poll_status(struct work_struct *work)
 {
+	struct cxacru_data *instance =
+		container_of(work, struct cxacru_data, poll_work.work);
 	u32 buf[CXINF_MAX] = {};
 	struct usbatm_data *usbatm = instance->usbatm;
 	struct atm_dev *atm_dev = usbatm->atm_dev;
@@ -720,7 +722,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance,
 
 	mutex_init(&instance->cm_serialize);
 
-	INIT_WORK(&instance->poll_work, (void *)cxacru_poll_status, instance);
+	INIT_DELAYED_WORK(&instance->poll_work, cxacru_poll_status);
 
 	usbatm_instance->driver_data = instance;
 
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index c870c804470f..7ed34bb1c50f 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -142,7 +142,7 @@ struct speedtch_instance_data {
 
 	struct speedtch_params params; /* set in probe, constant afterwards */
 
-	struct work_struct status_checker;
+	struct delayed_work status_checker;
 
 	unsigned char last_status;
 
@@ -498,8 +498,11 @@ static int speedtch_start_synchro(struct speedtch_instance_data *instance)
 	return ret;
 }
 
-static void speedtch_check_status(struct speedtch_instance_data *instance)
+static void speedtch_check_status(struct work_struct *work)
 {
+	struct speedtch_instance_data *instance =
+		container_of(work, struct speedtch_instance_data,
+			     status_checker.work);
 	struct usbatm_data *usbatm = instance->usbatm;
 	struct atm_dev *atm_dev = usbatm->atm_dev;
 	unsigned char *buf = instance->scratch_buffer;
@@ -576,7 +579,7 @@ static void speedtch_status_poll(unsigned long data)
 {
 	struct speedtch_instance_data *instance = (void *)data;
 
-	schedule_work(&instance->status_checker);
+	schedule_delayed_work(&instance->status_checker, 0);
 
 	/* The following check is racy, but the race is harmless */
 	if (instance->poll_delay < MAX_POLL_DELAY)
@@ -596,7 +599,7 @@ static void speedtch_resubmit_int(unsigned long data)
 	if (int_urb) {
 		ret = usb_submit_urb(int_urb, GFP_ATOMIC);
 		if (!ret)
-			schedule_work(&instance->status_checker);
+			schedule_delayed_work(&instance->status_checker, 0);
 		else {
 			atm_dbg(instance->usbatm, "%s: usb_submit_urb failed with result %d\n", __func__, ret);
 			mod_timer(&instance->resubmit_timer, jiffies + msecs_to_jiffies(RESUBMIT_DELAY));
@@ -640,7 +643,7 @@ static void speedtch_handle_int(struct urb *int_urb)
 
 	if ((int_urb = instance->int_urb)) {
 		ret = usb_submit_urb(int_urb, GFP_ATOMIC);
-		schedule_work(&instance->status_checker);
+		schedule_delayed_work(&instance->status_checker, 0);
 		if (ret < 0) {
 			atm_dbg(usbatm, "%s: usb_submit_urb failed with result %d\n", __func__, ret);
 			goto fail;
@@ -855,7 +858,7 @@ static int speedtch_bind(struct usbatm_data *usbatm,
 
 	usbatm->flags |= (use_isoc ? UDSL_USE_ISOC : 0);
 
-	INIT_WORK(&instance->status_checker, (void *)speedtch_check_status, instance);
+	INIT_DELAYED_WORK(&instance->status_checker, speedtch_check_status);
 
 	instance->status_checker.timer.function = speedtch_status_poll;
 	instance->status_checker.timer.data = (unsigned long)instance;
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index f6b9f7e1f716..e39bb09f5af9 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -658,9 +658,9 @@ static int request_dsp(struct uea_softc *sc)
 /*
  * The uea_load_page() function must be called within a process context
  */
-static void uea_load_page(void *xsc)
+static void uea_load_page(struct work_struct *work)
 {
-	struct uea_softc *sc = xsc;
+	struct uea_softc *sc = container_of(work, struct uea_softc, task);
 	u16 pageno = sc->pageno;
 	u16 ovl = sc->ovl;
 	struct block_info bi;
@@ -1352,7 +1352,7 @@ static int uea_boot(struct uea_softc *sc)
 
 	uea_enters(INS_TO_USBDEV(sc));
 
-	INIT_WORK(&sc->task, uea_load_page, sc);
+	INIT_WORK(&sc->task, uea_load_page);
 	init_waitqueue_head(&sc->sync_q);
 	init_waitqueue_head(&sc->cmv_ack_wait);
 
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 9a9012fd284b..6408e10fdbf8 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -421,9 +421,9 @@ static void acm_write_bulk(struct urb *urb)
 		schedule_work(&acm->work);
 }
 
-static void acm_softint(void *private)
+static void acm_softint(struct work_struct *work)
 {
-	struct acm *acm = private;
+	struct acm *acm = container_of(work, struct acm, work);
 	dbg("Entering acm_softint.");
 	
 	if (!ACM_READY(acm))
@@ -927,7 +927,7 @@ skip_normal_probe:
 	acm->rx_buflimit = num_rx_buf;
 	acm->urb_task.func = acm_rx_tasklet;
 	acm->urb_task.data = (unsigned long) acm;
-	INIT_WORK(&acm->work, acm_softint, acm);
+	INIT_WORK(&acm->work, acm_softint);
 	spin_lock_init(&acm->throttle_lock);
 	spin_lock_init(&acm->write_lock);
 	spin_lock_init(&acm->read_lock);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index ba165aff9ea4..ad0ffbe8f7d7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -167,9 +167,10 @@ static void set_port_led(
 
 #define	LED_CYCLE_PERIOD	((2*HZ)/3)
 
-static void led_work (void *__hub)
+static void led_work (struct work_struct *work)
 {
-	struct usb_hub		*hub = __hub;
+	struct usb_hub		*hub =
+		container_of(work, struct usb_hub, leds.work);
 	struct usb_device	*hdev = hub->hdev;
 	unsigned		i;
 	unsigned		changed = 0;
@@ -351,9 +352,10 @@ hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt)
  * talking to TTs must queue control transfers (not just bulk and iso), so
  * both can talk to the same hub concurrently.
  */
-static void hub_tt_kevent (void *arg)
+static void hub_tt_kevent (struct work_struct *work)
 {
-	struct usb_hub		*hub = arg;
+	struct usb_hub		*hub =
+		container_of(work, struct usb_hub, tt.kevent);
 	unsigned long		flags;
 
 	spin_lock_irqsave (&hub->tt.lock, flags);
@@ -641,7 +643,7 @@ static int hub_configure(struct usb_hub *hub,
 
 	spin_lock_init (&hub->tt.lock);
 	INIT_LIST_HEAD (&hub->tt.clear_list);
-	INIT_WORK (&hub->tt.kevent, hub_tt_kevent, hub);
+	INIT_WORK (&hub->tt.kevent, hub_tt_kevent);
 	switch (hdev->descriptor.bDeviceProtocol) {
 		case 0:
 			break;
@@ -880,7 +882,7 @@ descriptor_error:
 	INIT_LIST_HEAD(&hub->event_list);
 	hub->intfdev = &intf->dev;
 	hub->hdev = hdev;
-	INIT_WORK(&hub->leds, led_work, hub);
+	INIT_DELAYED_WORK(&hub->leds, led_work);
 
 	usb_set_intfdata (intf, hub);
 
@@ -2281,7 +2283,7 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1)
 		/* hub LEDs are probably harder to miss than syslog */
 		if (hub->has_indicators) {
 			hub->indicator[port1-1] = INDICATOR_GREEN_BLINK;
-			schedule_work (&hub->leds);
+			schedule_delayed_work (&hub->leds, 0);
 		}
 	}
 	kfree(qual);
@@ -2455,7 +2457,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 				if (hub->has_indicators) {
 					hub->indicator[port1-1] =
 						INDICATOR_AMBER_BLINK;
-					schedule_work (&hub->leds);
+					schedule_delayed_work (&hub->leds, 0);
 				}
 				status = -ENOTCONN;	/* Don't retry */
 				goto loop_disable;
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 0f8e82a4d480..035d3ef35888 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -230,7 +230,7 @@ struct usb_hub {
 
 	unsigned		has_indicators:1;
 	enum hub_led_mode	indicator[USB_MAXCHILDREN];
-	struct work_struct	leds;
+	struct delayed_work	leds;
 };
 
 #endif /* __LINUX_HUB_H */
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 7729c0744886..89572bc021b1 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1501,9 +1501,10 @@ struct set_config_request {
 };
 
 /* Worker routine for usb_driver_set_configuration() */
-static void driver_set_config_work(void *_req)
+static void driver_set_config_work(struct work_struct *work)
 {
-	struct set_config_request *req = _req;
+	struct set_config_request *req =
+		container_of(work, struct set_config_request, work);
 
 	usb_lock_device(req->udev);
 	usb_set_configuration(req->udev, req->config);
@@ -1541,7 +1542,7 @@ int usb_driver_set_configuration(struct usb_device *udev, int config)
 		return -ENOMEM;
 	req->udev = udev;
 	req->config = config;
-	INIT_WORK(&req->work, driver_set_config_work, req);
+	INIT_WORK(&req->work, driver_set_config_work);
 
 	usb_get_dev(udev);
 	if (!schedule_work(&req->work)) {
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 467cb02832f3..ab2f68fc7d2d 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -210,9 +210,10 @@ static void ksuspend_usb_cleanup(void)
 #ifdef	CONFIG_USB_SUSPEND
 
 /* usb_autosuspend_work - callback routine to autosuspend a USB device */
-static void usb_autosuspend_work(void *_udev)
+static void usb_autosuspend_work(struct work_struct *work)
 {
-	struct usb_device	*udev = _udev;
+	struct usb_device *udev =
+		container_of(work, struct usb_device, autosuspend.work);
 
 	usb_pm_lock(udev);
 	udev->auto_pm = 1;
@@ -222,7 +223,7 @@ static void usb_autosuspend_work(void *_udev)
 
 #else
 
-static void usb_autosuspend_work(void *_udev)
+static void usb_autosuspend_work(struct work_struct *work)
 {}
 
 #endif
@@ -304,7 +305,7 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 
 #ifdef	CONFIG_PM
 	mutex_init(&dev->pm_mutex);
-	INIT_WORK(&dev->autosuspend, usb_autosuspend_work, dev);
+	INIT_DELAYED_WORK(&dev->autosuspend, usb_autosuspend_work);
 #endif
 	return dev;
 }
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 1c17d26d03b8..107119c54301 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -1833,9 +1833,9 @@ static void rx_fill (struct eth_dev *dev, gfp_t gfp_flags)
 	spin_unlock_irqrestore(&dev->req_lock, flags);
 }
 
-static void eth_work (void *_dev)
+static void eth_work (struct work_struct *work)
 {
-	struct eth_dev		*dev = _dev;
+	struct eth_dev	*dev = container_of(work, struct eth_dev, work);
 
 	if (test_and_clear_bit (WORK_RX_MEMORY, &dev->todo)) {
 		if (netif_running (dev->net))
@@ -2398,7 +2398,7 @@ autoconf_fail:
 	dev = netdev_priv(net);
 	spin_lock_init (&dev->lock);
 	spin_lock_init (&dev->req_lock);
-	INIT_WORK (&dev->work, eth_work, dev);
+	INIT_WORK (&dev->work, eth_work);
 	INIT_LIST_HEAD (&dev->tx_reqs);
 	INIT_LIST_HEAD (&dev->rx_reqs);
 
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 32c635ecbf31..4f95a249c913 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -163,7 +163,7 @@ struct u132_endp {
         u16 queue_next;
         struct urb *urb_list[ENDP_QUEUE_SIZE];
         struct list_head urb_more;
-        struct work_struct scheduler;
+        struct delayed_work scheduler;
 };
 struct u132_ring {
         unsigned in_use:1;
@@ -171,7 +171,7 @@ struct u132_ring {
         u8 number;
         struct u132 *u132;
         struct u132_endp *curr_endp;
-        struct work_struct scheduler;
+        struct delayed_work scheduler;
 };
 #define OHCI_QUIRK_AMD756 0x01
 #define OHCI_QUIRK_SUPERIO 0x02
@@ -198,7 +198,7 @@ struct u132 {
         u32 hc_roothub_portstatus[MAX_ROOT_PORTS];
         int flags;
         unsigned long next_statechange;
-        struct work_struct monitor;
+        struct delayed_work monitor;
         int num_endpoints;
         struct u132_addr addr[MAX_U132_ADDRS];
         struct u132_udev udev[MAX_U132_UDEVS];
@@ -314,7 +314,7 @@ static void u132_ring_requeue_work(struct u132 *u132, struct u132_ring *ring,
         if (delta > 0) {
                 if (queue_delayed_work(workqueue, &ring->scheduler, delta))
                         return;
-        } else if (queue_work(workqueue, &ring->scheduler))
+        } else if (queue_delayed_work(workqueue, &ring->scheduler, 0))
                 return;
         kref_put(&u132->kref, u132_hcd_delete);
         return;
@@ -393,12 +393,8 @@ static inline void u132_endp_init_kref(struct u132 *u132,
 static void u132_endp_queue_work(struct u132 *u132, struct u132_endp *endp,
         unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(workqueue, &endp->scheduler, delta))
-                        kref_get(&endp->kref);
-        } else if (queue_work(workqueue, &endp->scheduler))
-                kref_get(&endp->kref);
-        return;
+	if (queue_delayed_work(workqueue, &endp->scheduler, delta))
+		kref_get(&endp->kref);
 }
 
 static void u132_endp_cancel_work(struct u132 *u132, struct u132_endp *endp)
@@ -414,24 +410,14 @@ static inline void u132_monitor_put_kref(struct u132 *u132)
 
 static void u132_monitor_queue_work(struct u132 *u132, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(workqueue, &u132->monitor, delta)) {
-                        kref_get(&u132->kref);
-                }
-        } else if (queue_work(workqueue, &u132->monitor))
-                kref_get(&u132->kref);
-        return;
+	if (queue_delayed_work(workqueue, &u132->monitor, delta))
+		kref_get(&u132->kref);
 }
 
 static void u132_monitor_requeue_work(struct u132 *u132, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(workqueue, &u132->monitor, delta))
-                        return;
-        } else if (queue_work(workqueue, &u132->monitor))
-                return;
-        kref_put(&u132->kref, u132_hcd_delete);
-        return;
+	if (!queue_delayed_work(workqueue, &u132->monitor, delta))
+		kref_put(&u132->kref, u132_hcd_delete);
 }
 
 static void u132_monitor_cancel_work(struct u132 *u132)
@@ -493,9 +479,9 @@ static int read_roothub_info(struct u132 *u132)
         return 0;
 }
 
-static void u132_hcd_monitor_work(void *data)
+static void u132_hcd_monitor_work(struct work_struct *work)
 {
-        struct u132 *u132 = data;
+        struct u132 *u132 = container_of(work, struct u132, monitor.work);
         if (u132->going > 1) {
                 dev_err(&u132->platform_dev->dev, "device has been removed %d\n"
                         , u132->going);
@@ -1319,15 +1305,14 @@ static void u132_hcd_initial_setup_sent(void *data, struct urb *urb, u8 *buf,
         }
 }
 
-static void u132_hcd_ring_work_scheduler(void *data);
-static void u132_hcd_endp_work_scheduler(void *data);
 /*
 * this work function is only executed from the work queue
 *
 */
-static void u132_hcd_ring_work_scheduler(void *data)
+static void u132_hcd_ring_work_scheduler(struct work_struct *work)
 {
-        struct u132_ring *ring = data;
+        struct u132_ring *ring =
+		container_of(work, struct u132_ring, scheduler.work);
         struct u132 *u132 = ring->u132;
         down(&u132->scheduler_lock);
         if (ring->in_use) {
@@ -1386,10 +1371,11 @@ static void u132_hcd_ring_work_scheduler(void *data)
         }
 }
 
-static void u132_hcd_endp_work_scheduler(void *data)
+static void u132_hcd_endp_work_scheduler(struct work_struct *work)
 {
         struct u132_ring *ring;
-        struct u132_endp *endp = data;
+        struct u132_endp *endp =
+		container_of(work, struct u132_endp, scheduler.work);
         struct u132 *u132 = endp->u132;
         down(&u132->scheduler_lock);
         ring = endp->ring;
@@ -1947,7 +1933,7 @@ static int create_endpoint_and_queue_int(struct u132 *u132,
         if (!endp) {
                 return -ENOMEM;
         }
-        INIT_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler, (void *)endp);
+        INIT_DELAYED_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler);
         spin_lock_init(&endp->queue_lock.slock);
         INIT_LIST_HEAD(&endp->urb_more);
         ring = endp->ring = &u132->ring[0];
@@ -2036,7 +2022,7 @@ static int create_endpoint_and_queue_bulk(struct u132 *u132,
         if (!endp) {
                 return -ENOMEM;
         }
-        INIT_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler, (void *)endp);
+        INIT_DELAYED_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler);
         spin_lock_init(&endp->queue_lock.slock);
         INIT_LIST_HEAD(&endp->urb_more);
         endp->dequeueing = 0;
@@ -2121,7 +2107,7 @@ static int create_endpoint_and_queue_control(struct u132 *u132,
         if (!endp) {
                 return -ENOMEM;
         }
-        INIT_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler, (void *)endp);
+        INIT_DELAYED_WORK(&endp->scheduler, u132_hcd_endp_work_scheduler);
         spin_lock_init(&endp->queue_lock.slock);
         INIT_LIST_HEAD(&endp->urb_more);
         ring = endp->ring = &u132->ring[0];
@@ -3100,10 +3086,10 @@ static void u132_initialise(struct u132 *u132, struct platform_device *pdev)
                 ring->number = rings + 1;
                 ring->length = 0;
                 ring->curr_endp = NULL;
-                INIT_WORK(&ring->scheduler, u132_hcd_ring_work_scheduler,
-                        (void *)ring);
+                INIT_DELAYED_WORK(&ring->scheduler,
+				  u132_hcd_ring_work_scheduler);
         } down(&u132->sw_lock);
-        INIT_WORK(&u132->monitor, u132_hcd_monitor_work, (void *)u132);
+        INIT_DELAYED_WORK(&u132->monitor, u132_hcd_monitor_work);
         while (ports-- > 0) {
                 struct u132_port *port = &u132->port[ports];
                 port->u132 = u132;
diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c
index 6d08a3bcc952..ebc9e823a46e 100644
--- a/drivers/usb/input/hid-core.c
+++ b/drivers/usb/input/hid-core.c
@@ -969,9 +969,10 @@ static void hid_retry_timeout(unsigned long _hid)
 }
 
 /* Workqueue routine to reset the device */
-static void hid_reset(void *_hid)
+static void hid_reset(struct work_struct *work)
 {
-	struct hid_device *hid = (struct hid_device *) _hid;
+	struct hid_device *hid =
+		container_of(work, struct hid_device, reset_work);
 	int rc_lock, rc;
 
 	dev_dbg(&hid->intf->dev, "resetting device\n");
@@ -2015,7 +2016,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 
 	init_waitqueue_head(&hid->wait);
 
-	INIT_WORK(&hid->reset_work, hid_reset, hid);
+	INIT_WORK(&hid->reset_work, hid_reset);
 	setup_timer(&hid->io_retry, hid_retry_timeout, (unsigned long) hid);
 
 	spin_lock_init(&hid->inlock);
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 9b591b8b9369..e4e2cf2ba915 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -156,9 +156,9 @@ struct usb_ftdi {
         struct usb_device *udev;
         struct usb_interface *interface;
         struct usb_class_driver *class;
-        struct work_struct status_work;
-        struct work_struct command_work;
-        struct work_struct respond_work;
+        struct delayed_work status_work;
+        struct delayed_work command_work;
+        struct delayed_work respond_work;
         struct u132_platform_data platform_data;
         struct resource resources[0];
         struct platform_device platform_dev;
@@ -210,23 +210,14 @@ static void ftdi_elan_init_kref(struct usb_ftdi *ftdi)
 
 static void ftdi_status_requeue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(status_queue, &ftdi->status_work, delta))
-                        return;
-        } else if (queue_work(status_queue, &ftdi->status_work))
-                return;
-        kref_put(&ftdi->kref, ftdi_elan_delete);
-        return;
+	if (!queue_delayed_work(status_queue, &ftdi->status_work, delta))
+		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_status_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(status_queue, &ftdi->status_work, delta))
-                        kref_get(&ftdi->kref);
-        } else if (queue_work(status_queue, &ftdi->status_work))
-                kref_get(&ftdi->kref);
-        return;
+	if (queue_delayed_work(status_queue, &ftdi->status_work, delta))
+		kref_get(&ftdi->kref);
 }
 
 static void ftdi_status_cancel_work(struct usb_ftdi *ftdi)
@@ -237,25 +228,14 @@ static void ftdi_status_cancel_work(struct usb_ftdi *ftdi)
 
 static void ftdi_command_requeue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(command_queue, &ftdi->command_work,
-                        delta))
-                        return;
-        } else if (queue_work(command_queue, &ftdi->command_work))
-                return;
-        kref_put(&ftdi->kref, ftdi_elan_delete);
-        return;
+	if (!queue_delayed_work(command_queue, &ftdi->command_work, delta))
+		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_command_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(command_queue, &ftdi->command_work,
-                        delta))
-                        kref_get(&ftdi->kref);
-        } else if (queue_work(command_queue, &ftdi->command_work))
-                kref_get(&ftdi->kref);
-        return;
+	if (queue_delayed_work(command_queue, &ftdi->command_work, delta))
+		kref_get(&ftdi->kref);
 }
 
 static void ftdi_command_cancel_work(struct usb_ftdi *ftdi)
@@ -267,25 +247,14 @@ static void ftdi_command_cancel_work(struct usb_ftdi *ftdi)
 static void ftdi_response_requeue_work(struct usb_ftdi *ftdi,
         unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(respond_queue, &ftdi->respond_work,
-                        delta))
-                        return;
-        } else if (queue_work(respond_queue, &ftdi->respond_work))
-                return;
-        kref_put(&ftdi->kref, ftdi_elan_delete);
-        return;
+	if (!queue_delayed_work(respond_queue, &ftdi->respond_work, delta))
+		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_respond_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-        if (delta > 0) {
-                if (queue_delayed_work(respond_queue, &ftdi->respond_work,
-                        delta))
-                        kref_get(&ftdi->kref);
-        } else if (queue_work(respond_queue, &ftdi->respond_work))
-                kref_get(&ftdi->kref);
-        return;
+	if (queue_delayed_work(respond_queue, &ftdi->respond_work, delta))
+		kref_get(&ftdi->kref);
 }
 
 static void ftdi_response_cancel_work(struct usb_ftdi *ftdi)
@@ -475,9 +444,11 @@ static void ftdi_elan_kick_command_queue(struct usb_ftdi *ftdi)
         return;
 }
 
-static void ftdi_elan_command_work(void *data)
+static void ftdi_elan_command_work(struct work_struct *work)
 {
-        struct usb_ftdi *ftdi = data;
+        struct usb_ftdi *ftdi =
+		container_of(work, struct usb_ftdi, command_work.work);
+
         if (ftdi->disconnected > 0) {
                 ftdi_elan_put_kref(ftdi);
                 return;
@@ -500,9 +471,10 @@ static void ftdi_elan_kick_respond_queue(struct usb_ftdi *ftdi)
         return;
 }
 
-static void ftdi_elan_respond_work(void *data)
+static void ftdi_elan_respond_work(struct work_struct *work)
 {
-        struct usb_ftdi *ftdi = data;
+        struct usb_ftdi *ftdi =
+		container_of(work, struct usb_ftdi, respond_work.work);
         if (ftdi->disconnected > 0) {
                 ftdi_elan_put_kref(ftdi);
                 return;
@@ -534,9 +506,10 @@ static void ftdi_elan_respond_work(void *data)
 * after the FTDI has been synchronized
 *
 */
-static void ftdi_elan_status_work(void *data)
+static void ftdi_elan_status_work(struct work_struct *work)
 {
-        struct usb_ftdi *ftdi = data;
+        struct usb_ftdi *ftdi =
+		container_of(work, struct usb_ftdi, status_work.work);
         int work_delay_in_msec = 0;
         if (ftdi->disconnected > 0) {
                 ftdi_elan_put_kref(ftdi);
@@ -2691,12 +2664,9 @@ static int ftdi_elan_probe(struct usb_interface *interface,
                 ftdi->class = NULL;
                 dev_info(&ftdi->udev->dev, "USB FDTI=%p ELAN interface %d now a"
                         "ctivated\n", ftdi, iface_desc->desc.bInterfaceNumber);
-                INIT_WORK(&ftdi->status_work, ftdi_elan_status_work,
-                        (void *)ftdi);
-                INIT_WORK(&ftdi->command_work, ftdi_elan_command_work,
-                        (void *)ftdi);
-                INIT_WORK(&ftdi->respond_work, ftdi_elan_respond_work,
-                        (void *)ftdi);
+                INIT_DELAYED_WORK(&ftdi->status_work, ftdi_elan_status_work);
+                INIT_DELAYED_WORK(&ftdi->command_work, ftdi_elan_command_work);
+                INIT_DELAYED_WORK(&ftdi->respond_work, ftdi_elan_respond_work);
                 ftdi_status_queue_work(ftdi, msecs_to_jiffies(3 *1000));
                 return 0;
         } else {
diff --git a/drivers/usb/misc/phidgetkit.c b/drivers/usb/misc/phidgetkit.c
index abb4dcd811ac..33e716c6a79b 100644
--- a/drivers/usb/misc/phidgetkit.c
+++ b/drivers/usb/misc/phidgetkit.c
@@ -81,8 +81,8 @@ struct interfacekit {
 	unsigned char *data;
 	dma_addr_t data_dma;
 
-	struct work_struct do_notify;
-	struct work_struct do_resubmit;
+	struct delayed_work do_notify;
+	struct delayed_work do_resubmit;
 	unsigned long input_events;
 	unsigned long sensor_events;
 };
@@ -374,7 +374,7 @@ static void interfacekit_irq(struct urb *urb)
 	}
 
 	if (kit->input_events || kit->sensor_events)
-		schedule_work(&kit->do_notify);
+		schedule_delayed_work(&kit->do_notify, 0);
 
 resubmit:
 	status = usb_submit_urb(urb, SLAB_ATOMIC);
@@ -384,9 +384,10 @@ resubmit:
 			kit->udev->devpath, status);
 }
 
-static void do_notify(void *data)
+static void do_notify(struct work_struct *work)
 {
-	struct interfacekit *kit = data;
+	struct interfacekit *kit =
+		container_of(work, struct interfacekit, do_notify.work);
 	int i;
 	char sysfs_file[8];
 
@@ -405,9 +406,11 @@ static void do_notify(void *data)
 	}
 }
 
-static void do_resubmit(void *data)
+static void do_resubmit(struct work_struct *work)
 {
-	set_outputs(data);
+	struct interfacekit *kit =
+		container_of(work, struct interfacekit, do_resubmit.work);
+	set_outputs(kit);
 }
 
 #define show_set_output(value)		\
@@ -575,8 +578,8 @@ static int interfacekit_probe(struct usb_interface *intf, const struct usb_devic
 
 	kit->udev = usb_get_dev(dev);
 	kit->intf = intf;
-	INIT_WORK(&kit->do_notify, do_notify, kit);
-	INIT_WORK(&kit->do_resubmit, do_resubmit, kit);
+	INIT_DELAYED_WORK(&kit->do_notify, do_notify);
+	INIT_DELAYED_WORK(&kit->do_resubmit, do_resubmit);
 	usb_fill_int_urb(kit->irq, kit->udev, pipe, kit->data,
 			maxp > URB_INT_SIZE ? URB_INT_SIZE : maxp,
 			interfacekit_irq, kit, endpoint->bInterval);
diff --git a/drivers/usb/misc/phidgetmotorcontrol.c b/drivers/usb/misc/phidgetmotorcontrol.c
index 5c780cab92e0..0385ffcc7419 100644
--- a/drivers/usb/misc/phidgetmotorcontrol.c
+++ b/drivers/usb/misc/phidgetmotorcontrol.c
@@ -41,7 +41,7 @@ struct motorcontrol {
 	unsigned char *data;
 	dma_addr_t data_dma;
 
-	struct work_struct do_notify;
+	struct delayed_work do_notify;
 	unsigned long input_events;
 	unsigned long speed_events;
 	unsigned long exceed_events;
@@ -148,7 +148,7 @@ static void motorcontrol_irq(struct urb *urb)
 		set_bit(1, &mc->exceed_events);
 
 	if (mc->input_events || mc->exceed_events || mc->speed_events)
-		schedule_work(&mc->do_notify);
+		schedule_delayed_work(&mc->do_notify, 0);
 
 resubmit:
 	status = usb_submit_urb(urb, SLAB_ATOMIC);
@@ -159,9 +159,10 @@ resubmit:
 			mc->udev->devpath, status);
 }
 
-static void do_notify(void *data)
+static void do_notify(struct work_struct *work)
 {
-	struct motorcontrol *mc = data;
+	struct motorcontrol *mc =
+		container_of(work, struct motorcontrol, do_notify.work);
 	int i;
 	char sysfs_file[8];
 
@@ -348,7 +349,7 @@ static int motorcontrol_probe(struct usb_interface *intf, const struct usb_devic
 	mc->udev = usb_get_dev(dev);
 	mc->intf = intf;
 	mc->acceleration[0] = mc->acceleration[1] = 10;
-	INIT_WORK(&mc->do_notify, do_notify, mc);
+	INIT_DELAYED_WORK(&mc->do_notify, do_notify);
 	usb_fill_int_urb(mc->irq, mc->udev, pipe, mc->data,
 			maxp > URB_INT_SIZE ? URB_INT_SIZE : maxp,
 			motorcontrol_irq, mc, endpoint->bInterval);
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index 7c906a43e497..fa78326d0bf0 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -222,7 +222,7 @@ struct kaweth_device
 	int suspend_lowmem_ctrl;
 	int linkstate;
 	int opened;
-	struct work_struct lowmem_work;
+	struct delayed_work lowmem_work;
 
 	struct usb_device *dev;
 	struct net_device *net;
@@ -530,9 +530,10 @@ resubmit:
 	kaweth_resubmit_int_urb(kaweth, GFP_ATOMIC);
 }
 
-static void kaweth_resubmit_tl(void *d)
+static void kaweth_resubmit_tl(struct work_struct *work)
 {
-	struct kaweth_device *kaweth = (struct kaweth_device *)d;
+	struct kaweth_device *kaweth =
+		container_of(work, struct kaweth_device, lowmem_work.work);
 
 	if (IS_BLOCKED(kaweth->status))
 		return;
@@ -1126,7 +1127,7 @@ err_fw:
 
 	/* kaweth is zeroed as part of alloc_netdev */
 
-	INIT_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl, (void *)kaweth);
+	INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl);
 
 	SET_MODULE_OWNER(netdev);
 
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 33abbd2176b6..78cf6f091285 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -1280,9 +1280,9 @@ static inline void setup_pegasus_II(pegasus_t * pegasus)
 static struct workqueue_struct *pegasus_workqueue = NULL;
 #define CARRIER_CHECK_DELAY (2 * HZ)
 
-static void check_carrier(void *data)
+static void check_carrier(struct work_struct *work)
 {
-	pegasus_t *pegasus = data;
+	pegasus_t *pegasus = container_of(work, pegasus_t, carrier_check.work);
 	set_carrier(pegasus->net);
 	if (!(pegasus->flags & PEGASUS_UNPLUG)) {
 		queue_delayed_work(pegasus_workqueue, &pegasus->carrier_check,
@@ -1318,7 +1318,7 @@ static int pegasus_probe(struct usb_interface *intf,
 
 	tasklet_init(&pegasus->rx_tl, rx_fixup, (unsigned long) pegasus);
 
-	INIT_WORK(&pegasus->carrier_check, check_carrier, pegasus);
+	INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier);
 
 	pegasus->intf = intf;
 	pegasus->usb = dev;
diff --git a/drivers/usb/net/pegasus.h b/drivers/usb/net/pegasus.h
index 006438069b66..98f6898cae1f 100644
--- a/drivers/usb/net/pegasus.h
+++ b/drivers/usb/net/pegasus.h
@@ -95,7 +95,7 @@ typedef struct pegasus {
 	int			dev_index;
 	int			intr_interval;
 	struct tasklet_struct	rx_tl;
-	struct work_struct	carrier_check;
+	struct delayed_work	carrier_check;
 	struct urb		*ctrl_urb, *rx_urb, *tx_urb, *intr_urb;
 	struct sk_buff		*rx_pool[RX_SKBS];
 	struct sk_buff		*rx_skb;
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index 760b5327b81b..79b5474fe234 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -782,9 +782,10 @@ static struct ethtool_ops usbnet_ethtool_ops = {
  * especially now that control transfers can be queued.
  */
 static void
-kevent (void *data)
+kevent (struct work_struct *work)
 {
-	struct usbnet		*dev = data;
+	struct usbnet		*dev =
+		container_of(work, struct usbnet, kevent);
 	int			status;
 
 	/* usb_clear_halt() needs a thread context */
@@ -1146,7 +1147,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	skb_queue_head_init (&dev->done);
 	dev->bh.func = usbnet_bh;
 	dev->bh.data = (unsigned long) dev;
-	INIT_WORK (&dev->kevent, kevent, dev);
+	INIT_WORK (&dev->kevent, kevent);
 	dev->delay.function = usbnet_bh;
 	dev->delay.data = (unsigned long) dev;
 	init_timer (&dev->delay);
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index 812275509137..2a4ac9bd6a3a 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -92,6 +92,7 @@ struct aircable_private {
 	struct circ_buf *rx_buf;	/* read buffer */
 	int rx_flags;			/* for throttilng */
 	struct work_struct rx_work;	/* work cue for the receiving line */
+	struct usb_serial_port *port;	/* USB port with which associated */
 };
 
 /* Private methods */
@@ -251,10 +252,11 @@ static void aircable_send(struct usb_serial_port *port)
 	schedule_work(&port->work);
 }
 
-static void aircable_read(void *params)
+static void aircable_read(struct work_struct *work)
 {
-	struct usb_serial_port *port = params;
-	struct aircable_private *priv = usb_get_serial_port_data(port);
+	struct aircable_private *priv =
+		container_of(work, struct aircable_private, rx_work);
+	struct usb_serial_port *port = priv->port;
 	struct tty_struct *tty;
 	unsigned char *data;
 	int count;
@@ -348,7 +350,8 @@ static int aircable_attach (struct usb_serial *serial)
 	}
 
 	priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
-	INIT_WORK(&priv->rx_work, aircable_read, port);
+	priv->port = port;
+	INIT_WORK(&priv->rx_work, aircable_read);
 
 	usb_set_serial_port_data(serial->port[0], priv);
 
@@ -515,7 +518,7 @@ static void aircable_read_bulk_callback(struct urb *urb)
 					package_length - shift);
 			}
 		}
-		aircable_read(port);
+		aircable_read(&priv->rx_work);
 	}
 
 	/* Schedule the next read _if_ we are still open */
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index bdb58100fc1d..fd159b040bfb 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -430,13 +430,14 @@ struct digi_port {
 	int dp_in_close;			/* close in progress */
 	wait_queue_head_t dp_close_wait;	/* wait queue for close */
 	struct work_struct dp_wakeup_work;
+	struct usb_serial_port *dp_port;
 };
 
 
 /* Local Function Declarations */
 
 static void digi_wakeup_write( struct usb_serial_port *port );
-static void digi_wakeup_write_lock(void *);
+static void digi_wakeup_write_lock(struct work_struct *work);
 static int digi_write_oob_command( struct usb_serial_port *port,
 	unsigned char *buf, int count, int interruptible );
 static int digi_write_inb_command( struct usb_serial_port *port,
@@ -598,11 +599,12 @@ static inline long cond_wait_interruptible_timeout_irqrestore(
 *  on writes.
 */
 
-static void digi_wakeup_write_lock(void *arg)
+static void digi_wakeup_write_lock(struct work_struct *work)
 {
-	struct usb_serial_port *port = arg;
+	struct digi_port *priv =
+		container_of(work, struct digi_port, dp_wakeup_work);
+	struct usb_serial_port *port = priv->dp_port;
 	unsigned long flags;
-	struct digi_port *priv = usb_get_serial_port_data(port);
 
 
 	spin_lock_irqsave( &priv->dp_port_lock, flags );
@@ -1702,8 +1704,8 @@ dbg( "digi_startup: TOP" );
 		init_waitqueue_head( &priv->dp_flush_wait );
 		priv->dp_in_close = 0;
 		init_waitqueue_head( &priv->dp_close_wait );
-		INIT_WORK(&priv->dp_wakeup_work,
-				digi_wakeup_write_lock, serial->port[i]);
+		INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
+		priv->dp_port = serial->port[i];
 
 		/* initialize write wait queue for this port */
 		init_waitqueue_head( &serial->port[i]->write_wait );
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index c186b4e73c72..88ed5c1d236c 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -559,7 +559,8 @@ struct ftdi_private {
 	char prev_status, diff_status;        /* Used for TIOCMIWAIT */
 	__u8 rx_flags;		/* receive state flags (throttling) */
 	spinlock_t rx_lock;	/* spinlock for receive state */
-	struct work_struct rx_work;
+	struct delayed_work rx_work;
+	struct usb_serial_port *port;
 	int rx_processed;
 	unsigned long rx_bytes;
 
@@ -593,7 +594,7 @@ static int  ftdi_write_room		(struct usb_serial_port *port);
 static int  ftdi_chars_in_buffer	(struct usb_serial_port *port);
 static void ftdi_write_bulk_callback	(struct urb *urb);
 static void ftdi_read_bulk_callback	(struct urb *urb);
-static void ftdi_process_read		(void *param);
+static void ftdi_process_read		(struct work_struct *work);
 static void ftdi_set_termios		(struct usb_serial_port *port, struct termios * old);
 static int  ftdi_tiocmget               (struct usb_serial_port *port, struct file *file);
 static int  ftdi_tiocmset		(struct usb_serial_port *port, struct file * file, unsigned int set, unsigned int clear);
@@ -1201,7 +1202,8 @@ static int ftdi_sio_attach (struct usb_serial *serial)
 		port->read_urb->transfer_buffer_length = BUFSZ;
 	}
 
-	INIT_WORK(&priv->rx_work, ftdi_process_read, port);
+	INIT_DELAYED_WORK(&priv->rx_work, ftdi_process_read);
+	priv->port = port;
 
 	/* Free port's existing write urb and transfer buffer. */
 	if (port->write_urb) {
@@ -1641,17 +1643,18 @@ static void ftdi_read_bulk_callback (struct urb *urb)
 	priv->rx_bytes += countread;
 	spin_unlock_irqrestore(&priv->rx_lock, flags);
 
-	ftdi_process_read(port);
+	ftdi_process_read(&priv->rx_work.work);
 
 } /* ftdi_read_bulk_callback */
 
 
-static void ftdi_process_read (void *param)
+static void ftdi_process_read (struct work_struct *work)
 { /* ftdi_process_read */
-	struct usb_serial_port *port = (struct usb_serial_port*)param;
+	struct ftdi_private *priv =
+		container_of(work, struct ftdi_private, rx_work.work);
+	struct usb_serial_port *port = priv->port;
 	struct urb *urb;
 	struct tty_struct *tty;
-	struct ftdi_private *priv;
 	char error_flag;
 	unsigned char *data;
 
@@ -2180,7 +2183,7 @@ static void ftdi_unthrottle (struct usb_serial_port *port)
 	spin_unlock_irqrestore(&priv->rx_lock, flags);
 
 	if (actually_throttled)
-		schedule_work(&priv->rx_work);
+		schedule_delayed_work(&priv->rx_work, 0);
 }
 
 static int __init ftdi_init (void)
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 909005107ea2..e09a0bfe6231 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -120,6 +120,8 @@ struct keyspan_pda_private {
 	int			tx_throttled;
 	struct work_struct			wakeup_work;
 	struct work_struct			unthrottle_work;
+	struct usb_serial	*serial;
+	struct usb_serial_port	*port;
 };
 
 
@@ -175,9 +177,11 @@ static struct usb_device_id id_table_fake_xircom [] = {
 };
 #endif
 
-static void keyspan_pda_wakeup_write( struct usb_serial_port *port )
+static void keyspan_pda_wakeup_write(struct work_struct *work)
 {
-
+	struct keyspan_pda_private *priv =
+		container_of(work, struct keyspan_pda_private, wakeup_work);
+	struct usb_serial_port *port = priv->port;
 	struct tty_struct *tty = port->tty;
 
 	/* wake up port processes */
@@ -187,8 +191,11 @@ static void keyspan_pda_wakeup_write( struct usb_serial_port *port )
 	tty_wakeup(tty);
 }
 
-static void keyspan_pda_request_unthrottle( struct usb_serial *serial )
+static void keyspan_pda_request_unthrottle(struct work_struct *work)
 {
+	struct keyspan_pda_private *priv =
+		container_of(work, struct keyspan_pda_private, unthrottle_work);
+	struct usb_serial *serial = priv->serial;
 	int result;
 
 	dbg(" request_unthrottle");
@@ -765,11 +772,10 @@ static int keyspan_pda_startup (struct usb_serial *serial)
 		return (1); /* error */
 	usb_set_serial_port_data(serial->port[0], priv);
 	init_waitqueue_head(&serial->port[0]->write_wait);
-	INIT_WORK(&priv->wakeup_work, (void *)keyspan_pda_wakeup_write,
-			(void *)(serial->port[0]));
-	INIT_WORK(&priv->unthrottle_work,
-			(void *)keyspan_pda_request_unthrottle,
-			(void *)(serial));
+	INIT_WORK(&priv->wakeup_work, keyspan_pda_wakeup_write);
+	INIT_WORK(&priv->unthrottle_work, keyspan_pda_request_unthrottle);
+	priv->serial = serial;
+	priv->port = serial->port[0];
 	return (0);
 }
 
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 8006e51c34bb..2cfba8488a93 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -533,9 +533,10 @@ void usb_serial_port_softint(struct usb_serial_port *port)
 	schedule_work(&port->work);
 }
 
-static void usb_serial_port_work(void *private)
+static void usb_serial_port_work(struct work_struct *work)
 {
-	struct usb_serial_port *port = private;
+	struct usb_serial_port *port =
+		container_of(work, struct usb_serial_port, work);
 	struct tty_struct *tty;
 
 	dbg("%s - port %d", __FUNCTION__, port->number);
@@ -799,7 +800,7 @@ int usb_serial_probe(struct usb_interface *interface,
 		port->serial = serial;
 		spin_lock_init(&port->lock);
 		mutex_init(&port->mutex);
-		INIT_WORK(&port->work, usb_serial_port_work, port);
+		INIT_WORK(&port->work, usb_serial_port_work);
 		serial->port[i] = port;
 	}
 
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 4d1cd7aeccd3..154c7d290597 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -227,6 +227,7 @@ struct whiteheat_private {
 	struct list_head	rx_urbs_submitted;
 	struct list_head	rx_urb_q;
 	struct work_struct	rx_work;
+	struct usb_serial_port	*port;
 	struct list_head	tx_urbs_free;
 	struct list_head	tx_urbs_submitted;
 };
@@ -241,7 +242,7 @@ static void command_port_read_callback(struct urb *urb);
 static int start_port_read(struct usb_serial_port *port);
 static struct whiteheat_urb_wrap *urb_to_wrap(struct urb *urb, struct list_head *head);
 static struct list_head *list_first(struct list_head *head);
-static void rx_data_softint(void *private);
+static void rx_data_softint(struct work_struct *work);
 
 static int firm_send_command(struct usb_serial_port *port, __u8 command, __u8 *data, __u8 datasize);
 static int firm_open(struct usb_serial_port *port);
@@ -424,7 +425,8 @@ static int whiteheat_attach (struct usb_serial *serial)
 		spin_lock_init(&info->lock);
 		info->flags = 0;
 		info->mcr = 0;
-		INIT_WORK(&info->rx_work, rx_data_softint, port);
+		INIT_WORK(&info->rx_work, rx_data_softint);
+		info->port = port;
 
 		INIT_LIST_HEAD(&info->rx_urbs_free);
 		INIT_LIST_HEAD(&info->rx_urbs_submitted);
@@ -949,7 +951,7 @@ static void whiteheat_unthrottle (struct usb_serial_port *port)
 	spin_unlock_irqrestore(&info->lock, flags);
 
 	if (actually_throttled)
-		rx_data_softint(port);
+		rx_data_softint(&info->rx_work);
 
 	return;
 }
@@ -1400,10 +1402,11 @@ static struct list_head *list_first(struct list_head *head)
 }
 
 
-static void rx_data_softint(void *private)
+static void rx_data_softint(struct work_struct *work)
 {
-	struct usb_serial_port *port = (struct usb_serial_port *)private;
-	struct whiteheat_private *info = usb_get_serial_port_data(port);
+	struct whiteheat_private *info =
+		container_of(work, struct whiteheat_private, rx_work);
+	struct usb_serial_port *port = info->port;
 	struct tty_struct *tty = port->tty;
 	struct whiteheat_urb_wrap *wrap;
 	struct urb *urb;
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 302174b8e477..31f476a64790 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -383,9 +383,9 @@ static void fbcon_update_softback(struct vc_data *vc)
 		softback_top = 0;
 }
 
-static void fb_flashcursor(void *private)
+static void fb_flashcursor(struct work_struct *work)
 {
-	struct fb_info *info = private;
+	struct fb_info *info = container_of(work, struct fb_info, queue);
 	struct fbcon_ops *ops = info->fbcon_par;
 	struct display *p;
 	struct vc_data *vc = NULL;
@@ -442,7 +442,7 @@ static void fbcon_add_cursor_timer(struct fb_info *info)
 	if ((!info->queue.func || info->queue.func == fb_flashcursor) &&
 	    !(ops->flags & FBCON_FLAGS_CURSOR_TIMER)) {
 		if (!info->queue.func)
-			INIT_WORK(&info->queue, fb_flashcursor, info);
+			INIT_WORK(&info->queue, fb_flashcursor);
 
 		init_timer(&ops->cursor_timer);
 		ops->cursor_timer.function = cursor_timer_handler;
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 90a79c784549..944273c3dbff 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -110,8 +110,8 @@ struct v9fs_mux_rpc {
 };
 
 static int v9fs_poll_proc(void *);
-static void v9fs_read_work(void *);
-static void v9fs_write_work(void *);
+static void v9fs_read_work(struct work_struct *work);
+static void v9fs_write_work(struct work_struct *work);
 static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
 			  poll_table * p);
 static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
@@ -297,8 +297,8 @@ struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
 	m->rbuf = NULL;
 	m->wpos = m->wsize = 0;
 	m->wbuf = NULL;
-	INIT_WORK(&m->rq, v9fs_read_work, m);
-	INIT_WORK(&m->wq, v9fs_write_work, m);
+	INIT_WORK(&m->rq, v9fs_read_work);
+	INIT_WORK(&m->wq, v9fs_write_work);
 	m->wsched = 0;
 	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
 	m->poll_task = NULL;
@@ -458,13 +458,13 @@ static int v9fs_poll_proc(void *a)
 /**
  * v9fs_write_work - called when a transport can send some data
  */
-static void v9fs_write_work(void *a)
+static void v9fs_write_work(struct work_struct *work)
 {
 	int n, err;
 	struct v9fs_mux_data *m;
 	struct v9fs_req *req;
 
-	m = a;
+	m = container_of(work, struct v9fs_mux_data, wq);
 
 	if (m->err < 0) {
 		clear_bit(Wworksched, &m->wsched);
@@ -564,7 +564,7 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 /**
  * v9fs_read_work - called when there is some data to be read from a transport
  */
-static void v9fs_read_work(void *a)
+static void v9fs_read_work(struct work_struct *work)
 {
 	int n, err;
 	struct v9fs_mux_data *m;
@@ -572,7 +572,7 @@ static void v9fs_read_work(void *a)
 	struct v9fs_fcall *rcall;
 	char *rbuf;
 
-	m = a;
+	m = container_of(work, struct v9fs_mux_data, rq);
 
 	if (m->err < 0)
 		return;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 78fe0fae23ff..55f5333dae99 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -35,7 +35,7 @@
 
 struct greedy {
 	struct gfs2_holder gr_gh;
-	struct work_struct gr_work;
+	struct delayed_work gr_work;
 };
 
 struct gfs2_gl_hash_bucket {
@@ -1368,9 +1368,9 @@ static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
 	glops->go_xmote_th(gl, state, flags);
 }
 
-static void greedy_work(void *data)
+static void greedy_work(struct work_struct *work)
 {
-	struct greedy *gr = data;
+	struct greedy *gr = container_of(work, struct greedy, gr_work.work);
 	struct gfs2_holder *gh = &gr->gr_gh;
 	struct gfs2_glock *gl = gh->gh_gl;
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
@@ -1422,7 +1422,7 @@ int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
 
 	gfs2_holder_init(gl, 0, 0, gh);
 	set_bit(HIF_GREEDY, &gh->gh_iflags);
-	INIT_WORK(&gr->gr_work, greedy_work, gr);
+	INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
 
 	set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
 	schedule_delayed_work(&gr->gr_work, time);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 42e3bef270c9..72dad552aa00 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -577,12 +577,12 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 		server->rcv.ptr = (unsigned char*)&server->rcv.buf;
 		server->rcv.len = 10;
 		server->rcv.state = 0;
-		INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc, server);
-		INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc, server);
+		INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc);
+		INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc);
 		sock->sk->sk_write_space = ncp_tcp_write_space;
 	} else {
-		INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc, server);
-		INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc, server);
+		INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc);
+		INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc);
 		server->timeout_tm.data = (unsigned long)server;
 		server->timeout_tm.function = ncpdgram_timeout_call;
 	}
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 11c2b252ebed..e496d8b65e92 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -350,9 +350,10 @@ static void info_server(struct ncp_server *server, unsigned int id, const void *
 	}
 }
 
-void ncpdgram_rcv_proc(void *s)
+void ncpdgram_rcv_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, rcv.tq);
 	struct socket* sock;
 	
 	sock = server->ncp_sock;
@@ -468,9 +469,10 @@ static void __ncpdgram_timeout_proc(struct ncp_server *server)
 	}
 }
 
-void ncpdgram_timeout_proc(void *s)
+void ncpdgram_timeout_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, timeout_tq);
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncpdgram_timeout_proc(server);
 	mutex_unlock(&server->rcv.creq_mutex);
@@ -652,18 +654,20 @@ skipdata:;
 	}
 }
 
-void ncp_tcp_rcv_proc(void *s)
+void ncp_tcp_rcv_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, rcv.tq);
 
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_rcv_proc(server);
 	mutex_unlock(&server->rcv.creq_mutex);
 }
 
-void ncp_tcp_tx_proc(void *s)
+void ncp_tcp_tx_proc(struct work_struct *work)
 {
-	struct ncp_server *server = s;
+	struct ncp_server *server =
+		container_of(work, struct ncp_server, tx.tq);
 	
 	mutex_lock(&server->rcv.creq_mutex);
 	__ncptcp_try_send(server);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 293b6495829f..e431e93ab503 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1829,9 +1829,8 @@ out:
 }
 
 static struct workqueue_struct *laundry_wq;
-static struct work_struct laundromat_work;
-static void laundromat_main(void *);
-static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
+static void laundromat_main(struct work_struct *);
+static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
 
 __be32
 nfsd4_renew(clientid_t *clid)
@@ -1940,7 +1939,7 @@ nfs4_laundromat(void)
 }
 
 void
-laundromat_main(void *not_used)
+laundromat_main(struct work_struct *not_used)
 {
 	time_t t;
 
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f43bc5f18a35..0b2ad163005e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1205,10 +1205,12 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	return status;
 }
 
-static void ocfs2_truncate_log_worker(void *data)
+static void ocfs2_truncate_log_worker(struct work_struct *work)
 {
 	int status;
-	struct ocfs2_super *osb = data;
+	struct ocfs2_super *osb =
+		container_of(work, struct ocfs2_super,
+			     osb_truncate_log_wq.work);
 
 	mlog_entry_void();
 
@@ -1441,7 +1443,8 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
 	/* ocfs2_truncate_log_shutdown keys on the existence of
 	 * osb->osb_tl_inode so we don't set any of the osb variables
 	 * until we're sure all is well. */
-	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
+	INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
+			  ocfs2_truncate_log_worker);
 	osb->osb_tl_bh    = tl_bh;
 	osb->osb_tl_inode = tl_inode;
 
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 305cba3681fe..4cd9a9580456 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -141,7 +141,7 @@ struct o2hb_region {
 	 * recognizes a node going up and down in one iteration */
 	u64			hr_generation;
 
-	struct work_struct	hr_write_timeout_work;
+	struct delayed_work	hr_write_timeout_work;
 	unsigned long		hr_last_timeout_start;
 
 	/* Used during o2hb_check_slot to hold a copy of the block
@@ -156,9 +156,11 @@ struct o2hb_bio_wait_ctxt {
 	int               wc_error;
 };
 
-static void o2hb_write_timeout(void *arg)
+static void o2hb_write_timeout(struct work_struct *work)
 {
-	struct o2hb_region *reg = arg;
+	struct o2hb_region *reg =
+		container_of(work, struct o2hb_region,
+			     hr_write_timeout_work.work);
 
 	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
 	     "milliseconds\n", reg->hr_dev_name,
@@ -1404,7 +1406,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 		goto out;
 	}
 
-	INIT_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout, reg);
+	INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
 
 	/*
 	 * A node is considered live after it has beat LIVE_THRESHOLD
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 7bba98fbfc15..4705d659fe57 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -88,7 +88,7 @@ void o2quo_disk_timeout(void)
 	o2quo_fence_self();
 }
 
-static void o2quo_make_decision(void *arg)
+static void o2quo_make_decision(struct work_struct *work)
 {
 	int quorum;
 	int lowest_hb, lowest_reachable = 0, fence = 0;
@@ -306,7 +306,7 @@ void o2quo_init(void)
 	struct o2quo_state *qs = &o2quo_state;
 
 	spin_lock_init(&qs->qs_lock);
-	INIT_WORK(&qs->qs_work, o2quo_make_decision, NULL);
+	INIT_WORK(&qs->qs_work, o2quo_make_decision);
 }
 
 void o2quo_exit(void)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index b650efa8c8be..9b3209dc0b16 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -140,11 +140,11 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
 		 [O2NET_ERR_DIED]	= -EHOSTDOWN,};
 
 /* can't quite avoid *all* internal declarations :/ */
-static void o2net_sc_connect_completed(void *arg);
-static void o2net_rx_until_empty(void *arg);
-static void o2net_shutdown_sc(void *arg);
+static void o2net_sc_connect_completed(struct work_struct *work);
+static void o2net_rx_until_empty(struct work_struct *work);
+static void o2net_shutdown_sc(struct work_struct *work);
 static void o2net_listen_data_ready(struct sock *sk, int bytes);
-static void o2net_sc_send_keep_req(void *arg);
+static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
 
@@ -308,10 +308,10 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 	o2nm_node_get(node);
 	sc->sc_node = node;
 
-	INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed, sc);
-	INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty, sc);
-	INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc, sc);
-	INIT_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req, sc);
+	INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
+	INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
+	INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
+	INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req);
 
 	init_timer(&sc->sc_idle_timeout);
 	sc->sc_idle_timeout.function = o2net_idle_timer;
@@ -342,7 +342,7 @@ static void o2net_sc_queue_work(struct o2net_sock_container *sc,
 		sc_put(sc);
 }
 static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
-					struct work_struct *work,
+					struct delayed_work *work,
 					int delay)
 {
 	sc_get(sc);
@@ -350,7 +350,7 @@ static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
 		sc_put(sc);
 }
 static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
-					 struct work_struct *work)
+					 struct delayed_work *work)
 {
 	if (cancel_delayed_work(work))
 		sc_put(sc);
@@ -564,9 +564,11 @@ static void o2net_ensure_shutdown(struct o2net_node *nn,
  * ourselves as state_change couldn't get the nn_lock and call set_nn_state
  * itself.
  */
-static void o2net_shutdown_sc(void *arg)
+static void o2net_shutdown_sc(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_shutdown_work);
 	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 
 	sclog(sc, "shutting down\n");
@@ -1201,9 +1203,10 @@ out:
 /* this work func is triggerd by data ready.  it reads until it can read no
  * more.  it interprets 0, eof, as fatal.  if data_ready hits while we're doing
  * our work the work struct will be marked and we'll be called again. */
-static void o2net_rx_until_empty(void *arg)
+static void o2net_rx_until_empty(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container, sc_rx_work);
 	int ret;
 
 	do {
@@ -1249,9 +1252,11 @@ static int o2net_set_nodelay(struct socket *sock)
 
 /* called when a connect completes and after a sock is accepted.  the
  * rx path will see the response and mark the sc valid */
-static void o2net_sc_connect_completed(void *arg)
+static void o2net_sc_connect_completed(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_connect_work);
 
 	mlog(ML_MSG, "sc sending handshake with ver %llu id %llx\n",
               (unsigned long long)O2NET_PROTOCOL_VERSION,
@@ -1262,9 +1267,11 @@ static void o2net_sc_connect_completed(void *arg)
 }
 
 /* this is called as a work_struct func. */
-static void o2net_sc_send_keep_req(void *arg)
+static void o2net_sc_send_keep_req(struct work_struct *work)
 {
-	struct o2net_sock_container *sc = arg;
+	struct o2net_sock_container *sc =
+		container_of(work, struct o2net_sock_container,
+			     sc_keepalive_work.work);
 
 	o2net_sendpage(sc, o2net_keep_req, sizeof(*o2net_keep_req));
 	sc_put(sc);
@@ -1314,14 +1321,15 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
  * having a connect attempt fail, etc. This centralizes the logic which decides
  * if a connect attempt should be made or if we should give up and all future
  * transmit attempts should fail */
-static void o2net_start_connect(void *arg)
+static void o2net_start_connect(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_connect_work.work);
 	struct o2net_sock_container *sc = NULL;
 	struct o2nm_node *node = NULL, *mynode = NULL;
 	struct socket *sock = NULL;
 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
-	int ret = 0;
+	int ret = 0, stop;
 
 	/* if we're greater we initiate tx, otherwise we accept */
 	if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1342,10 +1350,9 @@ static void o2net_start_connect(void *arg)
 
 	spin_lock(&nn->nn_lock);
 	/* see if we already have one pending or have given up */
-	if (nn->nn_sc || nn->nn_persistent_error)
-		arg = NULL;
+	stop = (nn->nn_sc || nn->nn_persistent_error);
 	spin_unlock(&nn->nn_lock);
-	if (arg == NULL) /* *shrug*, needed some indicator */
+	if (stop)
 		goto out;
 
 	nn->nn_last_connect_attempt = jiffies;
@@ -1421,9 +1428,10 @@ out:
 	return;
 }
 
-static void o2net_connect_expired(void *arg)
+static void o2net_connect_expired(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_connect_expired.work);
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
@@ -1436,9 +1444,10 @@ static void o2net_connect_expired(void *arg)
 	spin_unlock(&nn->nn_lock);
 }
 
-static void o2net_still_up(void *arg)
+static void o2net_still_up(struct work_struct *work)
 {
-	struct o2net_node *nn = arg;
+	struct o2net_node *nn =
+		container_of(work, struct o2net_node, nn_still_up.work);
 
 	o2quo_hb_still_up(o2net_num_from_nn(nn));
 }
@@ -1644,9 +1653,9 @@ out:
 	return ret;
 }
 
-static void o2net_accept_many(void *arg)
+static void o2net_accept_many(struct work_struct *work)
 {
-	struct socket *sock = arg;
+	struct socket *sock = o2net_listen_sock;
 	while (o2net_accept_one(sock) == 0)
 		cond_resched();
 }
@@ -1700,7 +1709,7 @@ static int o2net_open_listening_sock(__be16 port)
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
 	o2net_listen_sock = sock;
-	INIT_WORK(&o2net_listen_work, o2net_accept_many, sock);
+	INIT_WORK(&o2net_listen_work, o2net_accept_many);
 
 	sock->sk->sk_reuse = 1;
 	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
@@ -1819,9 +1828,10 @@ int o2net_init(void)
 		struct o2net_node *nn = o2net_nn_from_num(i);
 
 		spin_lock_init(&nn->nn_lock);
-		INIT_WORK(&nn->nn_connect_work, o2net_start_connect, nn);
-		INIT_WORK(&nn->nn_connect_expired, o2net_connect_expired, nn);
-		INIT_WORK(&nn->nn_still_up, o2net_still_up, nn);
+		INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
+		INIT_DELAYED_WORK(&nn->nn_connect_expired,
+				  o2net_connect_expired);
+		INIT_DELAYED_WORK(&nn->nn_still_up, o2net_still_up);
 		/* until we see hb from a node we'll return einval */
 		nn->nn_persistent_error = -ENOTCONN;
 		init_waitqueue_head(&nn->nn_sc_wq);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4b46aac7d243..daebbd3a2c8c 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -86,18 +86,18 @@ struct o2net_node {
 	 * connect attempt fails and so can be self-arming.  shutdown is
 	 * careful to first mark the nn such that no connects will be attempted
 	 * before canceling delayed connect work and flushing the queue. */
-	struct work_struct		nn_connect_work;
+	struct delayed_work		nn_connect_work;
 	unsigned long			nn_last_connect_attempt;
 
 	/* this is queued as nodes come up and is canceled when a connection is
 	 * established.  this expiring gives up on the node and errors out
 	 * transmits */
-	struct work_struct		nn_connect_expired;
+	struct delayed_work		nn_connect_expired;
 
 	/* after we give up on a socket we wait a while before deciding
 	 * that it is still heartbeating and that we should do some
 	 * quorum work */
-	struct work_struct		nn_still_up;
+	struct delayed_work		nn_still_up;
 };
 
 struct o2net_sock_container {
@@ -129,7 +129,7 @@ struct o2net_sock_container {
 	struct work_struct	sc_shutdown_work;
 
 	struct timer_list	sc_idle_timeout;
-	struct work_struct	sc_keepalive_work;
+	struct delayed_work	sc_keepalive_work;
 
 	unsigned		sc_handshake_ok:1;
 
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index fa968180b072..6b6ff76538c5 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -153,7 +153,7 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned
  * called functions that cannot be directly called from the
  * net message handlers for some reason, usually because
  * they need to send net messages of their own. */
-void dlm_dispatch_work(void *data);
+void dlm_dispatch_work(struct work_struct *work);
 
 struct dlm_lock_resource;
 struct dlm_work_item;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8d1065f8b3bd..637646e6922e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1296,7 +1296,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
 	spin_lock_init(&dlm->work_lock);
 	INIT_LIST_HEAD(&dlm->work_list);
-	INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work, dlm);
+	INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work);
 
 	kref_init(&dlm->dlm_refs);
 	dlm->dlm_state = DLM_CTXT_NEW;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9d950d7cea38..fb3e2b0817f1 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -153,9 +153,10 @@ static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
 }
 
 /* Worker function used during recovery. */
-void dlm_dispatch_work(void *data)
+void dlm_dispatch_work(struct work_struct *work)
 {
-	struct dlm_ctxt *dlm = (struct dlm_ctxt *)data;
+	struct dlm_ctxt *dlm =
+		container_of(work, struct dlm_ctxt, dispatched_work);
 	LIST_HEAD(tmp_list);
 	struct list_head *iter, *iter2;
 	struct dlm_work_item *item;
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index eead48bbfac6..7d2f578b267d 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -171,15 +171,14 @@ static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
 		BUG();
 }
 
-static void user_dlm_unblock_lock(void *opaque);
+static void user_dlm_unblock_lock(struct work_struct *work);
 
 static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
 {
 	if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
 		user_dlm_grab_inode_ref(lockres);
 
-		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock,
-			  lockres);
+		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);
 
 		queue_work(user_dlm_worker, &lockres->l_work);
 		lockres->l_flags |= USER_LOCK_QUEUED;
@@ -279,10 +278,11 @@ static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
 	iput(inode);
 }
 
-static void user_dlm_unblock_lock(void *opaque)
+static void user_dlm_unblock_lock(struct work_struct *work)
 {
 	int new_level, status;
-	struct user_lock_res *lockres = (struct user_lock_res *) opaque;
+	struct user_lock_res *lockres =
+		container_of(work, struct user_lock_res, l_work);
 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
 
 	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fd9734def551..d95ee2720e6e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -911,11 +911,12 @@ struct ocfs2_la_recovery_item {
  * NOTE: This function can and will sleep on recovery of other nodes
  * during cluster locking, just like any other ocfs2 process.
  */
-void ocfs2_complete_recovery(void *data)
+void ocfs2_complete_recovery(struct work_struct *work)
 {
 	int ret;
-	struct ocfs2_super *osb = data;
-	struct ocfs2_journal *journal = osb->journal;
+	struct ocfs2_journal *journal =
+		container_of(work, struct ocfs2_journal, j_recovery_work);
+	struct ocfs2_super *osb = journal->j_osb;
 	struct ocfs2_dinode *la_dinode, *tl_dinode;
 	struct ocfs2_la_recovery_item *item;
 	struct list_head *p, *n;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2f3a6acdac45..5be161a4ad9f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -172,7 +172,7 @@ static inline void ocfs2_handle_set_sync(struct ocfs2_journal_handle *handle, in
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-void ocfs2_complete_recovery(void *data);
+void ocfs2_complete_recovery(struct work_struct *work);
 
 /*
  *  Journal Control:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 0462a7f4e21b..9b1bad1d48ec 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -283,7 +283,7 @@ struct ocfs2_super
 	/* Truncate log info */
 	struct inode			*osb_tl_inode;
 	struct buffer_head		*osb_tl_bh;
-	struct work_struct		osb_truncate_log_wq;
+	struct delayed_work		osb_truncate_log_wq;
 
 	struct ocfs2_node_map		osb_recovering_orphan_dirs;
 	unsigned int			*osb_orphan_wipes;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 76b46ebbb10c..9a8089030f55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1365,7 +1365,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	spin_lock_init(&journal->j_lock);
 	journal->j_trans_id = (unsigned long) 1;
 	INIT_LIST_HEAD(&journal->j_la_cleanups);
-	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery, osb);
+	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
 	journal->j_state = OCFS2_JOURNAL_FREE;
 
 	/* get some pseudo constants for clustersize bits */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 85ce23268302..cd1bb75ceb24 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -104,7 +104,7 @@ static int release_journal_dev(struct super_block *super,
 			       struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
 				 struct reiserfs_journal_list *jl);
-static void flush_async_commits(void *p);
+static void flush_async_commits(struct work_struct *work);
 static void queue_log_writer(struct super_block *s);
 
 /* values for join in do_journal_begin_r */
@@ -2836,7 +2836,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	if (reiserfs_mounted_fs_count <= 1)
 		commit_wq = create_workqueue("reiserfs");
 
-	INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
+	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
+	journal->j_work_sb = p_s_sb;
 	return 0;
       free_and_return:
 	free_journal_ram(p_s_sb);
@@ -3447,10 +3448,11 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
 /*
 ** writeback the pending async commits to disk
 */
-static void flush_async_commits(void *p)
+static void flush_async_commits(struct work_struct *work)
 {
-	struct super_block *p_s_sb = p;
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal =
+		container_of(work, struct reiserfs_journal, j_work.work);
+	struct super_block *p_s_sb = journal->j_work_sb;
 	struct reiserfs_journal_list *jl;
 	struct list_head *entry;
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 09360cf1e1f2..8e6b56fc1cad 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,9 +149,10 @@ xfs_destroy_ioend(
  */
 STATIC void
 xfs_end_bio_delalloc(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 
 	xfs_destroy_ioend(ioend);
 }
@@ -161,9 +162,10 @@ xfs_end_bio_delalloc(
  */
 STATIC void
 xfs_end_bio_written(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 
 	xfs_destroy_ioend(ioend);
 }
@@ -176,9 +178,10 @@ xfs_end_bio_written(
  */
 STATIC void
 xfs_end_bio_unwritten(
-	void			*data)
+	struct work_struct	*work)
 {
-	xfs_ioend_t		*ioend = data;
+	xfs_ioend_t		*ioend =
+		container_of(work, xfs_ioend_t, io_work);
 	bhv_vnode_t		*vp = ioend->io_vnode;
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
@@ -220,11 +223,11 @@ xfs_alloc_ioend(
 	ioend->io_size = 0;
 
 	if (type == IOMAP_UNWRITTEN)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
 	else if (type == IOMAP_DELAY)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
 	else
-		INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
+		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
 
 	return ioend;
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d3382843698e..eef4a0ba11e9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -994,9 +994,10 @@ xfs_buf_wait_unpin(
 
 STATIC void
 xfs_buf_iodone_work(
-	void			*v)
+	struct work_struct	*work)
 {
-	xfs_buf_t		*bp = (xfs_buf_t *)v;
+	xfs_buf_t		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
@@ -1017,10 +1018,10 @@ xfs_buf_ioend(
 
 	if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
 		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
+			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
 			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 		} else {
-			xfs_buf_iodone_work(bp);
+			xfs_buf_iodone_work(&bp->b_iodone_work);
 		}
 	} else {
 		up(&bp->b_iodonesema);
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 4c02119c6ab9..3ea1cd58de97 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -133,7 +133,7 @@ struct cn_callback_data {
 struct cn_callback_entry {
 	struct list_head callback_entry;
 	struct cn_callback *cb;
-	struct work_struct work;
+	struct delayed_work work;
 	struct cn_queue_dev *pdev;
 
 	struct cn_callback_id id;
@@ -170,7 +170,7 @@ void cn_queue_free_dev(struct cn_queue_dev *dev);
 
 int cn_cb_equal(struct cb_id *, struct cb_id *);
 
-void cn_queue_wrapper(void *data);
+void cn_queue_wrapper(struct work_struct *work);
 
 extern int cn_already_initialized;
 
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index c115e9e840b4..1fb02e17f6f6 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -461,7 +461,7 @@ struct i2o_driver {
 	int (*reply) (struct i2o_controller *, u32, struct i2o_message *);
 
 	/* Event handler */
-	void (*event) (struct i2o_event *);
+	work_func_t event;
 
 	struct workqueue_struct *event_queue;	/* Event queue */
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 587264a58d56..8b08ef3820f2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -110,7 +110,7 @@ struct mmc_host {
 	struct mmc_card		*card_busy;	/* the MMC card claiming host */
 	struct mmc_card		*card_selected;	/* the selected MMC card */
 
-	struct work_struct	detect;
+	struct delayed_work	detect;
 
 	unsigned long		private[0] ____cacheline_aligned;
 };
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index b089d9506283..a503052138bd 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -127,10 +127,10 @@ struct ncp_server {
 	} unexpected_packet;
 };
 
-extern void ncp_tcp_rcv_proc(void *server);
-extern void ncp_tcp_tx_proc(void *server);
-extern void ncpdgram_rcv_proc(void *server);
-extern void ncpdgram_timeout_proc(void *server);
+extern void ncp_tcp_rcv_proc(struct work_struct *work);
+extern void ncp_tcp_tx_proc(struct work_struct *work);
+extern void ncpdgram_rcv_proc(struct work_struct *work);
+extern void ncpdgram_timeout_proc(struct work_struct *work);
 extern void ncpdgram_timeout_call(unsigned long server);
 extern void ncp_tcp_data_ready(struct sock* sk, int len);
 extern void ncp_tcp_write_space(struct sock* sk);
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 73e0becec086..6610103f23e1 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -249,7 +249,8 @@ struct reiserfs_journal {
 	int j_errno;
 
 	/* when flushing ordered buffers, throttle new ordered writers */
-	struct work_struct j_work;
+	struct delayed_work j_work;
+	struct super_block *j_work_sb;
 	atomic_t j_async_throttle;
 };
 
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 24accb483849..0e3d91b76996 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -38,7 +38,7 @@ struct rchan_buf
 	size_t subbufs_consumed;	/* count of sub-buffers consumed */
 	struct rchan *chan;		/* associated channel */
 	wait_queue_head_t read_wait;	/* reader wait queue */
-	struct work_struct wake_readers; /* reader wake-up work struct */
+	struct delayed_work wake_readers; /* reader wake-up work struct */
 	struct dentry *dentry;		/* channel file dentry */
 	struct kref kref;		/* channel buffer refcount */
 	struct page **page_array;	/* array of current buffer pages */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 5482bfb3303d..06ce7a626040 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -382,7 +382,7 @@ struct usb_device {
 
 	int pm_usage_cnt;		/* usage counter for autosuspend */
 #ifdef CONFIG_PM
-	struct work_struct autosuspend;	/* for delayed autosuspends */
+	struct delayed_work autosuspend; /* for delayed autosuspends */
 	struct mutex pm_mutex;		/* protects PM operations */
 
 	unsigned auto_pm:1;		/* autosuspend/resume in progress */
diff --git a/include/net/ieee80211softmac.h b/include/net/ieee80211softmac.h
index 617b672b1132..89119277553d 100644
--- a/include/net/ieee80211softmac.h
+++ b/include/net/ieee80211softmac.h
@@ -108,8 +108,8 @@ struct ieee80211softmac_assoc_info {
 	/* Scan retries remaining */
 	int scan_retry;
 
-	struct work_struct work;
-	struct work_struct timeout;
+	struct delayed_work work;
+	struct delayed_work timeout;
 };
 
 struct ieee80211softmac_bss_info {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c6d93bb0dcd2..60b684470db8 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1030,7 +1030,7 @@ void sctp_inq_init(struct sctp_inq *);
 void sctp_inq_free(struct sctp_inq *);
 void sctp_inq_push(struct sctp_inq *, struct sctp_chunk *packet);
 struct sctp_chunk *sctp_inq_pop(struct sctp_inq *);
-void sctp_inq_set_th_handler(struct sctp_inq *, void (*)(void *), void *);
+void sctp_inq_set_th_handler(struct sctp_inq *, work_func_t);
 
 /* This is the structure we use to hold outbound chunks.  You push
  * chunks in and they automatically pop out the other end as bundled
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 9582e8401669..9ccc0365aa89 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -200,9 +200,14 @@ struct domain_device {
         void *lldd_dev;
 };
 
+struct sas_discovery_event {
+	struct work_struct work;
+	struct asd_sas_port *port;
+};
+
 struct sas_discovery {
 	spinlock_t disc_event_lock;
-	struct work_struct disc_work[DISC_NUM_EVENTS];
+	struct sas_discovery_event disc_work[DISC_NUM_EVENTS];
 	unsigned long    pending;
 	u8     fanout_sas_addr[8];
 	u8     eeds_a[8];
@@ -248,14 +253,19 @@ struct asd_sas_port {
 	void *lldd_port;	  /* not touched by the sas class code */
 };
 
+struct asd_sas_event {
+	struct work_struct work;
+	struct asd_sas_phy *phy;
+};
+
 /* The phy pretty much is controlled by the LLDD.
  * The class only reads those fields.
  */
 struct asd_sas_phy {
 /* private: */
 	/* protected by ha->event_lock */
-	struct work_struct   port_events[PORT_NUM_EVENTS];
-	struct work_struct   phy_events[PHY_NUM_EVENTS];
+	struct asd_sas_event   port_events[PORT_NUM_EVENTS];
+	struct asd_sas_event   phy_events[PHY_NUM_EVENTS];
 
 	unsigned long port_events_pending;
 	unsigned long phy_events_pending;
@@ -307,10 +317,15 @@ struct scsi_core {
 	int               queue_thread_kill;
 };
 
+struct sas_ha_event {
+	struct work_struct work;
+	struct sas_ha_struct *ha;
+};
+
 struct sas_ha_struct {
 /* private: */
 	spinlock_t       event_lock;
-	struct work_struct ha_events[HA_NUM_EVENTS];
+	struct sas_ha_event ha_events[HA_NUM_EVENTS];
 	unsigned long	 pending;
 
 	struct scsi_core core;
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index fd352323378b..798f7c7ee426 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -206,9 +206,9 @@ struct fc_rport {	/* aka fc_starget_attrs */
 	u8 flags;
 	struct list_head peers;
 	struct device dev;
- 	struct work_struct dev_loss_work;
+ 	struct delayed_work dev_loss_work;
  	struct work_struct scan_work;
- 	struct work_struct fail_io_work;
+ 	struct delayed_work fail_io_work;
  	struct work_struct stgt_delete_work;
 	struct work_struct rport_delete_work;
 } __attribute__((aligned(sizeof(unsigned long))));
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 4b95c89c95c9..d5c218ddc527 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -176,7 +176,7 @@ struct iscsi_cls_session {
 
 	/* recovery fields */
 	int recovery_tmo;
-	struct work_struct recovery_work;
+	struct delayed_work recovery_work;
 
 	int target_id;
 
diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 4c43521cc493..33720397a904 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -511,7 +511,7 @@ struct snd_ac97 {
 #ifdef CONFIG_SND_AC97_POWER_SAVE
 	unsigned int power_up;	/* power states */
 	struct workqueue_struct *power_workq;
-	struct work_struct power_work;
+	struct delayed_work power_work;
 #endif
 	struct device dev;
 };
diff --git a/include/sound/ak4114.h b/include/sound/ak4114.h
index 11702aa0bea9..2ee061625fd0 100644
--- a/include/sound/ak4114.h
+++ b/include/sound/ak4114.h
@@ -182,7 +182,7 @@ struct ak4114 {
 	unsigned char rcs0;
 	unsigned char rcs1;
 	struct workqueue_struct *workqueue;
-	struct work_struct work;
+	struct delayed_work work;
 	void *change_callback_private;
 	void (*change_callback)(struct ak4114 *ak4114, unsigned char c0, unsigned char c1);
 };
diff --git a/kernel/relay.c b/kernel/relay.c
index f04bbdb56ac2..2b92e8ece85b 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -308,9 +308,10 @@ static struct rchan_callbacks default_channel_callbacks = {
  *	reason waking is deferred is that calling directly from write
  *	causes problems if you're writing from say the scheduler.
  */
-static void wakeup_readers(void *private)
+static void wakeup_readers(struct work_struct *work)
 {
-	struct rchan_buf *buf = private;
+	struct rchan_buf *buf =
+		container_of(work, struct rchan_buf, wake_readers.work);
 	wake_up_interruptible(&buf->read_wait);
 }
 
@@ -328,7 +329,7 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
 	if (init) {
 		init_waitqueue_head(&buf->read_wait);
 		kref_init(&buf->kref);
-		INIT_WORK(&buf->wake_readers, NULL, NULL);
+		INIT_DELAYED_WORK(&buf->wake_readers, NULL);
 	} else {
 		cancel_delayed_work(&buf->wake_readers);
 		flush_scheduled_work();
@@ -549,7 +550,8 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 			buf->padding[old_subbuf];
 		smp_mb();
 		if (waitqueue_active(&buf->read_wait)) {
-			PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
+			PREPARE_DELAYED_WORK(&buf->wake_readers,
+					     wakeup_readers);
 			schedule_delayed_work(&buf->wake_readers, 1);
 		}
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 2e0e871f542f..d9a3770d8f3c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -216,7 +216,7 @@ void lru_add_drain(void)
 }
 
 #ifdef CONFIG_NUMA
-static void lru_add_drain_per_cpu(void *dummy)
+static void lru_add_drain_per_cpu(struct work_struct *dummy)
 {
 	lru_add_drain();
 }
@@ -226,7 +226,7 @@ static void lru_add_drain_per_cpu(void *dummy)
  */
 int lru_add_drain_all(void)
 {
-	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
+	return schedule_on_each_cpu(lru_add_drain_per_cpu);
 }
 
 #else
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 66c57c1091a8..e801fff69dc0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1458,7 +1458,7 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
 
 #define LEC_ARP_REFRESH_INTERVAL (3*HZ)
 
-static void lec_arp_check_expire(void *data);
+static void lec_arp_check_expire(struct work_struct *work);
 static void lec_arp_expire_arp(unsigned long data);
 
 /* 
@@ -1481,7 +1481,7 @@ static void lec_arp_init(struct lec_priv *priv)
         INIT_HLIST_HEAD(&priv->lec_no_forward);
         INIT_HLIST_HEAD(&priv->mcast_fwds);
 	spin_lock_init(&priv->lec_arp_lock);
-	INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv);
+	INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire);
 	schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL);
 }
 
@@ -1879,10 +1879,11 @@ static void lec_arp_expire_vcc(unsigned long data)
  *       to ESI_FORWARD_DIRECT. This causes the flush period to end
  *       regardless of the progress of the flush protocol.
  */
-static void lec_arp_check_expire(void *data)
+static void lec_arp_check_expire(struct work_struct *work)
 {
 	unsigned long flags;
-	struct lec_priv *priv = data;
+	struct lec_priv *priv =
+		container_of(work, struct lec_priv, lec_arp_work.work);
 	struct hlist_node *node, *next;
 	struct lec_arp_table *entry;
 	unsigned long now;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 877f50939696..984e8e6e083a 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -92,7 +92,7 @@ struct lec_priv {
 	spinlock_t lec_arp_lock;
 	struct atm_vcc *mcast_vcc;		/* Default Multicast Send VCC */
 	struct atm_vcc *lecd;
-	struct work_struct lec_arp_work;	/* C10 */
+	struct delayed_work lec_arp_work;	/* C10 */
 	unsigned int maximum_unknown_frame_count;
 						/*
 						 * Within the period of time defined by this variable, the client will send
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 3eeeb7a86e75..d4c935692ccf 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -237,9 +237,9 @@ static void bt_release(struct device *dev)
 	kfree(data);
 }
 
-static void add_conn(void *data)
+static void add_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = data;
+	struct hci_conn *conn = container_of(work, struct hci_conn, work);
 	int i;
 
 	if (device_register(&conn->dev) < 0) {
@@ -272,14 +272,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 
 	dev_set_drvdata(&conn->dev, conn);
 
-	INIT_WORK(&conn->work, add_conn, (void *) conn);
+	INIT_WORK(&conn->work, add_conn);
 
 	schedule_work(&conn->work);
 }
 
-static void del_conn(void *data)
+static void del_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = data;
+	struct hci_conn *conn = container_of(work, struct hci_conn, work);
 	device_del(&conn->dev);
 }
 
@@ -287,7 +287,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 {
 	BT_DBG("conn %p", conn);
 
-	INIT_WORK(&conn->work, del_conn, (void *) conn);
+	INIT_WORK(&conn->work, del_conn);
 
 	schedule_work(&conn->work);
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f753c40c11d2..55bb2634c088 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -77,12 +77,16 @@ static int port_cost(struct net_device *dev)
  * Called from work queue to allow for calling functions that
  * might sleep (such as speed check), and to debounce.
  */
-static void port_carrier_check(void *arg)
+static void port_carrier_check(struct work_struct *work)
 {
-	struct net_device *dev = arg;
 	struct net_bridge_port *p;
+	struct net_device *dev;
 	struct net_bridge *br;
 
+	dev = container_of(work, struct net_bridge_port,
+			   carrier_check.work)->dev;
+	work_release(work);
+
 	rtnl_lock();
 	p = dev->br_port;
 	if (!p)
@@ -276,7 +280,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	p->port_no = index;
 	br_init_port(p);
 	p->state = BR_STATE_DISABLED;
-	INIT_WORK(&p->carrier_check, port_carrier_check, dev);
+	INIT_DELAYED_WORK_NAR(&p->carrier_check, port_carrier_check);
 	br_stp_port_timer_init(p);
 
 	kobject_init(&p->kobj);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 74258d86f256..3a534e94c7f3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -82,7 +82,7 @@ struct net_bridge_port
 	struct timer_list		hold_timer;
 	struct timer_list		message_age_timer;
 	struct kobject			kobj;
-	struct work_struct		carrier_check;
+	struct delayed_work		carrier_check;
 	struct rcu_head			rcu;
 };
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6589adb14cbf..63f24c914ddb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -56,7 +56,7 @@ static atomic_t trapped;
 static void zap_completion_queue(void);
 static void arp_reply(struct sk_buff *skb);
 
-static void queue_process(void *p)
+static void queue_process(struct work_struct *work)
 {
 	unsigned long flags;
 	struct sk_buff *skb;
@@ -77,7 +77,7 @@ static void queue_process(void *p)
 	}
 }
 
-static DECLARE_WORK(send_queue, queue_process, NULL);
+static DECLARE_WORK(send_queue, queue_process);
 
 void netpoll_queue(struct sk_buff *skb)
 {
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 9045438d6b36..36db5be2a9e9 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -31,8 +31,7 @@ struct inet_timewait_death_row dccp_death_row = {
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&dccp_death_row),
 	.twkill_work	= __WORK_INITIALIZER(dccp_death_row.twkill_work,
-					     inet_twdr_twkill_work,
-					     &dccp_death_row),
+					     inet_twdr_twkill_work),
 /* Short-time timewait calendar */
 
 	.twcal_hand	= -1,
diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c
index cf51c87a971d..08386c102954 100644
--- a/net/ieee80211/softmac/ieee80211softmac_assoc.c
+++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c
@@ -58,9 +58,11 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft
 }
 
 void
-ieee80211softmac_assoc_timeout(void *d)
+ieee80211softmac_assoc_timeout(struct work_struct *work)
 {
-	struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d;
+	struct ieee80211softmac_device *mac =
+		container_of(work, struct ieee80211softmac_device,
+			     associnfo.timeout.work);
 	struct ieee80211softmac_network *n;
 
 	mutex_lock(&mac->associnfo.mutex);
@@ -186,9 +188,11 @@ ieee80211softmac_assoc_notify_auth(struct net_device *dev, int event_type, void
 
 /* This function is called to handle userspace requests (asynchronously) */
 void
-ieee80211softmac_assoc_work(void *d)
+ieee80211softmac_assoc_work(struct work_struct *work)
 {
-	struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d;
+	struct ieee80211softmac_device *mac =
+		container_of(work, struct ieee80211softmac_device,
+			     associnfo.work.work);
 	struct ieee80211softmac_network *found = NULL;
 	struct ieee80211_network *net = NULL, *best = NULL;
 	int bssvalid;
@@ -412,7 +416,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev,
 				network->authenticated = 0;
 				/* we don't want to do this more than once ... */
 				network->auth_desynced_once = 1;
-				schedule_work(&mac->associnfo.work);
+				schedule_delayed_work(&mac->associnfo.work, 0);
 				break;
 			}
 		default:
@@ -446,7 +450,7 @@ ieee80211softmac_handle_disassoc(struct net_device * dev,
 	ieee80211softmac_disassoc(mac);
 
 	/* try to reassociate */
-	schedule_work(&mac->associnfo.work);
+	schedule_delayed_work(&mac->associnfo.work, 0);
 
 	return 0;
 }
@@ -466,7 +470,7 @@ ieee80211softmac_handle_reassoc_req(struct net_device * dev,
 		dprintkl(KERN_INFO PFX "reassoc request from unknown network\n");
 		return 0;
 	}
-	schedule_work(&mac->associnfo.work);
+	schedule_delayed_work(&mac->associnfo.work, 0);
 
 	return 0;
 }
diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index 4cef39e171d0..2ae1833b657a 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -26,7 +26,7 @@
 
 #include "ieee80211softmac_priv.h"
 
-static void ieee80211softmac_auth_queue(void *data);
+static void ieee80211softmac_auth_queue(struct work_struct *work);
 
 /* Queues an auth request to the desired AP */
 int
@@ -54,14 +54,14 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac,
 	auth->mac = mac;
 	auth->retry = IEEE80211SOFTMAC_AUTH_RETRY_LIMIT;
 	auth->state = IEEE80211SOFTMAC_AUTH_OPEN_REQUEST;
-	INIT_WORK(&auth->work, &ieee80211softmac_auth_queue, (void *)auth);
+	INIT_DELAYED_WORK(&auth->work, ieee80211softmac_auth_queue);
 	
 	/* Lock (for list) */
 	spin_lock_irqsave(&mac->lock, flags);
 
 	/* add to list */
 	list_add_tail(&auth->list, &mac->auth_queue);
-	schedule_work(&auth->work);
+	schedule_delayed_work(&auth->work, 0);
 	spin_unlock_irqrestore(&mac->lock, flags);
 	
 	return 0;
@@ -70,14 +70,15 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac,
 
 /* Sends an auth request to the desired AP and handles timeouts */
 static void
-ieee80211softmac_auth_queue(void *data)
+ieee80211softmac_auth_queue(struct work_struct *work)
 {
 	struct ieee80211softmac_device *mac;
 	struct ieee80211softmac_auth_queue_item *auth;
 	struct ieee80211softmac_network *net;
 	unsigned long flags;
 
-	auth = (struct ieee80211softmac_auth_queue_item *)data;
+	auth = container_of(work, struct ieee80211softmac_auth_queue_item,
+			    work.work);
 	net = auth->net;
 	mac = auth->mac;
 
@@ -118,9 +119,11 @@ ieee80211softmac_auth_queue(void *data)
 
 /* Sends a response to an auth challenge (for shared key auth). */
 static void
-ieee80211softmac_auth_challenge_response(void *_aq)
+ieee80211softmac_auth_challenge_response(struct work_struct *work)
 {
-	struct ieee80211softmac_auth_queue_item *aq = _aq;
+	struct ieee80211softmac_auth_queue_item *aq =
+		container_of(work, struct ieee80211softmac_auth_queue_item,
+			     work.work);
 
 	/* Send our response */
 	ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state);
@@ -228,8 +231,8 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
 			 * we have obviously already sent the initial auth
 			 * request. */
 			cancel_delayed_work(&aq->work);
-			INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq);
-			schedule_work(&aq->work);
+			INIT_DELAYED_WORK(&aq->work, &ieee80211softmac_auth_challenge_response);
+			schedule_delayed_work(&aq->work, 0);
 			spin_unlock_irqrestore(&mac->lock, flags);
 			return 0;
 		case IEEE80211SOFTMAC_AUTH_SHARED_PASS:
@@ -392,6 +395,6 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de
 	ieee80211softmac_deauth_from_net(mac, net);
 
 	/* let's try to re-associate */
-	schedule_work(&mac->associnfo.work);
+	schedule_delayed_work(&mac->associnfo.work, 0);
 	return 0;
 }
diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c
index f34fa2ef666b..b9015656cfb3 100644
--- a/net/ieee80211/softmac/ieee80211softmac_event.c
+++ b/net/ieee80211/softmac/ieee80211softmac_event.c
@@ -73,10 +73,12 @@ static char *event_descriptions[IEEE80211SOFTMAC_EVENT_LAST+1] = {
 
 
 static void
-ieee80211softmac_notify_callback(void *d)
+ieee80211softmac_notify_callback(struct work_struct *work)
 {
-	struct ieee80211softmac_event event = *(struct ieee80211softmac_event*) d;
-	kfree(d);
+	struct ieee80211softmac_event *pevent =
+		container_of(work, struct ieee80211softmac_event, work.work);
+	struct ieee80211softmac_event event = *pevent;
+	kfree(pevent);
 	
 	event.fun(event.mac->dev, event.event_type, event.context);
 }
@@ -99,7 +101,7 @@ ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac,
 		return -ENOMEM;
 	
 	eventptr->event_type = event;
-	INIT_WORK(&eventptr->work, ieee80211softmac_notify_callback, eventptr);
+	INIT_DELAYED_WORK(&eventptr->work, ieee80211softmac_notify_callback);
 	eventptr->fun = fun;
 	eventptr->context = context;
 	eventptr->mac = mac;
@@ -170,7 +172,7 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve
 				/* User may have subscribed to ANY event, so
 				 * we tell them which event triggered it. */
 				eventptr->event_type = event;
-				schedule_work(&eventptr->work);
+				schedule_delayed_work(&eventptr->work, 0);
 			}
 		}
 }
diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c
index 33aff4f4a471..256207b71dc9 100644
--- a/net/ieee80211/softmac/ieee80211softmac_module.c
+++ b/net/ieee80211/softmac/ieee80211softmac_module.c
@@ -58,8 +58,8 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv)
 	INIT_LIST_HEAD(&softmac->events);
 
 	mutex_init(&softmac->associnfo.mutex);
-	INIT_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work, softmac);
-	INIT_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout, softmac);
+	INIT_DELAYED_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work);
+	INIT_DELAYED_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout);
 	softmac->start_scan = ieee80211softmac_start_scan_implementation;
 	softmac->wait_for_scan = ieee80211softmac_wait_for_scan_implementation;
 	softmac->stop_scan = ieee80211softmac_stop_scan_implementation;
diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h
index 0642e090b8a7..c0dbe070e548 100644
--- a/net/ieee80211/softmac/ieee80211softmac_priv.h
+++ b/net/ieee80211/softmac/ieee80211softmac_priv.h
@@ -78,7 +78,7 @@
 /* private definitions and prototypes */
 
 /*** prototypes from _scan.c */
-void ieee80211softmac_scan(void *sm);
+void ieee80211softmac_scan(struct work_struct *work);
 /* for internal use if scanning is needed */
 int ieee80211softmac_start_scan(struct ieee80211softmac_device *mac);
 void ieee80211softmac_stop_scan(struct ieee80211softmac_device *mac);
@@ -149,7 +149,7 @@ int ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *au
 int ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *deauth);
 
 /*** prototypes from _assoc.c */
-void ieee80211softmac_assoc_work(void *d);
+void ieee80211softmac_assoc_work(struct work_struct *work);
 int ieee80211softmac_handle_assoc_response(struct net_device * dev,
 					   struct ieee80211_assoc_response * resp,
 					   struct ieee80211_network * network);
@@ -157,7 +157,7 @@ int ieee80211softmac_handle_disassoc(struct net_device * dev,
 				     struct ieee80211_disassoc * disassoc);
 int ieee80211softmac_handle_reassoc_req(struct net_device * dev,
 				        struct ieee80211_reassoc_request * reassoc);
-void ieee80211softmac_assoc_timeout(void *d);
+void ieee80211softmac_assoc_timeout(struct work_struct *work);
 void ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reason);
 void ieee80211softmac_disassoc(struct ieee80211softmac_device *mac);
 
@@ -207,7 +207,7 @@ struct ieee80211softmac_auth_queue_item {
 	struct ieee80211softmac_device	*mac;	/* SoftMAC device */
 	u8 retry;				/* Retry limit */
 	u8 state;				/* Auth State */
-	struct work_struct		work;	/* Work queue */
+	struct delayed_work		work;	/* Work queue */
 };
 
 /* scanning information */
@@ -219,7 +219,8 @@ struct ieee80211softmac_scaninfo {
 	   stop:1;
 	u8 skip_flags;
 	struct completion finished;
-	struct work_struct softmac_scan;
+	struct delayed_work softmac_scan;
+	struct ieee80211softmac_device *mac;
 };
 
 /* private event struct */
@@ -227,7 +228,7 @@ struct ieee80211softmac_event {
 	struct list_head list;
 	int event_type;
 	void *event_context;
-	struct work_struct work;
+	struct delayed_work work;
 	notify_function_ptr fun;
 	void *context;
 	struct ieee80211softmac_device *mac;
diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c
index d31cf77498c4..a8326076581a 100644
--- a/net/ieee80211/softmac/ieee80211softmac_scan.c
+++ b/net/ieee80211/softmac/ieee80211softmac_scan.c
@@ -91,12 +91,14 @@ ieee80211softmac_wait_for_scan(struct ieee80211softmac_device *sm)
 
 
 /* internal scanning implementation follows */
-void ieee80211softmac_scan(void *d)
+void ieee80211softmac_scan(struct work_struct *work)
 {
 	int invalid_channel;
 	u8 current_channel_idx;
-	struct ieee80211softmac_device *sm = (struct ieee80211softmac_device *)d;
-	struct ieee80211softmac_scaninfo *si = sm->scaninfo;
+	struct ieee80211softmac_scaninfo *si =
+		container_of(work, struct ieee80211softmac_scaninfo,
+			     softmac_scan.work);
+	struct ieee80211softmac_device *sm = si->mac;
 	unsigned long flags;
 
 	while (!(si->stop) && (si->current_channel_idx < si->number_channels)) {
@@ -146,7 +148,8 @@ static inline struct ieee80211softmac_scaninfo *allocate_scaninfo(struct ieee802
 	struct ieee80211softmac_scaninfo *info = kmalloc(sizeof(struct ieee80211softmac_scaninfo), GFP_ATOMIC);
 	if (unlikely(!info))
 		return NULL;
-	INIT_WORK(&info->softmac_scan, ieee80211softmac_scan, mac);
+	INIT_DELAYED_WORK(&info->softmac_scan, ieee80211softmac_scan);
+	info->mac = mac;
 	init_completion(&info->finished);
 	return info;
 }
@@ -189,7 +192,7 @@ int ieee80211softmac_start_scan_implementation(struct net_device *dev)
 	sm->scaninfo->started = 1;
 	sm->scaninfo->stop = 0;
 	INIT_COMPLETION(sm->scaninfo->finished);
-	schedule_work(&sm->scaninfo->softmac_scan);
+	schedule_delayed_work(&sm->scaninfo->softmac_scan, 0);
 	spin_unlock_irqrestore(&sm->lock, flags);
 	return 0;
 }
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index 23068a830f7d..2ffaebd21c53 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -122,7 +122,7 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 
 	sm->associnfo.associating = 1;
 	/* queue lower level code to do work (if necessary) */
-	schedule_work(&sm->associnfo.work);
+	schedule_delayed_work(&sm->associnfo.work, 0);
 out:
 	mutex_unlock(&sm->associnfo.mutex);
 
@@ -356,7 +356,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
 		/* force reassociation */
 		mac->associnfo.bssvalid = 0;
 		if (mac->associnfo.associated)
-			schedule_work(&mac->associnfo.work);
+			schedule_delayed_work(&mac->associnfo.work, 0);
 	} else if (is_zero_ether_addr(data->ap_addr.sa_data)) {
 		/* the bssid we have is no longer fixed */
 		mac->associnfo.bssfixed = 0;
@@ -373,7 +373,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
 		/* tell the other code that this bssid should be used no matter what */
 		mac->associnfo.bssfixed = 1;
 		/* queue associate if new bssid or (old one again and not associated) */
-		schedule_work(&mac->associnfo.work);
+		schedule_delayed_work(&mac->associnfo.work, 0);
         }
 
  out:
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f261616e4602..9b933381ebbe 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -221,10 +221,10 @@ static void update_defense_level(void)
  *	Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD	1*HZ
-static void defense_work_handler(void *data);
-static DECLARE_WORK(defense_work, defense_work_handler, NULL);
+static void defense_work_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
-static void defense_work_handler(void *data)
+static void defense_work_handler(struct work_struct *work)
 {
 	update_defense_level();
 	if (atomic_read(&ip_vs_dropentry))
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index d50a02030ad7..262bda808d96 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -61,7 +61,7 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty);
 static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch);
 static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout);
 static void ircomm_tty_hangup(struct tty_struct *tty);
-static void ircomm_tty_do_softint(void *private_);
+static void ircomm_tty_do_softint(struct work_struct *work);
 static void ircomm_tty_shutdown(struct ircomm_tty_cb *self);
 static void ircomm_tty_stop(struct tty_struct *tty);
 
@@ -389,7 +389,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
 		self->flow = FLOW_STOP;
 
 		self->line = line;
-		INIT_WORK(&self->tqueue, ircomm_tty_do_softint, self);
+		INIT_WORK(&self->tqueue, ircomm_tty_do_softint);
 		self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
 		self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
 		self->close_delay = 5*HZ/10;
@@ -594,15 +594,16 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty)
 }
 
 /*
- * Function ircomm_tty_do_softint (private_)
+ * Function ircomm_tty_do_softint (work)
  *
  *    We use this routine to give the write wakeup to the user at at a
  *    safe time (as fast as possible after write have completed). This 
  *    can be compared to the Tx interrupt.
  */
-static void ircomm_tty_do_softint(void *private_)
+static void ircomm_tty_do_softint(struct work_struct *work)
 {
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) private_;
+	struct ircomm_tty_cb *self =
+		container_of(work, struct ircomm_tty_cb, tqueue);
 	struct tty_struct *tty;
 	unsigned long flags;
 	struct sk_buff *skb, *ctrl_skb;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ed0445fe85e7..88124696ba60 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -61,7 +61,7 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal functions. */
-static void sctp_assoc_bh_rcv(struct sctp_association *asoc);
+static void sctp_assoc_bh_rcv(struct work_struct *work);
 
 
 /* 1st Level Abstractions. */
@@ -269,9 +269,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* Create an input queue.  */
 	sctp_inq_init(&asoc->base.inqueue);
-	sctp_inq_set_th_handler(&asoc->base.inqueue,
-				    (void (*)(void *))sctp_assoc_bh_rcv,
-				    asoc);
+	sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv);
 
 	/* Create an output queue.  */
 	sctp_outq_init(asoc, &asoc->outqueue);
@@ -944,8 +942,11 @@ out:
 }
 
 /* Do delayed input processing.  This is scheduled by sctp_rcv(). */
-static void sctp_assoc_bh_rcv(struct sctp_association *asoc)
+static void sctp_assoc_bh_rcv(struct work_struct *work)
 {
+	struct sctp_association *asoc =
+		container_of(work, struct sctp_association,
+			     base.inqueue.immediate);
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sock *sk;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9b6b394b66f6..a2b553721514 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -61,7 +61,7 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal helpers. */
-static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
+static void sctp_endpoint_bh_rcv(struct work_struct *work);
 
 /*
  * Initialize the base fields of the endpoint structure.
@@ -85,8 +85,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	sctp_inq_init(&ep->base.inqueue);
 
 	/* Set its top-half handler */
-	sctp_inq_set_th_handler(&ep->base.inqueue,
-				(void (*)(void *))sctp_endpoint_bh_rcv, ep);
+	sctp_inq_set_th_handler(&ep->base.inqueue, sctp_endpoint_bh_rcv);
 
 	/* Initialize the bind addr area */
 	sctp_bind_addr_init(&ep->base.bind_addr, 0);
@@ -311,8 +310,11 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
 /* Do delayed input processing.  This is scheduled by sctp_rcv().
  * This may be called on BH or task time.
  */
-static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep)
+static void sctp_endpoint_bh_rcv(struct work_struct *work)
 {
+	struct sctp_endpoint *ep =
+		container_of(work, struct sctp_endpoint,
+			     base.inqueue.immediate);
 	struct sctp_association *asoc;
 	struct sock *sk;
 	struct sctp_transport *transport;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf6deed7e849..71b07466e880 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -54,7 +54,7 @@ void sctp_inq_init(struct sctp_inq *queue)
 	queue->in_progress = NULL;
 
 	/* Create a task for delivering data.  */
-	INIT_WORK(&queue->immediate, NULL, NULL);
+	INIT_WORK(&queue->immediate, NULL);
 
 	queue->malloced = 0;
 }
@@ -97,7 +97,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
 	 * on the BH related data structures.
 	 */
 	list_add_tail(&chunk->list, &q->in_chunk_list);
-	q->immediate.func(q->immediate.data);
+	q->immediate.func(&q->immediate);
 }
 
 /* Extract a chunk from an SCTP inqueue.
@@ -205,9 +205,8 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
  * The intent is that this routine will pull stuff out of the
  * inqueue and process it.
  */
-void sctp_inq_set_th_handler(struct sctp_inq *q,
-				 void (*callback)(void *), void *arg)
+void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback)
 {
-	INIT_WORK(&q->immediate, callback, arg);
+	INIT_WORK(&q->immediate, callback);
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7736b23c3f03..ba924d40df7d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -358,7 +358,7 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 	xfrm_pol_put(policy);
 }
 
-static void xfrm_policy_gc_task(void *data)
+static void xfrm_policy_gc_task(struct work_struct *work)
 {
 	struct xfrm_policy *policy;
 	struct hlist_node *entry, *tmp;
@@ -546,7 +546,7 @@ static inline int xfrm_byidx_should_resize(int total)
 
 static DEFINE_MUTEX(hash_resize_mutex);
 
-static void xfrm_hash_resize(void *__unused)
+static void xfrm_hash_resize(struct work_struct *__unused)
 {
 	int dir, total;
 
@@ -563,7 +563,7 @@ static void xfrm_hash_resize(void *__unused)
 	mutex_unlock(&hash_resize_mutex);
 }
 
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
 
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
@@ -2080,7 +2080,7 @@ static void __init xfrm_policy_init(void)
 			panic("XFRM: failed to allocate bydst hash\n");
 	}
 
-	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
 	register_netdevice_notifier(&xfrm_dev_notifier);
 }
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 899de9ed22a6..40c527179843 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -115,7 +115,7 @@ static unsigned long xfrm_hash_new_size(void)
 
 static DEFINE_MUTEX(hash_resize_mutex);
 
-static void xfrm_hash_resize(void *__unused)
+static void xfrm_hash_resize(struct work_struct *__unused)
 {
 	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
 	unsigned long nsize, osize;
@@ -168,7 +168,7 @@ out_unlock:
 	mutex_unlock(&hash_resize_mutex);
 }
 
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
@@ -207,7 +207,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 	kfree(x);
 }
 
-static void xfrm_state_gc_task(void *data)
+static void xfrm_state_gc_task(struct work_struct *data)
 {
 	struct xfrm_state *x;
 	struct hlist_node *entry, *tmp;
@@ -1568,6 +1568,6 @@ void __init xfrm_state_init(void)
 		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
-	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 }
 
diff --git a/sound/aoa/aoa-gpio.h b/sound/aoa/aoa-gpio.h
index 3a61f3115573..ee64f5de8966 100644
--- a/sound/aoa/aoa-gpio.h
+++ b/sound/aoa/aoa-gpio.h
@@ -59,10 +59,10 @@ struct gpio_methods {
 };
 
 struct gpio_notification {
+	struct delayed_work work;
 	notify_func_t notify;
 	void *data;
 	void *gpio_private;
-	struct work_struct work;
 	struct mutex mutex;
 };
 
diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c
index 40eb47eccf9a..2b03bc798bcb 100644
--- a/sound/aoa/core/snd-aoa-gpio-feature.c
+++ b/sound/aoa/core/snd-aoa-gpio-feature.c
@@ -195,9 +195,10 @@ static void ftr_gpio_all_amps_restore(struct gpio_runtime *rt)
 	ftr_gpio_set_lineout(rt, (s>>2)&1);
 }
 
-static void ftr_handle_notify(void *data)
+static void ftr_handle_notify(struct work_struct *work)
 {
-	struct gpio_notification *notif = data;
+	struct gpio_notification *notif =
+		container_of(work, struct gpio_notification, work.work);
 
 	mutex_lock(&notif->mutex);
 	if (notif->notify)
@@ -253,12 +254,9 @@ static void ftr_gpio_init(struct gpio_runtime *rt)
 
 	ftr_gpio_all_amps_off(rt);
 	rt->implementation_private = 0;
-	INIT_WORK(&rt->headphone_notify.work, ftr_handle_notify,
-		  &rt->headphone_notify);
-	INIT_WORK(&rt->line_in_notify.work, ftr_handle_notify,
-		  &rt->line_in_notify);
-	INIT_WORK(&rt->line_out_notify.work, ftr_handle_notify,
-		  &rt->line_out_notify);
+	INIT_DELAYED_WORK(&rt->headphone_notify.work, ftr_handle_notify);
+	INIT_DELAYED_WORK(&rt->line_in_notify.work, ftr_handle_notify);
+	INIT_DELAYED_WORK(&rt->line_out_notify.work, ftr_handle_notify);
 	mutex_init(&rt->headphone_notify.mutex);
 	mutex_init(&rt->line_in_notify.mutex);
 	mutex_init(&rt->line_out_notify.mutex);
@@ -287,7 +285,7 @@ static irqreturn_t ftr_handle_notify_irq(int xx, void *data)
 {
 	struct gpio_notification *notif = data;
 
-	schedule_work(&notif->work);
+	schedule_delayed_work(&notif->work, 0);
 
 	return IRQ_HANDLED;
 }
diff --git a/sound/aoa/core/snd-aoa-gpio-pmf.c b/sound/aoa/core/snd-aoa-gpio-pmf.c
index 2836c3218391..5ca2220eac7d 100644
--- a/sound/aoa/core/snd-aoa-gpio-pmf.c
+++ b/sound/aoa/core/snd-aoa-gpio-pmf.c
@@ -69,9 +69,10 @@ static void pmf_gpio_all_amps_restore(struct gpio_runtime *rt)
 	pmf_gpio_set_lineout(rt, (s>>2)&1);
 }
 
-static void pmf_handle_notify(void *data)
+static void pmf_handle_notify(struct work_struct *work)
 {
-	struct gpio_notification *notif = data;
+	struct gpio_notification *notif =
+		container_of(work, struct gpio_notification, work.work);
 
 	mutex_lock(&notif->mutex);
 	if (notif->notify)
@@ -83,12 +84,9 @@ static void pmf_gpio_init(struct gpio_runtime *rt)
 {
 	pmf_gpio_all_amps_off(rt);
 	rt->implementation_private = 0;
-	INIT_WORK(&rt->headphone_notify.work, pmf_handle_notify,
-		  &rt->headphone_notify);
-	INIT_WORK(&rt->line_in_notify.work, pmf_handle_notify,
-		  &rt->line_in_notify);
-	INIT_WORK(&rt->line_out_notify.work, pmf_handle_notify,
-		  &rt->line_out_notify);
+	INIT_DELAYED_WORK(&rt->headphone_notify.work, pmf_handle_notify);
+	INIT_DELAYED_WORK(&rt->line_in_notify.work, pmf_handle_notify);
+	INIT_DELAYED_WORK(&rt->line_out_notify.work, pmf_handle_notify);
 	mutex_init(&rt->headphone_notify.mutex);
 	mutex_init(&rt->line_in_notify.mutex);
 	mutex_init(&rt->line_out_notify.mutex);
@@ -129,7 +127,7 @@ static void pmf_handle_notify_irq(void *data)
 {
 	struct gpio_notification *notif = data;
 
-	schedule_work(&notif->work);
+	schedule_delayed_work(&notif->work, 0);
 }
 
 static int pmf_set_notify(struct gpio_runtime *rt,
diff --git a/sound/i2c/other/ak4114.c b/sound/i2c/other/ak4114.c
index 12ffffc9e814..d2f2c5078e65 100644
--- a/sound/i2c/other/ak4114.c
+++ b/sound/i2c/other/ak4114.c
@@ -35,7 +35,7 @@ MODULE_LICENSE("GPL");
 
 #define AK4114_ADDR			0x00 /* fixed address */
 
-static void ak4114_stats(void *);
+static void ak4114_stats(struct work_struct *work);
 
 static void reg_write(struct ak4114 *ak4114, unsigned char reg, unsigned char val)
 {
@@ -158,7 +158,7 @@ void snd_ak4114_reinit(struct ak4114 *chip)
 	reg_write(chip, AK4114_REG_PWRDN, old | AK4114_RST | AK4114_PWN);
 	/* bring up statistics / event queing */
 	chip->init = 0;
-	INIT_WORK(&chip->work, ak4114_stats, chip);
+	INIT_DELAYED_WORK(&chip->work, ak4114_stats);
 	queue_delayed_work(chip->workqueue, &chip->work, HZ / 10);
 }
 
@@ -561,9 +561,9 @@ int snd_ak4114_check_rate_and_errors(struct ak4114 *ak4114, unsigned int flags)
 	return res;
 }
 
-static void ak4114_stats(void *data)
+static void ak4114_stats(struct work_struct *work)
 {
-	struct ak4114 *chip = (struct ak4114 *)data;
+	struct ak4114 *chip = container_of(work, struct ak4114, work.work);
 
 	if (chip->init)
 		return;
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index 6577b2325357..7abcb10b2754 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -1927,9 +1927,10 @@ static int snd_ac97_dev_disconnect(struct snd_device *device)
 static struct snd_ac97_build_ops null_build_ops;
 
 #ifdef CONFIG_SND_AC97_POWER_SAVE
-static void do_update_power(void *data)
+static void do_update_power(struct work_struct *work)
 {
-	update_power_regs(data);
+	update_power_regs(
+		container_of(work, struct snd_ac97, power_work.work));
 }
 #endif
 
@@ -1989,7 +1990,7 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template,
 	mutex_init(&ac97->page_mutex);
 #ifdef CONFIG_SND_AC97_POWER_SAVE
 	ac97->power_workq = create_workqueue("ac97");
-	INIT_WORK(&ac97->power_work, do_update_power, ac97);
+	INIT_DELAYED_WORK(&ac97->power_work, do_update_power);
 #endif
 
 #ifdef CONFIG_PCI
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 9c3d7ac08068..71482c15a852 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -272,10 +272,11 @@ EXPORT_SYMBOL(snd_hda_queue_unsol_event);
 /*
  * process queueud unsolicited events
  */
-static void process_unsol_events(void *data)
+static void process_unsol_events(struct work_struct *work)
 {
-	struct hda_bus *bus = data;
-	struct hda_bus_unsolicited *unsol = bus->unsol;
+	struct hda_bus_unsolicited *unsol =
+		container_of(work, struct hda_bus_unsolicited, work);
+	struct hda_bus *bus = unsol->bus;
 	struct hda_codec *codec;
 	unsigned int rp, caddr, res;
 
@@ -314,7 +315,8 @@ static int init_unsol_queue(struct hda_bus *bus)
 		kfree(unsol);
 		return -ENOMEM;
 	}
-	INIT_WORK(&unsol->work, process_unsol_events, bus);
+	INIT_WORK(&unsol->work, process_unsol_events);
+	unsol->bus = bus;
 	bus->unsol = unsol;
 	return 0;
 }
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index f9416c36396e..9ca1baf860bd 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -206,6 +206,7 @@ struct hda_bus_unsolicited {
 	/* workqueue */
 	struct workqueue_struct *workq;
 	struct work_struct work;
+	struct hda_bus *bus;
 };
 
 /*
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 2fbe1d183fce..8f074c7936e6 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -942,10 +942,11 @@ static void check_mute(struct snd_pmac *chip, struct pmac_gpio *gp, int val, int
 }
 
 static struct work_struct device_change;
+static struct snd_pmac *device_change_chip;
 
-static void device_change_handler(void *self)
+static void device_change_handler(struct work_struct *work)
 {
-	struct snd_pmac *chip = self;
+	struct snd_pmac *chip = device_change_chip;
 	struct pmac_tumbler *mix;
 	int headphone, lineout;
 
@@ -1417,7 +1418,8 @@ int __init snd_pmac_tumbler_init(struct snd_pmac *chip)
 	chip->resume = tumbler_resume;
 #endif
 
-	INIT_WORK(&device_change, device_change_handler, (void *)chip);
+	INIT_WORK(&device_change, device_change_handler);
+	device_change_chip = chip;
 
 #ifdef PMAC_SUPPORT_AUTOMUTE
 	if ((mix->headphone_irq >=0 || mix->lineout_irq >= 0)
-- 
cgit v1.2.3


From e17e0f51aeea4e59c7e450a1c0f26605b91c1260 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 24 Nov 2006 12:15:25 +0100
Subject: Driver core: show drivers in /sys/module/

Show the drivers, which belong to the module:
  $ ls -l /sys/module/usbcore/drivers/
  hub -> ../../../bus/usb/drivers/hub
  usb -> ../../../bus/usb/drivers/usb
  usbfs -> ../../../bus/usb/drivers/usbfs

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/module.h |  1 +
 kernel/module.c        | 31 +++++++++++++++++++++++++------
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/module.h b/include/linux/module.h
index d1d00ce8f4ed..9258ffd8a7f0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -264,6 +264,7 @@ struct module
 	struct module_attribute *modinfo_attrs;
 	const char *version;
 	const char *srcversion;
+	struct kobject *drivers_dir;
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
diff --git a/kernel/module.c b/kernel/module.c
index f0166563c602..45e01cb60101 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1086,22 +1086,35 @@ static int mod_sysfs_setup(struct module *mod,
 		goto out;
 	kobj_set_kset_s(&mod->mkobj, module_subsys);
 	mod->mkobj.mod = mod;
-	err = kobject_register(&mod->mkobj.kobj);
+
+	/* delay uevent until full sysfs population */
+	kobject_init(&mod->mkobj.kobj);
+	err = kobject_add(&mod->mkobj.kobj);
 	if (err)
 		goto out;
 
+	mod->drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
+	if (!mod->drivers_dir)
+		goto out_unreg;
+
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
-		goto out_unreg;
+		goto out_unreg_drivers;
 
 	err = module_add_modinfo_attrs(mod);
 	if (err)
-		goto out_unreg;
+		goto out_unreg_param;
 
+	kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
 	return 0;
 
+out_unreg_drivers:
+	kobject_unregister(mod->drivers_dir);
+out_unreg_param:
+	module_param_sysfs_remove(mod);
 out_unreg:
-	kobject_unregister(&mod->mkobj.kobj);
+	kobject_del(&mod->mkobj.kobj);
+	kobject_put(&mod->mkobj.kobj);
 out:
 	return err;
 }
@@ -1110,6 +1123,7 @@ static void mod_kobject_remove(struct module *mod)
 {
 	module_remove_modinfo_attrs(mod);
 	module_param_sysfs_remove(mod);
+	kobject_unregister(mod->drivers_dir);
 
 	kobject_unregister(&mod->mkobj.kobj);
 }
@@ -2275,11 +2289,14 @@ void print_modules(void)
 
 void module_add_driver(struct module *mod, struct device_driver *drv)
 {
+	int no_warn;
+
 	if (!mod || !drv)
 		return;
 
-	/* Don't check return code; this call is idempotent */
-	sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
+	/* Don't check return codes; these calls are idempotent */
+	no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
+	no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, drv->name);
 }
 EXPORT_SYMBOL(module_add_driver);
 
@@ -2288,6 +2305,8 @@ void module_remove_driver(struct device_driver *drv)
 	if (!drv)
 		return;
 	sysfs_remove_link(&drv->kobj, "module");
+	if (drv->owner && drv->owner->drivers_dir)
+		sysfs_remove_link(drv->owner->drivers_dir, drv->name);
 }
 EXPORT_SYMBOL(module_remove_driver);
 
-- 
cgit v1.2.3


From 339bf98ffc6a8d8eb16fc532ac57ffbced2f8a68 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 10 Nov 2006 14:10:15 -0800
Subject: [NETLINK]: Do precise netlink message allocations where possible

Account for the netlink message header size directly in nlmsg_new()
instead of relying on the caller calculate it correctly.

Replaces error handling of message construction functions when
constructing notifications with bug traps since a failure implies
a bug in calculating the size of the skb.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h           |  1 +
 include/net/fib_rules.h           |  1 +
 include/net/netlink.h             |  9 ++---
 kernel/taskstats.c                |  3 +-
 net/bridge/br_netlink.c           | 21 ++++++++----
 net/core/fib_rules.c              | 24 +++++++++++---
 net/core/neighbour.c              | 17 +++++++---
 net/core/rtnetlink.c              | 39 ++++++++++++++--------
 net/decnet/dn_rules.c             |  6 ++++
 net/decnet/dn_table.c             | 34 ++++++++++++++++---
 net/ipv4/devinet.c                | 18 +++++++---
 net/ipv4/fib_rules.c              |  8 +++++
 net/ipv4/fib_semantics.c          | 36 ++++++++++++++++----
 net/ipv6/addrconf.c               | 70 ++++++++++++++++++---------------------
 net/ipv6/fib6_rules.c             |  7 ++++
 net/ipv6/route.c                  | 23 +++++++++----
 net/netlabel/netlabel_cipso_v4.c  |  2 +-
 net/netlabel/netlabel_mgmt.c      |  4 +--
 net/netlabel/netlabel_unlabeled.c |  2 +-
 net/netlink/af_netlink.c          | 13 ++++----
 net/netlink/genetlink.c           |  2 +-
 21 files changed, 233 insertions(+), 107 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 66411622e06e..e61e1e138421 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -174,6 +174,7 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
  */
 #define NLMSG_GOODORDER 0
 #define NLMSG_GOODSIZE (SKB_MAX_ORDER(0, NLMSG_GOODORDER))
+#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
 
 
 struct netlink_callback
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e4ba781d289f..bc3c26494c3d 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -52,6 +52,7 @@ struct fib_rules_ops
 					struct nlmsghdr *,
 					struct fib_rule_hdr *);
 	u32			(*default_pref)(void);
+	size_t			(*nlmsg_payload)(struct fib_rule *);
 
 	int			nlgroup;
 	struct nla_policy	*policy;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index ce5cba19c393..30021339157c 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -500,14 +500,15 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
 
 /**
  * nlmsg_new - Allocate a new netlink message
- * @size: maximum size of message
+ * @payload: size of the message payload
  * @flags: the type of memory to allocate.
  *
- * Use NLMSG_GOODSIZE if size isn't know and you need a good default size.
+ * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known
+ * and a good default is needed.
  */
-static inline struct sk_buff *nlmsg_new(int size, gfp_t flags)
+static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
 {
-	return alloc_skb(size, flags);
+	return alloc_skb(nlmsg_total_size(payload), flags);
 }
 
 /**
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index f45c5e70773c..4f3f0e48c845 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -77,8 +77,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 	/*
 	 * If new attributes are added, please revisit this allocation
 	 */
-	size = nlmsg_total_size(genlmsg_total_size(size));
-	skb = nlmsg_new(size, GFP_KERNEL);
+	skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8f661195d09d..15d6efbe7519 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -15,6 +15,18 @@
 #include <net/netlink.h>
 #include "br_private.h"
 
+static inline size_t br_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(4) /* IFLA_MASTER */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size(1) /* IFLA_OPERSTATE */
+	       + nla_total_size(1); /* IFLA_PROTINFO */
+}
+
 /*
  * Create one netlink message for one interface
  * Contains port and master info as well as carrier and bridge state.
@@ -77,19 +89,16 @@ rtattr_failure:
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
 	struct sk_buff *skb;
-	int payload = sizeof(struct ifinfomsg) + 128;
 	int err = -ENOBUFS;
 
 	pr_debug("bridge notify event=%d\n", event);
-	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
 	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in br_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 errout:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 4148e274a204..1df6cd4568d3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -306,6 +306,22 @@ errout:
 	return err;
 }
 
+static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+					 struct fib_rule *rule)
+{
+	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
+			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+			 + nla_total_size(4) /* FRA_PRIORITY */
+			 + nla_total_size(4) /* FRA_TABLE */
+			 + nla_total_size(4) /* FRA_FWMARK */
+			 + nla_total_size(4); /* FRA_FWMASK */
+
+	if (ops->nlmsg_payload)
+		payload += ops->nlmsg_payload(rule);
+
+	return payload;
+}
+
 static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 			    u32 pid, u32 seq, int type, int flags,
 			    struct fib_rules_ops *ops)
@@ -384,15 +400,13 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
 	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in fib_rule_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
 errout:
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b4b478353b27..0e097ba14d73 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2410,20 +2410,27 @@ static struct file_operations neigh_stat_seq_fops = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_ARPD
+static inline size_t neigh_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+	       + nla_total_size(sizeof(struct nda_cacheinfo))
+	       + nla_total_size(4); /* NDA_PROBES */
+}
+
 static void __neigh_notify(struct neighbour *n, int type, int flags)
 {
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in neigh_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
 errout:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02f3c7947898..50d6cb40c6e3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -273,6 +273,25 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 };
 
+static inline size_t if_nlmsg_size(int iwbuflen)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
+	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+	       + nla_total_size(4) /* IFLA_TXQLEN */
+	       + nla_total_size(4) /* IFLA_WEIGHT */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size(4) /* IFLA_MASTER */
+	       + nla_total_size(1) /* IFLA_OPERSTATE */
+	       + nla_total_size(1) /* IFLA_LINKMODE */
+	       + nla_total_size(iwbuflen);
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    void *iwbuf, int iwbuflen, int type, u32 pid,
 			    u32 seq, u32 change, unsigned int flags)
@@ -558,7 +577,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct sk_buff *nskb;
 	char *iw_buf = NULL, *iw = NULL;
 	int iw_buf_len = 0;
-	int err, payload;
+	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
@@ -587,9 +606,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-	payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
-			      nla_total_size(iw_buf_len));
-	nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
 	if (nskb == NULL) {
 		err = -ENOBUFS;
 		goto errout;
@@ -597,10 +614,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
 			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
-	if (err <= 0) {
-		kfree_skb(nskb);
-		goto errout;
-	}
+	/* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */
+	BUG_ON(err < 0);
 
 	err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
 errout:
@@ -639,15 +654,13 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
 	err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in if_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
 errout:
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index e32d0c3d5a96..b7dfd04a9638 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -241,6 +241,12 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
+static size_t dn_fib_rule_nlmsg_payload(struct fib_rule *rule)
+{
+	return nla_total_size(2) /* dst */
+	       + nla_total_size(2); /* src */
+}
+
 int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	return fib_rules_dump(skb, cb, AF_DECnet);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 317904bb5896..e74b744254ab 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -263,6 +263,32 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 	return 0;
 }
 
+static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
+{
+	size_t payload = NLMSG_ALIGN(struct rtmsg)
+			 + nla_total_size(4) /* RTA_TABLE */
+			 + nla_total_size(2) /* RTA_DST */
+			 + nla_total_size(4); /* RTA_PRIORITY */
+
+	/* space for nested metrics */
+	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+
+	if (fi->fib_nhs) {
+		/* Also handles the special case fib_nhs == 1 */
+
+		/* each nexthop is packed in an attribute */
+		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+
+		/* may contain a gateway attribute */
+		nhsize += nla_total_size(4);
+
+		/* all nexthops are packed in a nested attribute */
+		payload += nla_total_size(fi->fib_nhs * nhsize);
+	}
+
+	return payload;
+}
+
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
                         u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
                         struct dn_fib_info *fi, unsigned int flags)
@@ -335,17 +361,15 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
         u32 pid = req ? req->pid : 0;
 	int err = -ENOBUFS;
 
-        skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+        skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f), GFP_KERNEL));
         if (skb == NULL)
 		goto errout;
 
         err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
 			       f->fn_type, f->fn_scope, &f->fn_key, z,
 			       DN_FIB_INFO(f), 0);
-	if (err < 0) {
-                kfree_skb(skb);
-		goto errout;
-        }
+	/* failure implies BUG in dn_fib_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
 errout:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7602c79a389b..f38cbbae0ae3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1120,6 +1120,16 @@ static struct notifier_block ip_netdev_notifier = {
 	.notifier_call =inetdev_event,
 };
 
+static inline size_t inet_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+	       + nla_total_size(4) /* IFA_ADDRESS */
+	       + nla_total_size(4) /* IFA_LOCAL */
+	       + nla_total_size(4) /* IFA_BROADCAST */
+	       + nla_total_size(4) /* IFA_ANYCAST */
+	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+}
+
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 			    u32 pid, u32 seq, int event, unsigned int flags)
 {
@@ -1208,15 +1218,13 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in inet_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index fd4a8cd4c06e..b837c33e0404 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -299,6 +299,13 @@ static u32 fib4_rule_default_pref(void)
 	return 0;
 }
 
+static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+{
+	return nla_total_size(4) /* dst */
+	       + nla_total_size(4) /* src */
+	       + nla_total_size(4); /* flow */
+}
+
 static struct fib_rules_ops fib4_rules_ops = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
@@ -308,6 +315,7 @@ static struct fib_rules_ops fib4_rules_ops = {
 	.compare	= fib4_rule_compare,
 	.fill		= fib4_rule_fill,
 	.default_pref	= fib4_rule_default_pref,
+	.nlmsg_payload	= fib4_rule_nlmsg_payload,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= fib4_rule_policy,
 	.rules_list	= &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 884d176e0082..e63b8a98fb4d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -273,25 +273,49 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 	return -1;
 }
 
+static inline size_t fib_nlmsg_size(struct fib_info *fi)
+{
+	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+			 + nla_total_size(4) /* RTA_TABLE */
+			 + nla_total_size(4) /* RTA_DST */
+			 + nla_total_size(4) /* RTA_PRIORITY */
+			 + nla_total_size(4); /* RTA_PREFSRC */
+
+	/* space for nested metrics */
+	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+
+	if (fi->fib_nhs) {
+		/* Also handles the special case fib_nhs == 1 */
+
+		/* each nexthop is packed in an attribute */
+		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+
+		/* may contain flow and gateway attribute */
+		nhsize += 2 * nla_total_size(4);
+
+		/* all nexthops are packed in a nested attribute */
+		payload += nla_total_size(fi->fib_nhs * nhsize);
+	}
+
+	return payload;
+}
+
 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 	       int dst_len, u32 tb_id, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	int payload = sizeof(struct rtmsg) + 256;
 	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
 			    fa->fa_type, fa->fa_scope, key, dst_len,
 			    fa->fa_tos, fa->fa_info, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in fib_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
 			  info->nlh, GFP_KERNEL);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a98f68348cb..967ea320a9ca 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3098,10 +3098,9 @@ static inline int rt_scope(int ifa_scope)
 
 static inline int inet6_ifaddr_msgsize(void)
 {
-	return nlmsg_total_size(sizeof(struct ifaddrmsg) +
-				nla_total_size(16) +
-				nla_total_size(sizeof(struct ifa_cacheinfo)) +
-				128);
+	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+	       + nla_total_size(16) /* IFA_ADDRESS */
+	       + nla_total_size(sizeof(struct ifa_cacheinfo));
 }
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
@@ -3329,10 +3328,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 
 	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
 				nlh->nlmsg_seq, RTM_NEWADDR, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout_ifa;
-	}
+	/* failure implies BUG in inet6_ifaddr_msgsize() */
+	BUG_ON(err < 0);
 
 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 errout_ifa:
@@ -3351,10 +3348,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		goto errout;
 
 	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in inet6_ifaddr_msgsize() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
 errout:
@@ -3397,16 +3392,19 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
 }
 
-/* Maximum length of ifinfomsg attributes */
-#define INET6_IFINFO_RTA_SPACE \
-		RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \
-		RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ +	\
-		RTA_SPACE(sizeof(u32)) /* MTU */ + \
-		RTA_SPACE(sizeof(int)) /* LINK */ + \
-		RTA_SPACE(0) /* PROTINFO */ + \
-		RTA_SPACE(sizeof(u32)) /* FLAGS */ + \
-		RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \
-		RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */
+static inline size_t inet6_if_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size( /* IFLA_PROTINFO */
+			nla_total_size(4) /* IFLA_INET6_FLAGS */
+			+ nla_total_size(sizeof(struct ifla_cacheinfo))
+			+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+		 );
+}
 
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 
 			     u32 pid, u32 seq, int event, unsigned int flags)
@@ -3501,18 +3499,15 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
 	int err = -ENOBUFS;
 	
-	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
 	err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in inet6_if_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
 errout:
@@ -3520,10 +3515,12 @@ errout:
 		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
 }
 
-/* Maximum length of prefix_cacheinfo attributes */
-#define INET6_PREFIX_RTA_SPACE \
-		RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \
-		RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */
+static inline size_t inet6_prefix_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct prefixmsg))
+	       + nla_total_size(sizeof(struct in6_addr))
+	       + nla_total_size(sizeof(struct prefix_cacheinfo));
+}
 
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
 			struct prefix_info *pinfo, u32 pid, u32 seq, 
@@ -3569,18 +3566,15 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
 	err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in inet6_prefix_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
 errout:
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 25804cb69cf0..d587dde5897e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -232,6 +232,12 @@ static u32 fib6_rule_default_pref(void)
 	return 0x3FFF;
 }
 
+static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+{
+	return nla_total_size(16) /* dst */
+	       + nla_total_size(16); /* src */
+}
+
 static struct fib_rules_ops fib6_rules_ops = {
 	.family			= AF_INET6,
 	.rule_size		= sizeof(struct fib6_rule),
@@ -241,6 +247,7 @@ static struct fib_rules_ops fib6_rules_ops = {
 	.compare		= fib6_rule_compare,
 	.fill			= fib6_rule_fill,
 	.default_pref		= fib6_rule_default_pref,
+	.nlmsg_payload		= fib6_rule_nlmsg_payload,
 	.nlgroup		= RTNLGRP_IPV6_RULE,
 	.policy			= fib6_rule_policy,
 	.rules_list		= &fib6_rules,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0ad07c9087a7..a6472cb9054c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2006,6 +2006,20 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return ip6_route_add(&cfg);
 }
 
+static inline size_t rt6_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct rtmsg))
+	       + nla_total_size(16) /* RTA_SRC */
+	       + nla_total_size(16) /* RTA_DST */
+	       + nla_total_size(16) /* RTA_GATEWAY */
+	       + nla_total_size(16) /* RTA_PREFSRC */
+	       + nla_total_size(4) /* RTA_TABLE */
+	       + nla_total_size(4) /* RTA_IIF */
+	       + nla_total_size(4) /* RTA_OIF */
+	       + nla_total_size(4) /* RTA_PRIORITY */
+	       + nla_total_size(sizeof(struct rta_cacheinfo));
+}
+
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
@@ -2200,7 +2214,6 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 	struct sk_buff *skb;
 	u32 pid = 0, seq = 0;
 	struct nlmsghdr *nlh = NULL;
-	int payload = sizeof(struct rtmsg) + 256;
 	int err = -ENOBUFS;
 
 	if (info) {
@@ -2210,15 +2223,13 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 			seq = nlh->nlmsg_seq;
 	}
 
-	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
 	if (skb == NULL)
 		goto errout;
 
 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
-	if (err < 0) {
-		kfree_skb(skb);
-		goto errout;
-	}
+	/* failure implies BUG in rt6_nlmsg_size() */
+	BUG_ON(err < 0);
 
 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
 errout:
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index a6ce1d6d5c59..f1788bd290f8 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -452,7 +452,7 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
 	}
 
 list_start:
-	ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL);
+	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE * nlsze_mult, GFP_KERNEL);
 	if (ans_skb == NULL) {
 		ret_val = -ENOMEM;
 		goto list_failure;
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 53c9079ad2c3..c529622ff0b7 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -356,7 +356,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
 	void *data;
 	struct netlbl_dom_map *entry;
 
-	ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		return -ENOMEM;
 	data = netlbl_netlink_hdr_put(ans_skb,
@@ -492,7 +492,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *ans_skb = NULL;
 	void *data;
 
-	ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		return -ENOMEM;
 	data = netlbl_netlink_hdr_put(ans_skb,
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 1833ad233b39..219dccade4e1 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -138,7 +138,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *ans_skb;
 	void *data;
 
-	ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		goto list_failure;
 	data = netlbl_netlink_hdr_put(ans_skb,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d527c8977b1f..f61d81b3c61c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1148,7 +1148,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 	err = -ENOBUFS;
-	skb = nlmsg_new(len, GFP_KERNEL);
+	skb = alloc_skb(len, GFP_KERNEL);
 	if (skb==NULL)
 		goto out;
 
@@ -1435,14 +1435,13 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	struct sk_buff *skb;
 	struct nlmsghdr *rep;
 	struct nlmsgerr *errmsg;
-	int size;
+	size_t payload = sizeof(*errmsg);
 
-	if (err == 0)
-		size = nlmsg_total_size(sizeof(*errmsg));
-	else
-		size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
+	/* error messages get the original request appened */
+	if (err)
+		payload += nlmsg_len(nlh);
 
-	skb = nlmsg_new(size, GFP_KERNEL);
+	skb = nlmsg_new(payload, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 49bc2db7982b..70d60c818897 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -480,7 +480,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
 	struct sk_buff *skb;
 	int err;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
-- 
cgit v1.2.3


From 3dabc7157859e706770c825aa229f8943db4e0e1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 14 Nov 2006 19:44:52 -0800
Subject: [GENL]: Add genlmsg_new() to allocate generic netlink messages

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 11 +++++++++++
 kernel/taskstats.c      |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index b619314218a6..2010465fa7d4 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -187,4 +187,15 @@ static inline int genlmsg_total_size(int payload)
 	return NLMSG_ALIGN(genlmsg_msg_size(payload));
 }
 
+/**
+ * genlmsg_new - Allocate a new generic netlink message
+ * @payload: size of the message payload
+ * @flags: the type of memory to allocate.
+ */
+static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
+{
+	return nlmsg_new(genlmsg_total_size(payload), flags);
+}
+
+
 #endif	/* __NET_GENERIC_NETLINK_H */
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4f3f0e48c845..faa5239813ce 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -77,7 +77,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 	/*
 	 * If new attributes are added, please revisit this allocation
 	 */
-	skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL);
+	skb = genlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 17c157c889f4b07258af6bfec9e4e9dcf3c00178 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 14 Nov 2006 19:46:02 -0800
Subject: [GENL]: Add genlmsg_put_reply() to simplify building reply headers

By modyfing genlmsg_put() to take a genl_family and by adding
genlmsg_put_reply() the process of constructing the netlink
and generic netlink headers is simplified.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h           | 31 ++++++++++++++++++++++++-------
 kernel/taskstats.c                |  8 ++------
 net/netlabel/netlabel_cipso_v4.c  | 17 +++++------------
 net/netlabel/netlabel_mgmt.c      | 34 ++++++++++------------------------
 net/netlabel/netlabel_unlabeled.c |  8 ++------
 net/netlabel/netlabel_user.h      | 31 -------------------------------
 net/netlink/genetlink.c           | 17 ++++++++---------
 7 files changed, 51 insertions(+), 95 deletions(-)

(limited to 'kernel')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 797c18b5041f..7fd131c9a8cc 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -79,33 +79,50 @@ extern struct sock *genl_sock;
  * @skb: socket buffer holding the message
  * @pid: netlink pid the message is addressed to
  * @seq: sequence number (usually the one of the sender)
- * @type: netlink message type
- * @hdrlen: length of the user specific header
+ * @family: generic netlink family
  * @flags netlink message flags
  * @cmd: generic netlink command
- * @version: version
  *
  * Returns pointer to user specific header
  */
 static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
-				int type, int hdrlen, int flags,
-				u8 cmd, u8 version)
+				struct genl_family *family, int flags, u8 cmd)
 {
 	struct nlmsghdr *nlh;
 	struct genlmsghdr *hdr;
 
-	nlh = nlmsg_put(skb, pid, seq, type, GENL_HDRLEN + hdrlen, flags);
+	nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN +
+			family->hdrsize, flags);
 	if (nlh == NULL)
 		return NULL;
 
 	hdr = nlmsg_data(nlh);
 	hdr->cmd = cmd;
-	hdr->version = version;
+	hdr->version = family->version;
 	hdr->reserved = 0;
 
 	return (char *) hdr + GENL_HDRLEN;
 }
 
+/**
+ * genlmsg_put_reply - Add generic netlink header to a reply message
+ * @skb: socket buffer holding the message
+ * @info: receiver info
+ * @family: generic netlink family
+ * @flags: netlink message flags
+ * @cmd: generic netlink command
+ *
+ * Returns pointer to user specific header
+ */
+static inline void *genlmsg_put_reply(struct sk_buff *skb,
+				      struct genl_info *info,
+				      struct genl_family *family,
+				      int flags, u8 cmd)
+{
+	return genlmsg_put(skb, info->snd_pid, info->snd_seq, family,
+			   flags, cmd);
+}
+
 /**
  * genlmsg_end - Finalize a generic netlink message
  * @skb: socket buffer the message is stored in
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index faa5239813ce..d3d28919d4b4 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -85,13 +85,9 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 		int seq = get_cpu_var(taskstats_seqnum)++;
 		put_cpu_var(taskstats_seqnum);
 
-		reply = genlmsg_put(skb, 0, seq,
-				family.id, 0, 0,
-				cmd, family.version);
+		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
 	} else
-		reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
-				family.id, 0, 0,
-				cmd, family.version);
+		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
 	if (reply == NULL) {
 		nlmsg_free(skb);
 		return -EINVAL;
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 52628878524c..fe9851fac85d 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -457,12 +457,8 @@ list_start:
 		ret_val = -ENOMEM;
 		goto list_failure;
 	}
-	data = netlbl_netlink_hdr_put(ans_skb,
-				      info->snd_pid,
-				      info->snd_seq,
-				      netlbl_cipsov4_gnl_family.id,
-				      0,
-				      NLBL_CIPSOV4_C_LIST);
+	data = genlmsg_put_reply(ans_skb, info, &netlbl_cipsov4_gnl_family,
+				 0, NLBL_CIPSOV4_C_LIST);
 	if (data == NULL) {
 		ret_val = -ENOMEM;
 		goto list_failure;
@@ -607,12 +603,9 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
 	struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
 	void *data;
 
-	data = netlbl_netlink_hdr_put(cb_arg->skb,
-				      NETLINK_CB(cb_arg->nl_cb->skb).pid,
-				      cb_arg->seq,
-				      netlbl_cipsov4_gnl_family.id,
-				      NLM_F_MULTI,
-				      NLBL_CIPSOV4_C_LISTALL);
+	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+			   cb_arg->seq, &netlbl_cipsov4_gnl_family,
+			   NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL);
 	if (data == NULL)
 		goto listall_cb_failure;
 
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 784693735e0d..e8c80f33f3d7 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -188,12 +188,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
 	struct netlbl_domhsh_walk_arg *cb_arg = arg;
 	void *data;
 
-	data = netlbl_netlink_hdr_put(cb_arg->skb,
-				      NETLINK_CB(cb_arg->nl_cb->skb).pid,
-				      cb_arg->seq,
-				      netlbl_mgmt_gnl_family.id,
-				      NLM_F_MULTI,
-				      NLBL_MGMT_C_LISTALL);
+	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+			   cb_arg->seq, &netlbl_mgmt_gnl_family,
+			   NLM_F_MULTI, NLBL_MGMT_C_LISTALL);
 	if (data == NULL)
 		goto listall_cb_failure;
 
@@ -359,12 +356,8 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
 	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		return -ENOMEM;
-	data = netlbl_netlink_hdr_put(ans_skb,
-				      info->snd_pid,
-				      info->snd_seq,
-				      netlbl_mgmt_gnl_family.id,
-				      0,
-				      NLBL_MGMT_C_LISTDEF);
+	data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family,
+				 0, NLBL_MGMT_C_LISTDEF);
 	if (data == NULL)
 		goto listdef_failure;
 
@@ -422,12 +415,9 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
 	int ret_val = -ENOMEM;
 	void *data;
 
-	data = netlbl_netlink_hdr_put(skb,
-				      NETLINK_CB(cb->skb).pid,
-				      cb->nlh->nlmsg_seq,
-				      netlbl_mgmt_gnl_family.id,
-				      NLM_F_MULTI,
-				      NLBL_MGMT_C_PROTOCOLS);
+	data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			   &netlbl_mgmt_gnl_family, NLM_F_MULTI,
+			   NLBL_MGMT_C_PROTOCOLS);
 	if (data == NULL)
 		goto protocols_cb_failure;
 
@@ -495,12 +485,8 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
 	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		return -ENOMEM;
-	data = netlbl_netlink_hdr_put(ans_skb,
-				      info->snd_pid,
-				      info->snd_seq,
-				      netlbl_mgmt_gnl_family.id,
-				      0,
-				      NLBL_MGMT_C_VERSION);
+	data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family,
+				 0, NLBL_MGMT_C_VERSION);
 	if (data == NULL)
 		goto version_failure;
 
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 57dd07b51be6..a1d4ae51db04 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -141,12 +141,8 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
 	ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (ans_skb == NULL)
 		goto list_failure;
-	data = netlbl_netlink_hdr_put(ans_skb,
-				      info->snd_pid,
-				      info->snd_seq,
-				      netlbl_unlabel_gnl_family.id,
-				      0,
-				      NLBL_UNLABEL_C_LIST);
+	data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family,
+				 0, NLBL_UNLABEL_C_LIST);
 	if (data == NULL) {
 		ret_val = -ENOMEM;
 		goto list_failure;
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 47967ef32964..6d7f4ab46c2b 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -41,37 +41,6 @@
 
 /* NetLabel NETLINK helper functions */
 
-/**
- * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
- * @skb: the packet
- * @pid: the PID of the receipient
- * @seq: the sequence number
- * @type: the generic NETLINK message family type
- * @cmd: command
- *
- * Description:
- * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
- * struct to the packet.  Returns a pointer to the start of the payload buffer
- * on success or NULL on failure.
- *
- */
-static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
-					   u32 pid,
-					   u32 seq,
-					   int type,
-					   int flags,
-					   u8 cmd)
-{
-	return genlmsg_put(skb,
-			   pid,
-			   seq,
-			   type,
-			   0,
-			   flags,
-			   cmd,
-			   NETLBL_PROTO_VERSION);
-}
-
 /**
  * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
  * @skb: the packet
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0249a56a9aad..8903ef93db5b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -384,6 +384,13 @@ static void genl_rcv(struct sock *sk, int len)
  * Controller
  **************************************************************************/
 
+static struct genl_family genl_ctrl = {
+	.id = GENL_ID_CTRL,
+	.name = "nlctrl",
+	.version = 0x1,
+	.maxattr = CTRL_ATTR_MAX,
+};
+
 static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 			  u32 flags, struct sk_buff *skb, u8 cmd)
 {
@@ -392,8 +399,7 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 	void *hdr;
 	int idx = 1;
 
-	hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
-			  family->version);
+	hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
 	if (hdr == NULL)
 		return -1;
 
@@ -562,13 +568,6 @@ static struct genl_ops genl_ctrl_ops = {
 	.policy		= ctrl_policy,
 };
 
-static struct genl_family genl_ctrl = {
-	.id = GENL_ID_CTRL,
-	.name = "nlctrl",
-	.version = 0x1,
-	.maxattr = CTRL_ATTR_MAX,
-};
-
 static int __init genl_init(void)
 {
 	int i, err;
-- 
cgit v1.2.3


From f6a570333e554b48ad589e7137c77c57809eee81 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 18 Oct 2006 01:47:25 -0400
Subject: [PATCH] severing module.h->sched.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/i386/kernel/alternative.c            |  1 +
 arch/i386/kernel/cpu/mcheck/therm_throt.c |  1 +
 drivers/base/cpu.c                        |  1 +
 drivers/hwmon/abituguru.c                 |  1 +
 drivers/leds/ledtrig-ide-disk.c           |  1 +
 drivers/leds/ledtrig-timer.c              |  1 +
 drivers/pci/access.c                      |  1 +
 drivers/scsi/scsi_transport_sas.c         |  1 +
 drivers/w1/slaves/w1_therm.c              |  1 +
 include/asm-x86_64/elf.h                  |  1 -
 include/linux/acct.h                      |  1 +
 include/linux/module.h                    | 13 +------------
 include/scsi/libiscsi.h                   |  2 ++
 kernel/latency.c                          |  1 +
 kernel/module.c                           | 15 ++++++++++++++-
 lib/random32.c                            |  1 +
 16 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 583c238e17fb..535f9794fba1 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -1,4 +1,5 @@
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <asm/alternative.h>
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 2d8703b7ce65..bad8b4420709 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -20,6 +20,7 @@
 #include <linux/cpu.h>
 #include <asm/cpu.h>
 #include <linux/notifier.h>
+#include <linux/jiffies.h>
 #include <asm/therm_throt.h>
 
 /* How long to wait between reporting thermal events */
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 4bef76a2f3f2..1f745f12f94e 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -5,6 +5,7 @@
 #include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/cpu.h>
 #include <linux/topology.h>
 #include <linux/device.h>
diff --git a/drivers/hwmon/abituguru.c b/drivers/hwmon/abituguru.c
index e5cb0fdab9b1..b1dc63e4ac7b 100644
--- a/drivers/hwmon/abituguru.c
+++ b/drivers/hwmon/abituguru.c
@@ -21,6 +21,7 @@
     etc voltage & frequency control is not supported!
 */
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
diff --git a/drivers/leds/ledtrig-ide-disk.c b/drivers/leds/ledtrig-ide-disk.c
index fa651886ab4f..54b155c7026f 100644
--- a/drivers/leds/ledtrig-ide-disk.c
+++ b/drivers/leds/ledtrig-ide-disk.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/timer.h>
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c
index 29a8818a32ec..d756bdb01c59 100644
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/list.h>
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 73a58c73d526..fc405f0165d9 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -1,5 +1,6 @@
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/wait.h>
 
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index b5b0c2cba96b..5c0b75bbfa10 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -25,6 +25,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/jiffies.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 5372cfcbd054..b022fffd8c51 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/delay.h>
diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h
index a406fcb1e924..6d24ea7c4d9d 100644
--- a/include/asm-x86_64/elf.h
+++ b/include/asm-x86_64/elf.h
@@ -45,7 +45,6 @@ typedef struct user_i387_struct elf_fpregset_t;
 
 #ifdef __KERNEL__
 #include <asm/processor.h>
-#include <asm/compat.h>
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 0496d1f09952..302eb727ecb8 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -119,6 +119,7 @@ struct acct_v3
 #ifdef CONFIG_BSD_PROCESS_ACCT
 struct vfsmount;
 struct super_block;
+struct pacct_struct;
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
 extern void acct_init_pacct(struct pacct_struct *pacct);
diff --git a/include/linux/module.h b/include/linux/module.h
index 9258ffd8a7f0..d33df2408e05 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -6,7 +6,6 @@
  * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
  * Rewritten again by Rusty Russell, 2002
  */
-#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/stat.h>
@@ -411,17 +410,7 @@ static inline int try_module_get(struct module *module)
 	return ret;
 }
 
-static inline void module_put(struct module *module)
-{
-	if (module) {
-		unsigned int cpu = get_cpu();
-		local_dec(&module->ref[cpu].count);
-		/* Maybe they're waiting for us to drop reference? */
-		if (unlikely(!module_is_live(module)))
-			wake_up_process(module->waiter);
-		put_cpu();
-	}
-}
+extern void module_put(struct module *module);
 
 #else /*!CONFIG_MODULE_UNLOAD*/
 static inline int try_module_get(struct module *module)
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 61eebec00a7b..ea0816d4904d 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -25,6 +25,8 @@
 
 #include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 #include <scsi/iscsi_proto.h>
 #include <scsi/iscsi_if.h>
 
diff --git a/kernel/latency.c b/kernel/latency.c
index 258f2555abbc..e63fcacb61a7 100644
--- a/kernel/latency.c
+++ b/kernel/latency.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/jiffies.h>
 #include <asm/atomic.h>
 
 struct latency_info {
diff --git a/kernel/module.c b/kernel/module.c
index 45e01cb60101..e2d09d604ca0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -34,10 +34,10 @@
 #include <linux/err.h>
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include <linux/stop_machine.h>
 #include <linux/device.h>
 #include <linux/string.h>
-#include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
 #include <asm/uaccess.h>
@@ -790,6 +790,19 @@ static struct module_attribute refcnt = {
 	.show = show_refcnt,
 };
 
+void module_put(struct module *module)
+{
+	if (module) {
+		unsigned int cpu = get_cpu();
+		local_dec(&module->ref[cpu].count);
+		/* Maybe they're waiting for us to drop reference? */
+		if (unlikely(!module_is_live(module)))
+			wake_up_process(module->waiter);
+		put_cpu();
+	}
+}
+EXPORT_SYMBOL(module_put);
+
 #else /* !CONFIG_MODULE_UNLOAD */
 static void print_unload_info(struct seq_file *m, struct module *mod)
 {
diff --git a/lib/random32.c b/lib/random32.c
index 4a15ce51cea7..ec7f81d3fb18 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -36,6 +36,7 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 #include <linux/module.h>
+#include <linux/jiffies.h>
 #include <linux/random.h>
 
 struct rnd_state {
-- 
cgit v1.2.3


From a1f8e7f7fb9d7e2cbcb53170edca7c0ac4680697 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Oct 2006 16:08:53 -0400
Subject: [PATCH] severing skbuff.h -> highmem.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/infiniband/ulp/iser/iser_memory.c |  1 +
 drivers/net/sun3lance.c                   |  1 +
 fs/compat.c                               |  1 +
 include/linux/skbuff.h                    | 19 -------------------
 kernel/auditsc.c                          |  1 +
 net/appletalk/ddp.c                       |  1 +
 net/core/kmap_skb.h                       | 19 +++++++++++++++++++
 net/core/skbuff.c                         |  3 ++-
 net/core/sock.c                           |  1 +
 net/ipv4/ip_output.c                      |  1 +
 net/packet/af_packet.c                    |  1 +
 11 files changed, 29 insertions(+), 20 deletions(-)
 create mode 100644 net/core/kmap_skb.h

(limited to 'kernel')

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 0606744c3f84..5e122501fd80 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -35,6 +35,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <asm/io.h>
 #include <asm/scatterlist.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index b865db363ba0..47a1c09d19ac 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -38,6 +38,7 @@ static char *version = "sun3lance.c: v1.2 1/12/2001  Sam Creasey (sammy@sammy.ne
 #include <linux/skbuff.h>
 #include <linux/bitops.h>
 
+#include <asm/cacheflush.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/io.h>
diff --git a/fs/compat.c b/fs/compat.c
index 8d0a0018a7d2..fde52d40c0b6 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -45,6 +45,7 @@
 #include <linux/personality.h>
 #include <linux/rwsem.h>
 #include <linux/tsacct_kern.h>
+#include <linux/highmem.h>
 #include <linux/mm.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 14ec16d2d9ba..24ce0add6c54 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -23,7 +23,6 @@
 #include <asm/types.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/highmem.h>
 #include <linux/poll.h>
 #include <linux/net.h>
 #include <linux/textsearch.h>
@@ -1295,24 +1294,6 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 	return __pskb_trim(skb, len);
 }
 
-static inline void *kmap_skb_frag(const skb_frag_t *frag)
-{
-#ifdef CONFIG_HIGHMEM
-	BUG_ON(in_irq());
-
-	local_bh_disable();
-#endif
-	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
-}
-
-static inline void kunmap_skb_frag(void *vaddr)
-{
-	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
-#ifdef CONFIG_HIGHMEM
-	local_bh_enable();
-#endif
-}
-
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     prefetch(skb->next), (skb != (struct sk_buff *)(queue));	\
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 42f2f1179711..ab97e5101232 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -64,6 +64,7 @@
 #include <linux/tty.h>
 #include <linux/selinux.h>
 #include <linux/binfmts.h>
+#include <linux/highmem.h>
 #include <linux/syscalls.h>
 
 #include "audit.h"
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 485e35c3b28b..3a7052207708 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -61,6 +61,7 @@
 #include <net/tcp_states.h>
 #include <net/route.h>
 #include <linux/atalk.h>
+#include "../core/kmap_skb.h"
 
 struct datalink_proto *ddp_dl, *aarp_dl;
 static const struct proto_ops atalk_dgram_ops;
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 000000000000..283c2b993fb8
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
+#include <linux/highmem.h>
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+	BUG_ON(in_irq());
+
+	local_bh_disable();
+#endif
+	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+	local_bh_enable();
+#endif
+}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a90bc439488e..8e1c385e5ba9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -56,7 +56,6 @@
 #include <linux/cache.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
-#include <linux/highmem.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -67,6 +66,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
+#include "kmap_skb.h"
+
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index ab8fafadb4ba..419c7d3289c7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/init.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1da3d32f8289..a35209d517ad 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -53,6 +53,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/highmem.h>
 
 #include <linux/socket.h>
 #include <linux/sockios.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 271d2eed0699..08e68b67bbf6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -71,6 +71,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 #include <asm/page.h>
+#include <asm/cacheflush.h>
 #include <asm/io.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-- 
cgit v1.2.3


From f95d47caae5302a63d92be9a0292abc90e2a14e1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 7 Dec 2006 02:14:02 +0100
Subject: [PATCH] i386: Use %gs as the PDA base-segment in the kernel

This patch is the meat of the PDA change.  This patch makes several related
changes:

1: Most significantly, %gs is now used in the kernel.  This means that on
   entry, the old value of %gs is saved away, and it is reloaded with
   __KERNEL_PDA.

2: entry.S constructs the stack in the shape of struct pt_regs, and this
   is passed around the kernel so that the process's saved register
   state can be accessed.

   Unfortunately struct pt_regs doesn't currently have space for %gs
   (or %fs). This patch extends pt_regs to add space for gs (no space
   is allocated for %fs, since it won't be used, and it would just
   complicate the code in entry.S to work around the space).

3: Because %gs is now saved on the stack like %ds, %es and the integer
   registers, there are a number of places where it no longer needs to
   be handled specially; namely context switch, and saving/restoring the
   register state in a signal context.

4: And since kernel threads run in kernel space and call normal kernel
   code, they need to be created with their %gs == __KERNEL_PDA.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Chuck Ebbert <76306.1226@compuserve.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/i386/kernel/asm-offsets.c |  1 +
 arch/i386/kernel/cpu/common.c  | 21 +++++++++++--
 arch/i386/kernel/entry.S       | 70 +++++++++++++++++++++++++++++-------------
 arch/i386/kernel/head.S        | 31 ++++++++++++++++---
 arch/i386/kernel/process.c     | 26 ++++++++--------
 arch/i386/kernel/signal.c      |  6 ++--
 include/asm-i386/mmu_context.h |  4 +--
 include/asm-i386/processor.h   |  4 ++-
 include/asm-i386/ptrace.h      |  2 ++
 kernel/fork.c                  |  2 +-
 10 files changed, 117 insertions(+), 50 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 70b19807acf9..9620872d3534 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -72,6 +72,7 @@ void foo(void)
 	OFFSET(PT_EAX, pt_regs, eax);
 	OFFSET(PT_DS,  pt_regs, xds);
 	OFFSET(PT_ES,  pt_regs, xes);
+	OFFSET(PT_GS,  pt_regs, xgs);
 	OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
 	OFFSET(PT_EIP, pt_regs, eip);
 	OFFSET(PT_CS,  pt_regs, xcs);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 2534e25ed745..4e63d8ce602b 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -593,6 +593,14 @@ void __init early_cpu_init(void)
 #endif
 }
 
+/* Make sure %gs is initialized properly in idle threads */
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(struct pt_regs));
+	regs->xgs = __KERNEL_PDA;
+	return regs;
+}
+
 __cpuinit int alloc_gdt(int cpu)
 {
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
@@ -644,6 +652,14 @@ struct i386_pda boot_pda = {
 	._pda = &boot_pda,
 };
 
+static inline void set_kernel_gs(void)
+{
+	/* Set %gs for this CPU's PDA.  Memory clobber is to create a
+	   barrier with respect to any PDA operations, so the compiler
+	   doesn't move any before here. */
+	asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
+}
+
 /* Initialize the CPU's GDT and PDA.  The boot CPU does this for
    itself, but secondaries find this done for them. */
 __cpuinit int init_gdt(int cpu, struct task_struct *idle)
@@ -693,6 +709,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
 	   the boot CPU, this will transition from the boot gdt+pda to
 	   the real ones). */
 	load_gdt(cpu_gdt_descr);
+	set_kernel_gs();
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -731,8 +748,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
-	/* Clear %fs and %gs. */
-	asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+	/* Clear %fs. */
+	asm volatile ("mov %0, %%fs" : : "r" (0));
 
 	/* Clear all 6 debug registers: */
 	set_debugreg(0, 0);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 0069bf01603e..b99d4a160078 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,12 +30,13 @@
  *	18(%esp) - %eax
  *	1C(%esp) - %ds
  *	20(%esp) - %es
- *	24(%esp) - orig_eax
- *	28(%esp) - %eip
- *	2C(%esp) - %cs
- *	30(%esp) - %eflags
- *	34(%esp) - %oldesp
- *	38(%esp) - %oldss
+ *	24(%esp) - %gs
+ *	28(%esp) - orig_eax
+ *	2C(%esp) - %eip
+ *	30(%esp) - %cs
+ *	34(%esp) - %eflags
+ *	38(%esp) - %oldesp
+ *	3C(%esp) - %oldss
  *
  * "current" is in register %ebx during any slow entries.
  */
@@ -92,6 +93,9 @@ VM_MASK		= 0x00020000
 
 #define SAVE_ALL \
 	cld; \
+	pushl %gs; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	/*CFI_REL_OFFSET gs, 0;*/\
 	pushl %es; \
 	CFI_ADJUST_CFA_OFFSET 4;\
 	/*CFI_REL_OFFSET es, 0;*/\
@@ -121,7 +125,9 @@ VM_MASK		= 0x00020000
 	CFI_REL_OFFSET ebx, 0;\
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
-	movl %edx, %es;
+	movl %edx, %es; \
+	movl $(__KERNEL_PDA), %edx; \
+	movl %edx, %gs
 
 #define RESTORE_INT_REGS \
 	popl %ebx;	\
@@ -154,17 +160,22 @@ VM_MASK		= 0x00020000
 2:	popl %es;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	/*CFI_RESTORE es;*/\
-.section .fixup,"ax";	\
-3:	movl $0,(%esp);	\
-	jmp 1b;		\
+3:	popl %gs;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	/*CFI_RESTORE gs;*/\
+.pushsection .fixup,"ax";	\
 4:	movl $0,(%esp);	\
+	jmp 1b;		\
+5:	movl $0,(%esp);	\
 	jmp 2b;		\
-.previous;		\
+6:	movl $0,(%esp);	\
+	jmp 3b;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,3b;	\
-	.long 2b,4b;	\
-.previous
+	.long 1b,4b;	\
+	.long 2b,5b;	\
+	.long 3b,6b;	\
+.popsection
 
 #define RING0_INT_FRAME \
 	CFI_STARTPROC simple;\
@@ -231,6 +242,7 @@ check_userspace:
 	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
 	cmpl $USER_RPL, %eax
 	jb resume_kernel		# not returning to v8086 or userspace
+
 ENTRY(resume_userspace)
  	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -327,9 +339,16 @@ sysenter_past_esp:
 	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
+1:	mov  PT_GS(%esp), %gs
 	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
-
+.pushsection .fixup,"ax"
+2:	movl $0,PT_GS(%esp)
+	jmp 1b
+.section __ex_table,"a"
+	.align 4
+	.long 1b,2b
+.popsection
 
 	# system call handler stub
 ENTRY(system_call)
@@ -375,7 +394,7 @@ restore_nocheck:
 	TRACE_IRQS_IRET
 restore_nocheck_notrace:
 	RESTORE_REGS
-	addl $4, %esp
+	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 1:	INTERRUPT_RETURN
 .section .fixup,"ax"
@@ -588,6 +607,10 @@ KPROBE_ENTRY(page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
+	/* the function address is in %gs's slot on the stack */
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
 	pushl %ds
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ds, 0*/
@@ -613,18 +636,20 @@ error_code:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebx, 0
 	cld
-	pushl %es
+	pushl %gs
 	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET es, 0*/
+	/*CFI_REL_OFFSET gs, 0*/
+	movl $(__KERNEL_PDA), %ecx
+	movl %ecx, %gs
 	UNWIND_ESPFIX_STACK
 	popl %ecx
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_REGISTER es, ecx*/
-	movl PT_ES(%esp), %edi		# get the function address
+	movl PT_GS(%esp), %edi		# get the function address
 	movl PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl $-1, PT_ORIG_EAX(%esp)
-	movl %ecx, PT_ES(%esp)
-	/*CFI_REL_OFFSET es, ES*/
+	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+	mov  %ecx, PT_GS(%esp)
+	/*CFI_REL_OFFSET gs, ES*/
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
@@ -936,6 +961,7 @@ ENTRY(arch_unwind_init_running)
 	movl	%ebx, PT_EAX(%edx)
 	movl	$__USER_DS, PT_DS(%edx)
 	movl	$__USER_DS, PT_ES(%edx)
+	movl	$0, PT_GS(%edx)
 	movl	%ebx, PT_ORIG_EAX(%edx)
 	movl	%ecx, PT_EIP(%edx)
 	movl	12(%esp), %ecx
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 4a83384c5a61..5b14e95ac8b9 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -302,6 +302,7 @@ is386:	movl $2,%ecx		# set MP
 	movl %eax,%cr0
 
 	call check_x87
+	call setup_pda
 	lgdt cpu_gdt_descr
 	lidt idt_descr
 	ljmp $(__KERNEL_CS),$1f
@@ -312,10 +313,13 @@ is386:	movl $2,%ecx		# set MP
 	movl %eax,%ds
 	movl %eax,%es
 
-	xorl %eax,%eax			# Clear FS/GS and LDT
+	xorl %eax,%eax			# Clear FS and LDT
 	movl %eax,%fs
-	movl %eax,%gs
 	lldt %ax
+
+	movl $(__KERNEL_PDA),%eax
+	mov  %eax,%gs
+
 	cld			# gcc2 wants the direction flag cleared at all times
 	pushl $0		# fake return address for unwinder
 #ifdef CONFIG_SMP
@@ -345,6 +349,23 @@ check_x87:
 	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
 	ret
 
+/*
+ * Point the GDT at this CPU's PDA.  On boot this will be
+ * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
+ * that CPU's GDT and PDA.
+ */
+setup_pda:
+	/* get the PDA pointer */
+	movl start_pda, %eax
+
+	/* slot the PDA address into the GDT */
+	mov cpu_gdt_descr+2, %ecx
+	mov %ax, (__KERNEL_PDA+0+2)(%ecx)		/* base & 0x0000ffff */
+	shr $16, %eax
+	mov %al, (__KERNEL_PDA+4+0)(%ecx)		/* base & 0x00ff0000 */
+	mov %ah, (__KERNEL_PDA+4+3)(%ecx)		/* base & 0xff000000 */
+	ret
+
 /*
  *  setup_idt
  *
@@ -484,6 +505,8 @@ ENTRY(empty_zero_page)
  * This starts the data section.
  */
 .data
+ENTRY(start_pda)
+	.long boot_pda
 
 ENTRY(stack_start)
 	.long init_thread_union+THREAD_SIZE
@@ -525,7 +548,7 @@ idt_descr:
 
 # boot GDT descriptor (later on used by CPU#0):
 	.word 0				# 32 bit align gdt_desc.address
-cpu_gdt_descr:
+ENTRY(cpu_gdt_descr)
 	.word GDT_ENTRIES*8-1
 	.long cpu_gdt_table
 
@@ -585,7 +608,7 @@ ENTRY(cpu_gdt_table)
 	.quad 0x004092000000ffff	/* 0xc8 APM DS    data */
 
 	.quad 0x00c0920000000000	/* 0xd0 - ESPFIX SS */
-	.quad 0x0000000000000000	/* 0xd8 - PDA */
+	.quad 0x00cf92000000ffff	/* 0xd8 - PDA */
 	.quad 0x0000000000000000	/* 0xe0 - unused */
 	.quad 0x0000000000000000	/* 0xe8 - unused */
 	.quad 0x0000000000000000	/* 0xf0 - unused */
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index ae924c416b68..905364d42847 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -56,6 +56,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
+#include <asm/pda.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -346,6 +347,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 
 	regs.xds = __USER_DS;
 	regs.xes = __USER_DS;
+	regs.xgs = __KERNEL_PDA;
 	regs.orig_eax = -1;
 	regs.eip = (unsigned long) kernel_thread_helper;
 	regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -431,7 +433,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 	p->thread.eip = (unsigned long) ret_from_fork;
 
 	savesegment(fs,p->thread.fs);
-	savesegment(gs,p->thread.gs);
 
 	tsk = current;
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -659,16 +660,16 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	load_esp0(tss, next);
 
 	/*
-	 * Save away %fs and %gs. No need to save %es and %ds, as
-	 * those are always kernel segments while inside the kernel.
-	 * Doing this before setting the new TLS descriptors avoids
-	 * the situation where we temporarily have non-reloadable
-	 * segments in %fs and %gs.  This could be an issue if the
-	 * NMI handler ever used %fs or %gs (it does not today), or
-	 * if the kernel is running inside of a hypervisor layer.
+	 * Save away %fs. No need to save %gs, as it was saved on the
+	 * stack on entry.  No need to save %es and %ds, as those are
+	 * always kernel segments while inside the kernel.  Doing this
+	 * before setting the new TLS descriptors avoids the situation
+	 * where we temporarily have non-reloadable segments in %fs
+	 * and %gs.  This could be an issue if the NMI handler ever
+	 * used %fs or %gs (it does not today), or if the kernel is
+	 * running inside of a hypervisor layer.
 	 */
 	savesegment(fs, prev->fs);
-	savesegment(gs, prev->gs);
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
@@ -676,16 +677,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	load_TLS(next, cpu);
 
 	/*
-	 * Restore %fs and %gs if needed.
+	 * Restore %fs if needed.
 	 *
-	 * Glibc normally makes %fs be zero, and %gs is one of
-	 * the TLS segments.
+	 * Glibc normally makes %fs be zero.
 	 */
 	if (unlikely(prev->fs | next->fs))
 		loadsegment(fs, next->fs);
 
-	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
 
 	/*
 	 * Restore IOPL if needed.
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 43002cfb40c4..65d7620eaa09 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -128,7 +128,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 			 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
 			 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
 
-	GET_SEG(gs);
+	COPY_SEG(gs);
 	GET_SEG(fs);
 	COPY_SEG(es);
 	COPY_SEG(ds);
@@ -244,9 +244,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 {
 	int tmp, err = 0;
 
-	tmp = 0;
-	savesegment(gs, tmp);
-	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs);
 	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
 
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h
index 1b1495372c4d..68ff102d6f5e 100644
--- a/include/asm-i386/mmu_context.h
+++ b/include/asm-i386/mmu_context.h
@@ -62,8 +62,8 @@ static inline void switch_mm(struct mm_struct *prev,
 #endif
 }
 
-#define deactivate_mm(tsk, mm) \
-	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+#define deactivate_mm(tsk, mm)			\
+	asm("movl %0,%%fs": :"r" (0));
 
 #define activate_mm(prev, next) \
 	switch_mm((prev),(next),NULL)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index a9f2041c7c87..f73cf836e649 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -473,6 +473,7 @@ struct thread_struct {
 	.vm86_info = NULL,						\
 	.sysenter_cs = __KERNEL_CS,					\
 	.io_bitmap_ptr = NULL,						\
+	.gs = __KERNEL_PDA,						\
 }
 
 /*
@@ -500,7 +501,8 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa
 }
 
 #define start_thread(regs, new_eip, new_esp) do {		\
-	__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));	\
+	__asm__("movl %0,%%fs": :"r" (0));			\
+	regs->xgs = 0;						\
 	set_fs(USER_DS);					\
 	regs->xds = __USER_DS;					\
 	regs->xes = __USER_DS;					\
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index d505f501077a..bdbc894339b4 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -16,6 +16,8 @@ struct pt_regs {
 	long eax;
 	int  xds;
 	int  xes;
+	/* int  xfs; */
+	int  xgs;
 	long orig_eax;
 	long eip;
 	int  xcs;
diff --git a/kernel/fork.c b/kernel/fork.c
index 8cdd3e72ba55..fd22245e3881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1303,7 +1303,7 @@ fork_out:
 	return ERR_PTR(retval);
 }
 
-struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
 	return regs;
-- 
cgit v1.2.3


From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <76306.1226@compuserve.com>
Date: Thu, 7 Dec 2006 02:14:11 +0100
Subject: [PATCH] x86: add sysctl for kstack_depth_to_print

Add sysctl for kstack_depth_to_print. This lets users change
the amount of raw stack data printed in dump_stack() without
having to reboot.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/sysctl/kernel.txt | 8 ++++++++
 arch/i386/kernel/traps.c        | 2 +-
 arch/x86_64/kernel/traps.c      | 2 +-
 include/asm-x86_64/stacktrace.h | 2 ++
 kernel/sysctl.c                 | 9 +++++++++
 5 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 0bc7f1e3c9e6..5922e84d9133 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
 - hotplug
 - java-appletviewer           [ binfmt_java, obsolete ]
 - java-interpreter            [ binfmt_java, obsolete ]
+- kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/kmod.txt
 - msgmax
@@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If
 
 ==============================================================
 
+kstack_depth_to_print: (X86 only)
+
+Controls the number of words to print when dumping the raw
+kernel stack.
+
+==============================================================
+
 osrelease, ostype & version:
 
 # cat osrelease
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7b2f9f022089..1d48a75fa338 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -91,7 +91,7 @@ asmlinkage void alignment_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
-static int kstack_depth_to_print = 24;
+int kstack_depth_to_print = 24;
 #ifdef CONFIG_STACK_UNWIND
 static int call_trace = 1;
 #else
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 264db33476ab..75ceccee178c 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	preempt_enable_no_resched();
 }
 
-static int kstack_depth_to_print = 12;
+int kstack_depth_to_print = 12;
 #ifdef CONFIG_STACK_UNWIND
 static int call_trace = 1;
 #else
diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h
index 5eb9799bef76..6f0b54594307 100644
--- a/include/asm-x86_64/stacktrace.h
+++ b/include/asm-x86_64/stacktrace.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_STACKTRACE_H
 #define _ASM_STACKTRACE_H 1
 
+extern int kstack_depth_to_print;
+
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 09e569f4792b..6fc5e17086f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
 
 #ifdef CONFIG_X86
 #include <asm/nmi.h>
+#include <asm/stacktrace.h>
 #endif
 
 #if defined(CONFIG_SYSCTL)
@@ -707,6 +708,14 @@ static ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "kstack_depth_to_print",
+		.data		= &kstack_depth_to_print,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 #if defined(CONFIG_MMU)
 	{
-- 
cgit v1.2.3


From e2124bb8d369a4bc1afde1959040e33d71c41d5e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] unwinder: Use probe_kernel_address instead of __get_user in
 kernel/unwind.c

This avoids trouble with the page fault handler if the fault
happens inside an interrupt context.

Suggested by Linus

Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
 kernel/unwind.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/unwind.c b/kernel/unwind.c
index ed0a21d4a902..af48168a3afb 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -14,6 +14,7 @@
 #include <linux/bootmem.h>
 #include <linux/sort.h>
 #include <linux/stop_machine.h>
+#include <linux/uaccess.h>
 #include <asm/sections.h>
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
@@ -550,7 +551,7 @@ static unsigned long read_pointer(const u8 **pLoc,
 		return 0;
 	}
 	if ((ptrType & DW_EH_PE_indirect)
-	    && __get_user(value, (unsigned long *)value))
+	    && probe_kernel_address((unsigned long *)value, value))
 		return 0;
 	*pLoc = ptr.p8;
 
@@ -982,18 +983,19 @@ int unwind(struct unwind_frame_info *frame)
 		        & (sizeof(unsigned long) - 1))) {
 			unsigned long link;
 
-			if (!__get_user(link,
+			if (!probe_kernel_address(
 			                (unsigned long *)(UNW_FP(frame)
-			                                  + FRAME_LINK_OFFSET))
+			                                  + FRAME_LINK_OFFSET),
+						  link)
 # if FRAME_RETADDR_OFFSET < 0
 			   && link > bottom && link < UNW_FP(frame)
 # else
 			   && link > UNW_FP(frame) && link < bottom
 # endif
 			   && !(link & (sizeof(link) - 1))
-			   && !__get_user(UNW_PC(frame),
+			   && !probe_kernel_address(
 			                  (unsigned long *)(UNW_FP(frame)
-			                                    + FRAME_RETADDR_OFFSET))) {
+			                                    + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
 				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
 # if FRAME_RETADDR_OFFSET < 0
 					-
@@ -1104,7 +1106,7 @@ int unwind(struct unwind_frame_info *frame)
 					return -EIO;
 				switch(reg_info[i].width) {
 #define CASE(n)     case sizeof(u##n): \
-					__get_user(FRAME_REG(i, u##n), (u##n *)addr); \
+					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
 					break
 				CASES;
 #undef CASE
-- 
cgit v1.2.3


From eef5e0d185fc049bda11fa14ba286fbd357da896 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] unwinder: Remove lockdep disabling of nested locks for
 unwinder

Shouldn't be needed anymore since __kernel_text_address
is used unconditionally on x86-64

Signed-off-by: Andi Kleen <ak@suse.de>
---
 kernel/lockdep.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c9fefdb1a7db..9bb8d784eb02 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -228,11 +228,7 @@ static int save_trace(struct stack_trace *trace)
 	trace->skip = 3;
 	trace->all_contexts = 0;
 
-	/* Make sure to not recurse in case the the unwinder needs to tak
-e	   locks. */
-	lockdep_off();
 	save_stack_trace(trace, NULL);
-	lockdep_on();
 
 	trace->max_entries = trace->nr_entries;
 
-- 
cgit v1.2.3


From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder

Tighten the requirements on both input to and output from the Dwarf2
unwinder.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/traps.c    |  7 +++++++
 arch/x86_64/kernel/traps.c  |  7 +++++++
 include/asm-i386/unwind.h   | 12 ++++--------
 include/asm-x86_64/unwind.h |  8 ++------
 kernel/unwind.c             | 16 +++++++++++++++-
 5 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 86d8476be4fe..c447807e2a45 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -161,12 +161,19 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data)
 {
 	struct ops_and_data *oad = (struct ops_and_data *)data;
 	int n = 0;
+	unsigned long sp = UNW_SP(info);
 
+	if (arch_unw_user_mode(info))
+		return -1;
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
+		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+		    && sp > UNW_SP(info))
+			break;
+		sp = UNW_SP(info);
 	}
 	return n;
 }
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 9864d195c408..4fdd162f0bef 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
 {
 	struct ops_and_data *oad = (struct ops_and_data *)context;
 	int n = 0;
+	unsigned long sp = UNW_SP(info);
 
+	if (arch_unw_user_mode(info))
+		return -1;
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
+		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+		    && sp > UNW_SP(info))
+			break;
+		sp = UNW_SP(info);
 	}
 	return n;
 }
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index 601fc67bd775..aa2c931e30db 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -79,17 +79,13 @@ extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
                                                                           void *arg),
                                                void *arg);
 
-static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
+static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
 {
-#if 0 /* This can only work when selector register and EFLAGS saves/restores
-         are properly annotated (and tracked in UNW_REGISTER_INFO). */
-	return user_mode_vm(&info->regs);
-#else
-	return info->regs.eip < PAGE_OFFSET
+	return user_mode_vm(&info->regs)
+	       || info->regs.eip < PAGE_OFFSET
 	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
-	            && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
+	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
 	       || info->regs.esp < PAGE_OFFSET;
-#endif
 }
 
 #else
diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h
index 2e7ff10fd775..2f6349e48717 100644
--- a/include/asm-x86_64/unwind.h
+++ b/include/asm-x86_64/unwind.h
@@ -87,14 +87,10 @@ extern int arch_unwind_init_running(struct unwind_frame_info *,
 
 static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
 {
-#if 0 /* This can only work when selector register saves/restores
-         are properly annotated (and tracked in UNW_REGISTER_INFO). */
-	return user_mode(&info->regs);
-#else
-	return (long)info->regs.rip >= 0
+	return user_mode(&info->regs)
+	       || (long)info->regs.rip >= 0
 	       || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
 	       || (long)info->regs.rsp >= 0;
-#endif
 }
 
 #else
diff --git a/kernel/unwind.c b/kernel/unwind.c
index af48168a3afb..7e721f104105 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -95,6 +95,7 @@ static const struct {
 
 typedef unsigned long uleb128_t;
 typedef   signed long sleb128_t;
+#define sleb128abs __builtin_labs
 
 static struct unwind_table {
 	struct {
@@ -787,7 +788,7 @@ int unwind(struct unwind_frame_info *frame)
 #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
 	const u32 *fde = NULL, *cie = NULL;
 	const u8 *ptr = NULL, *end = NULL;
-	unsigned long pc = UNW_PC(frame) - frame->call_frame;
+	unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
 	unsigned long startLoc = 0, endLoc = 0, cfa;
 	unsigned i;
 	signed ptrType = -1;
@@ -936,6 +937,9 @@ int unwind(struct unwind_frame_info *frame)
 		state.dataAlign = get_sleb128(&ptr, end);
 		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
 			cie = NULL;
+		else if (UNW_PC(frame) % state.codeAlign
+		         || UNW_SP(frame) % sleb128abs(state.dataAlign))
+			return -EPERM;
 		else {
 			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
 			/* skip augmentation */
@@ -968,6 +972,8 @@ int unwind(struct unwind_frame_info *frame)
 #ifdef CONFIG_FRAME_POINTER
 		unsigned long top, bottom;
 
+		if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
+			return -EPERM;
 		top = STACK_TOP(frame->task);
 		bottom = STACK_BOTTOM(frame->task);
 # if FRAME_RETADDR_OFFSET < 0
@@ -1018,6 +1024,7 @@ int unwind(struct unwind_frame_info *frame)
 	   || state.regs[retAddrReg].where == Nowhere
 	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
 	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
 	   || state.cfa.offs % sizeof(unsigned long))
 		return -EIO;
 	/* update frame */
@@ -1038,6 +1045,8 @@ int unwind(struct unwind_frame_info *frame)
 #else
 # define CASES CASE(8); CASE(16); CASE(32); CASE(64)
 #endif
+	pc = UNW_PC(frame);
+	sp = UNW_SP(frame);
 	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
 		if (REG_INVALID(i)) {
 			if (state.regs[i].where == Nowhere)
@@ -1118,6 +1127,11 @@ int unwind(struct unwind_frame_info *frame)
 		}
 	}
 
+	if (UNW_PC(frame) % state.codeAlign
+	    || UNW_SP(frame) % sleb128abs(state.dataAlign)
+	    || (pc == UNW_PC(frame) && sp == UNW_SP(frame)))
+		return -EIO;
+
 	return 0;
 #undef CASES
 #undef FRAME_REG
-- 
cgit v1.2.3


From 6d0185ea611276fdf81991d7774d396bdc1ae392 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] unwinder: Add debugging output to the Dwarf2 unwinder

Add debugging printks to the unwinder to allow easier debugging
when something goes wrong with it.

This can be controlled with the new unwinder_debug=N option
Most output is given by N=1

AK: Added documentation of unwinder_debug=

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/kernel-parameters.txt |   3 +
 kernel/unwind.c                     | 113 ++++++++++++++++++++++++++++++------
 2 files changed, 99 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4e90aa427aea..d34fd6a28faa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1735,6 +1735,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	norandmaps	Don't use address space randomization
 			Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
 
+ 	unwind_debug=N 	N > 0 will enable dwarf2 unwinder debugging
+			This is useful to get more information why
+			you got a "dwarf2 unwinder stuck"
 
 ______________________________________________________________________
 
diff --git a/kernel/unwind.c b/kernel/unwind.c
index 7e721f104105..209e248517db 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -137,6 +137,17 @@ struct unwind_state {
 
 static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 
+static unsigned unwind_debug;
+static int __init unwind_debug_setup(char *s)
+{
+	unwind_debug = simple_strtoul(s, NULL, 0);
+	return 1;
+}
+__setup("unwind_debug=", unwind_debug_setup);
+#define dprintk(lvl, fmt, args...) \
+	((void)(lvl > unwind_debug \
+	 || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
+
 static struct unwind_table *find_table(unsigned long pc)
 {
 	struct unwind_table *table;
@@ -281,6 +292,7 @@ static void __init setup_unwind_table(struct unwind_table *table,
 
 	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
 	        + 2 * n * sizeof(unsigned long);
+	dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
 	header = alloc(hdrSize);
 	if (!header)
 		return;
@@ -500,13 +512,17 @@ static unsigned long read_pointer(const u8 **pLoc,
 		const unsigned long *pul;
 	} ptr;
 
-	if (ptrType < 0 || ptrType == DW_EH_PE_omit)
+	if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
+		dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
 		return 0;
+	}
 	ptr.p8 = *pLoc;
 	switch(ptrType & DW_EH_PE_FORM) {
 	case DW_EH_PE_data2:
-		if (end < (const void *)(ptr.p16u + 1))
+		if (end < (const void *)(ptr.p16u + 1)) {
+			dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		if(ptrType & DW_EH_PE_signed)
 			value = get_unaligned(ptr.p16s++);
 		else
@@ -514,8 +530,10 @@ static unsigned long read_pointer(const u8 **pLoc,
 		break;
 	case DW_EH_PE_data4:
 #ifdef CONFIG_64BIT
-		if (end < (const void *)(ptr.p32u + 1))
+		if (end < (const void *)(ptr.p32u + 1)) {
+			dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		if(ptrType & DW_EH_PE_signed)
 			value = get_unaligned(ptr.p32s++);
 		else
@@ -527,8 +545,10 @@ static unsigned long read_pointer(const u8 **pLoc,
 		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
 #endif
 	case DW_EH_PE_native:
-		if (end < (const void *)(ptr.pul + 1))
+		if (end < (const void *)(ptr.pul + 1)) {
+			dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		value = get_unaligned(ptr.pul++);
 		break;
 	case DW_EH_PE_leb128:
@@ -536,10 +556,14 @@ static unsigned long read_pointer(const u8 **pLoc,
 		value = ptrType & DW_EH_PE_signed
 		        ? get_sleb128(&ptr.p8, end)
 		        : get_uleb128(&ptr.p8, end);
-		if ((const void *)ptr.p8 > end)
+		if ((const void *)ptr.p8 > end) {
+			dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		break;
 	default:
+		dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
+		        ptrType, ptr.p8, end);
 		return 0;
 	}
 	switch(ptrType & DW_EH_PE_ADJUST) {
@@ -549,11 +573,16 @@ static unsigned long read_pointer(const u8 **pLoc,
 		value += (unsigned long)*pLoc;
 		break;
 	default:
+		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
+		        ptrType, *pLoc, end);
 		return 0;
 	}
 	if ((ptrType & DW_EH_PE_indirect)
-	    && probe_kernel_address((unsigned long *)value, value))
+	    && probe_kernel_address((unsigned long *)value, value)) {
+		dprintk(1, "Cannot read indirect value %lx (%p,%p).",
+		        value, *pLoc, end);
 		return 0;
+	}
 	*pLoc = ptr.p8;
 
 	return value;
@@ -702,8 +731,10 @@ static int processCFI(const u8 *start,
 					state->label = NULL;
 					return 1;
 				}
-				if (state->stackDepth >= MAX_STACK_DEPTH)
+				if (state->stackDepth >= MAX_STACK_DEPTH) {
+					dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
 					return 0;
+				}
 				state->stack[state->stackDepth++] = ptr.p8;
 				break;
 			case DW_CFA_restore_state:
@@ -718,8 +749,10 @@ static int processCFI(const u8 *start,
 					result = processCFI(start, end, 0, ptrType, state);
 					state->loc = loc;
 					state->label = label;
-				} else
+				} else {
+					dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
 					return 0;
+				}
 				break;
 			case DW_CFA_def_cfa:
 				state->cfa.reg = get_uleb128(&ptr.p8, end);
@@ -751,6 +784,7 @@ static int processCFI(const u8 *start,
 				break;
 			case DW_CFA_GNU_window_save:
 			default:
+				dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
 				result = 0;
 				break;
 			}
@@ -766,12 +800,17 @@ static int processCFI(const u8 *start,
 			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
 			break;
 		}
-		if (ptr.p8 > end)
+		if (ptr.p8 > end) {
+			dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
 			result = 0;
+		}
 		if (result && targetLoc != 0 && targetLoc < state->loc)
 			return 1;
 	}
 
+	if (result && ptr.p8 < end)
+		dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
+
 	return result
 	   && ptr.p8 == end
 	   && (targetLoc == 0
@@ -843,6 +882,8 @@ int unwind(struct unwind_frame_info *frame)
 					                           hdr[3]);
 			}
 		}
+		if(hdr && !fde)
+			dprintk(3, "Binary lookup for %lx failed.", pc);
 
 		if (fde != NULL) {
 			cie = cie_for_fde(fde, table);
@@ -864,6 +905,8 @@ int unwind(struct unwind_frame_info *frame)
 					fde = NULL;
 			} else
 				fde = NULL;
+			if(!fde)
+				dprintk(1, "Binary lookup result for %lx discarded.", pc);
 		}
 		if (fde == NULL) {
 			for (fde = table->address, tableSize = table->size;
@@ -895,6 +938,8 @@ int unwind(struct unwind_frame_info *frame)
 				if (pc >= startLoc && pc < endLoc)
 					break;
 			}
+			if(!fde)
+				dprintk(3, "Linear lookup for %lx failed.", pc);
 		}
 	}
 	if (cie != NULL) {
@@ -928,6 +973,8 @@ int unwind(struct unwind_frame_info *frame)
 			if (ptr >= end || *ptr)
 				cie = NULL;
 		}
+		if(!cie)
+			dprintk(1, "CIE unusable (%p,%p).", ptr, end);
 		++ptr;
 	}
 	if (cie != NULL) {
@@ -938,9 +985,11 @@ int unwind(struct unwind_frame_info *frame)
 		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
 			cie = NULL;
 		else if (UNW_PC(frame) % state.codeAlign
-		         || UNW_SP(frame) % sleb128abs(state.dataAlign))
+		         || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+			dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
+			        UNW_PC(frame), UNW_SP(frame));
 			return -EPERM;
-		else {
+		} else {
 			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
 			/* skip augmentation */
 			if (((const char *)(cie + 2))[1] == 'z') {
@@ -954,6 +1003,8 @@ int unwind(struct unwind_frame_info *frame)
 			   || reg_info[retAddrReg].width != sizeof(unsigned long))
 				cie = NULL;
 		}
+		if(!cie)
+			dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
 	}
 	if (cie != NULL) {
 		state.cieStart = ptr;
@@ -967,6 +1018,8 @@ int unwind(struct unwind_frame_info *frame)
 			if ((ptr += augSize) > end)
 				fde = NULL;
 		}
+		if(!fde)
+			dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
 	}
 	if (cie == NULL || fde == NULL) {
 #ifdef CONFIG_FRAME_POINTER
@@ -1025,8 +1078,10 @@ int unwind(struct unwind_frame_info *frame)
 	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
 	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
 	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
-	   || state.cfa.offs % sizeof(unsigned long))
+	   || state.cfa.offs % sizeof(unsigned long)) {
+		dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
 		return -EIO;
+	}
 	/* update frame */
 #ifndef CONFIG_AS_CFI_SIGNAL_FRAME
 	if(frame->call_frame
@@ -1051,6 +1106,8 @@ int unwind(struct unwind_frame_info *frame)
 		if (REG_INVALID(i)) {
 			if (state.regs[i].where == Nowhere)
 				continue;
+			dprintk(1, "Cannot restore register %u (%d).",
+			        i, state.regs[i].where);
 			return -EIO;
 		}
 		switch(state.regs[i].where) {
@@ -1059,8 +1116,11 @@ int unwind(struct unwind_frame_info *frame)
 		case Register:
 			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
 			   || REG_INVALID(state.regs[i].value)
-			   || reg_info[i].width > reg_info[state.regs[i].value].width)
+			   || reg_info[i].width > reg_info[state.regs[i].value].width) {
+				dprintk(1, "Cannot restore register %u from register %lu.",
+				        i, state.regs[i].value);
 				return -EIO;
+			}
 			switch(reg_info[state.regs[i].value].width) {
 #define CASE(n) \
 			case sizeof(u##n): \
@@ -1070,6 +1130,9 @@ int unwind(struct unwind_frame_info *frame)
 			CASES;
 #undef CASE
 			default:
+				dprintk(1, "Unsupported register size %u (%lu).",
+				        reg_info[state.regs[i].value].width,
+				        state.regs[i].value);
 				return -EIO;
 			}
 			break;
@@ -1094,12 +1157,17 @@ int unwind(struct unwind_frame_info *frame)
 			CASES;
 #undef CASE
 			default:
+				dprintk(1, "Unsupported register size %u (%u).",
+				        reg_info[i].width, i);
 				return -EIO;
 			}
 			break;
 		case Value:
-			if (reg_info[i].width != sizeof(unsigned long))
+			if (reg_info[i].width != sizeof(unsigned long)) {
+				dprintk(1, "Unsupported value size %u (%u).",
+				        reg_info[i].width, i);
 				return -EIO;
+			}
 			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
 			                                    * state.dataAlign;
 			break;
@@ -1111,8 +1179,11 @@ int unwind(struct unwind_frame_info *frame)
 				    % sizeof(unsigned long)
 				    || addr < startLoc
 				    || addr + sizeof(unsigned long) < addr
-				    || addr + sizeof(unsigned long) > endLoc)
+				    || addr + sizeof(unsigned long) > endLoc) {
+					dprintk(1, "Bad memory location %lx (%lx).",
+					        addr, state.regs[i].value);
 					return -EIO;
+				}
 				switch(reg_info[i].width) {
 #define CASE(n)     case sizeof(u##n): \
 					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
@@ -1120,6 +1191,8 @@ int unwind(struct unwind_frame_info *frame)
 				CASES;
 #undef CASE
 				default:
+					dprintk(1, "Unsupported memory size %u (%u).",
+					        reg_info[i].width, i);
 					return -EIO;
 				}
 			}
@@ -1128,9 +1201,15 @@ int unwind(struct unwind_frame_info *frame)
 	}
 
 	if (UNW_PC(frame) % state.codeAlign
-	    || UNW_SP(frame) % sleb128abs(state.dataAlign)
-	    || (pc == UNW_PC(frame) && sp == UNW_SP(frame)))
+	    || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+		dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
+		        UNW_PC(frame), UNW_SP(frame));
 		return -EIO;
+	}
+	if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
+		dprintk(1, "No progress (%lx,%lx).", pc, sp);
+		return -EIO;
+	}
 
 	return 0;
 #undef CASES
-- 
cgit v1.2.3


From c65f38d911aa301cea109d38d40925750dd6c2da Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:19 +0100
Subject: [PATCH] unwinder: fully support linker generated .eh_frame_hdr
 section

Now that binutils' ld is able to properly populate .eh_frame_hdr in the
Linux kernel case, here's a patch to add some functionality to the Dwarf2
unwinder to actually be able to make use of this (applies on firstfloor
tree with the previously sent patch to add debug output, but not on plain
2.6.19).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 kernel/unwind.c | 66 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/kernel/unwind.c b/kernel/unwind.c
index 209e248517db..08645aa7c2d6 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -164,7 +164,9 @@ static struct unwind_table *find_table(unsigned long pc)
 
 static unsigned long read_pointer(const u8 **pLoc,
                                   const void *end,
-                                  signed ptrType);
+                                  signed ptrType,
+                                  unsigned long text_base,
+                                  unsigned long data_base);
 
 static void init_unwind_table(struct unwind_table *table,
                               const char *name,
@@ -189,10 +191,13 @@ static void init_unwind_table(struct unwind_table *table,
 	/* See if the linker provided table looks valid. */
 	if (header_size <= 4
 	    || header_start[0] != 1
-	    || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
-	    || header_start[2] == DW_EH_PE_omit
-	    || read_pointer(&ptr, end, header_start[2]) <= 0
-	    || header_start[3] == DW_EH_PE_omit)
+	    || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
+	       != table_start
+	    || !read_pointer(&ptr, end, header_start[2], 0, 0)
+	    || !read_pointer(&ptr, end, header_start[3], 0,
+	                     (unsigned long)header_start)
+	    || !read_pointer(&ptr, end, header_start[3], 0,
+	                     (unsigned long)header_start))
 		header_start = NULL;
 	table->hdrsz = header_size;
 	smp_wmb();
@@ -282,7 +287,7 @@ static void __init setup_unwind_table(struct unwind_table *table,
 		ptr = (const u8 *)(fde + 2);
 		if (!read_pointer(&ptr,
 		                  (const u8 *)(fde + 1) + *fde,
-		                  ptrType))
+		                  ptrType, 0, 0))
 			return;
 		++n;
 	}
@@ -317,7 +322,7 @@ static void __init setup_unwind_table(struct unwind_table *table,
 		ptr = (const u8 *)(fde + 2);
 		header->table[n].start = read_pointer(&ptr,
 		                                      (const u8 *)(fde + 1) + *fde,
-		                                      fde_pointer_type(cie));
+		                                      fde_pointer_type(cie), 0, 0);
 		header->table[n].fde = (unsigned long)fde;
 		++n;
 	}
@@ -500,7 +505,9 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
 
 static unsigned long read_pointer(const u8 **pLoc,
                                   const void *end,
-                                  signed ptrType)
+                                  signed ptrType,
+                                  unsigned long text_base,
+                                  unsigned long data_base)
 {
 	unsigned long value = 0;
 	union {
@@ -572,6 +579,22 @@ static unsigned long read_pointer(const u8 **pLoc,
 	case DW_EH_PE_pcrel:
 		value += (unsigned long)*pLoc;
 		break;
+	case DW_EH_PE_textrel:
+		if (likely(text_base)) {
+			value += text_base;
+			break;
+		}
+		dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
+		        ptrType, *pLoc, end);
+		return 0;
+	case DW_EH_PE_datarel:
+		if (likely(data_base)) {
+			value += data_base;
+			break;
+		}
+		dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
+		        ptrType, *pLoc, end);
+		return 0;
 	default:
 		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
 		        ptrType, *pLoc, end);
@@ -625,7 +648,8 @@ static signed fde_pointer_type(const u32 *cie)
 			case 'P': {
 					signed ptrType = *ptr++;
 
-					if (!read_pointer(&ptr, end, ptrType) || ptr > end)
+					if (!read_pointer(&ptr, end, ptrType, 0, 0)
+					    || ptr > end)
 						return -1;
 				}
 				break;
@@ -685,7 +709,8 @@ static int processCFI(const u8 *start,
 			case DW_CFA_nop:
 				break;
 			case DW_CFA_set_loc:
-				if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
+				state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
+				if (state->loc == 0)
 					result = 0;
 				break;
 			case DW_CFA_advance_loc1:
@@ -854,9 +879,9 @@ int unwind(struct unwind_frame_info *frame)
 			ptr = hdr + 4;
 			end = hdr + table->hdrsz;
 			if (tableSize
-			    && read_pointer(&ptr, end, hdr[1])
+			    && read_pointer(&ptr, end, hdr[1], 0, 0)
 			       == (unsigned long)table->address
-			    && (i = read_pointer(&ptr, end, hdr[2])) > 0
+			    && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
 			    && i == (end - ptr) / (2 * tableSize)
 			    && !((end - ptr) % (2 * tableSize))) {
 				do {
@@ -864,7 +889,8 @@ int unwind(struct unwind_frame_info *frame)
 
 					startLoc = read_pointer(&cur,
 					                        cur + tableSize,
-					                        hdr[3]);
+					                        hdr[3], 0,
+					                        (unsigned long)hdr);
 					if (pc < startLoc)
 						i /= 2;
 					else {
@@ -875,11 +901,13 @@ int unwind(struct unwind_frame_info *frame)
 				if (i == 1
 				    && (startLoc = read_pointer(&ptr,
 				                                ptr + tableSize,
-				                                hdr[3])) != 0
+				                                hdr[3], 0,
+				                                (unsigned long)hdr)) != 0
 				    && pc >= startLoc)
 					fde = (void *)read_pointer(&ptr,
 					                           ptr + tableSize,
-					                           hdr[3]);
+					                           hdr[3], 0,
+					                           (unsigned long)hdr);
 			}
 		}
 		if(hdr && !fde)
@@ -894,13 +922,13 @@ int unwind(struct unwind_frame_info *frame)
 			   && (ptrType = fde_pointer_type(cie)) >= 0
 			   && read_pointer(&ptr,
 			                   (const u8 *)(fde + 1) + *fde,
-			                   ptrType) == startLoc) {
+			                   ptrType, 0, 0) == startLoc) {
 				if (!(ptrType & DW_EH_PE_indirect))
 					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
 				endLoc = startLoc
 				         + read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if(pc >= endLoc)
 					fde = NULL;
 			} else
@@ -926,7 +954,7 @@ int unwind(struct unwind_frame_info *frame)
 				ptr = (const u8 *)(fde + 2);
 				startLoc = read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if (!startLoc)
 					continue;
 				if (!(ptrType & DW_EH_PE_indirect))
@@ -934,7 +962,7 @@ int unwind(struct unwind_frame_info *frame)
 				endLoc = startLoc
 				         + read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if (pc >= startLoc && pc < endLoc)
 					break;
 			}
-- 
cgit v1.2.3


From b65780e123ba9b762276482bbfb52836e4d41fd9 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:19 +0100
Subject: [PATCH] unwinder: move .eh_frame to RODATA

The .eh_frame section contents is never written to, so it can as well
benefit from CONFIG_DEBUG_RODATA.

Diff-ed against firstfloor tree.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/vmlinux.lds.S    |  9 ---------
 arch/x86_64/kernel/vmlinux.lds.S  |  9 ---------
 include/asm-generic/vmlinux.lds.h | 17 ++++++++++++-----
 kernel/unwind.c                   |  2 +-
 4 files changed, 13 insertions(+), 24 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 25581e87c60d..56e6ad5cb045 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -102,15 +102,6 @@ SECTIONS
 	_edata = .;		/* End of data section */
   }
 
-#ifdef CONFIG_STACK_UNWIND
-  . = ALIGN(4);
-  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
-	__start_unwind = .;
-  	*(.eh_frame)
-	__end_unwind = .;
-  }
-#endif
-
   . = ALIGN(THREAD_SIZE);	/* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
 	*(.data.init_task)
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d9534e750d4f..6a1f8f491e5d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -51,15 +51,6 @@ SECTIONS
 
   RODATA
 
-#ifdef CONFIG_STACK_UNWIND
-  . = ALIGN(8);
-  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
-	__start_unwind = .;
-  	*(.eh_frame)
-	__end_unwind = .;
-  }
-#endif
-
   . = ALIGN(PAGE_SIZE);        /* Align data segment to page size boundary */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9f4747780dac..4d4c62d11059 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -119,8 +119,7 @@
 		*(__ksymtab_strings)					\
 	}								\
 									\
-	/* Unwind data binary search table */				\
-	EH_FRAME_HDR							\
+	EH_FRAME							\
 									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
@@ -162,15 +161,23 @@
 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
 
 #ifdef CONFIG_STACK_UNWIND
-		/* Unwind data binary search table */
-#define EH_FRAME_HDR							\
+#define EH_FRAME							\
+		/* Unwind data binary search table */			\
+		. = ALIGN(8);						\
         	.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) {	\
 			VMLINUX_SYMBOL(__start_unwind_hdr) = .;		\
 			*(.eh_frame_hdr)				\
 			VMLINUX_SYMBOL(__end_unwind_hdr) = .;		\
+		}							\
+		/* Unwind data */					\
+		. = ALIGN(8);						\
+		.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {		\
+			VMLINUX_SYMBOL(__start_unwind) = .;		\
+		  	*(.eh_frame)					\
+			VMLINUX_SYMBOL(__end_unwind) = .;		\
 		}
 #else
-#define EH_FRAME_HDR
+#define EH_FRAME
 #endif
 
 		/* DWARF debug sections.
diff --git a/kernel/unwind.c b/kernel/unwind.c
index 08645aa7c2d6..09c261329249 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -19,7 +19,7 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 
-extern char __start_unwind[], __end_unwind[];
+extern const char __start_unwind[], __end_unwind[];
 extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
 
 #define MAX_STACK_DEPTH 8
-- 
cgit v1.2.3


From 161a09e737f0761ca064ee6a907313402f7a54b6 Mon Sep 17 00:00:00 2001
From: Joy Latten <latten@austin.ibm.com>
Date: Mon, 27 Nov 2006 13:11:54 -0600
Subject: audit: Add auditing to ipsec

An audit message occurs when an ipsec SA
or ipsec policy is created/deleted.

Signed-off-by: Joy Latten <latten@austin.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/audit.h  |   6 +++
 include/net/xfrm.h     |  19 +++++---
 kernel/auditsc.c       |   6 ++-
 net/key/af_key.c       |  27 +++++++++--
 net/xfrm/xfrm_policy.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++-
 net/xfrm/xfrm_state.c  |  17 ++++++-
 net/xfrm/xfrm_user.c   |  33 ++++++++++++--
 7 files changed, 213 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b2ca666d9997..0e07db6cc0d0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -101,6 +101,10 @@
 #define AUDIT_MAC_CIPSOV4_DEL	1408	/* NetLabel: del CIPSOv4 DOI entry */
 #define AUDIT_MAC_MAP_ADD	1409	/* NetLabel: add LSM domain mapping */
 #define AUDIT_MAC_MAP_DEL	1410	/* NetLabel: del LSM domain mapping */
+#define AUDIT_MAC_IPSEC_ADDSA	1411	/* Add a XFRM state */
+#define AUDIT_MAC_IPSEC_DELSA	1412	/* Delete a XFRM state */
+#define AUDIT_MAC_IPSEC_ADDSPD	1413	/* Add a XFRM policy */
+#define AUDIT_MAC_IPSEC_DELSPD	1414	/* Delete a XFRM policy */
 
 #define AUDIT_FIRST_KERN_ANOM_MSG   1700
 #define AUDIT_LAST_KERN_ANOM_MSG    1799
@@ -377,6 +381,7 @@ extern void auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
+extern void audit_log_task_context(struct audit_buffer *ab);
 extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
@@ -449,6 +454,7 @@ extern int audit_n_rules;
 #define audit_inode_update(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
 #define audit_get_loginuid(c) ({ -1; })
+#define audit_log_task_context(b) do { ; } while (0)
 #define audit_ipc_obj(i) ({ 0; })
 #define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 15ec19dcf9c8..f699cdcab406 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -392,6 +392,15 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km);
 
 extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 
+/* Audit Information */
+struct xfrm_audit
+{
+	uid_t	loginuid;
+	u32	secid;
+};
+void xfrm_audit_log(uid_t auid, u32 secid, int type, int result,
+		    struct xfrm_policy *xp, struct xfrm_state *x);
+
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
 {
 	if (likely(policy != NULL))
@@ -906,7 +915,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s
 #endif
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
-extern void xfrm_state_flush(u8 proto);
+extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
 extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq);
 extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq);
 extern void xfrm_replay_notify(struct xfrm_state *x, int event);
@@ -959,13 +968,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete);
 struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete);
-void xfrm_policy_flush(u8 type);
+void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
 u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi);
-struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
-				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
+struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
+				  xfrm_address_t *daddr, xfrm_address_t *saddr,
 				  int create, unsigned short family);
-extern void xfrm_policy_flush(u8 type);
+extern void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
 			  struct flowi *fl, int family, int strict);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ab97e5101232..40722e26de98 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -731,7 +731,7 @@ static inline void audit_free_context(struct audit_context *context)
 		printk(KERN_ERR "audit: freed %d contexts\n", count);
 }
 
-static void audit_log_task_context(struct audit_buffer *ab)
+void audit_log_task_context(struct audit_buffer *ab)
 {
 	char *ctx = NULL;
 	ssize_t len = 0;
@@ -760,6 +760,8 @@ error_path:
 	return;
 }
 
+EXPORT_SYMBOL(audit_log_task_context);
+
 static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
 	char name[sizeof(tsk->comm)];
@@ -1488,6 +1490,8 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
 	return ctx ? ctx->loginuid : -1;
 }
 
+EXPORT_SYMBOL(audit_get_loginuid);
+
 /**
  * __audit_mq_open - record audit data for a POSIX MQ open
  * @oflag: open flag
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0e1dbfbb9b10..5dd5094659a1 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -27,6 +27,7 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <net/xfrm.h>
+#include <linux/audit.h>
 
 #include <net/sock.h>
 
@@ -1420,6 +1421,9 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	else
 		err = xfrm_state_update(x);
 
+	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+		       AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
 		__xfrm_state_put(x);
@@ -1460,8 +1464,12 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 		err = -EPERM;
 		goto out;
 	}
-	
+
 	err = xfrm_state_delete(x);
+
+	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
 	if (err < 0)
 		goto out;
 
@@ -1637,12 +1645,15 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
 {
 	unsigned proto;
 	struct km_event c;
+	struct xfrm_audit audit_info;
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
 	if (proto == 0)
 		return -EINVAL;
 
-	xfrm_state_flush(proto);
+	audit_info.loginuid = audit_get_loginuid(current->audit_context);
+	audit_info.secid = 0;
+	xfrm_state_flush(proto, &audit_info);
 	c.data.proto = proto;
 	c.seq = hdr->sadb_msg_seq;
 	c.pid = hdr->sadb_msg_pid;
@@ -2205,6 +2216,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
 
+	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+		       AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL);
+
 	if (err)
 		goto out;
 
@@ -2282,6 +2296,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
 				   &sel, tmp.security, 1);
 	security_xfrm_policy_free(&tmp);
+
+	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+		       AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
+
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -2416,8 +2434,11 @@ static int key_notify_policy_flush(struct km_event *c)
 static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
 	struct km_event c;
+	struct xfrm_audit audit_info;
 
-	xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
+	audit_info.loginuid = audit_get_loginuid(current->audit_context);
+	audit_info.secid = 0;
+	xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
 	c.data.type = XFRM_POLICY_TYPE_MAIN;
 	c.event = XFRM_MSG_FLUSHPOLICY;
 	c.pid = hdr->sadb_msg_pid;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4f04222698d9..47c13649bac1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -25,6 +25,7 @@
 #include <linux/cache.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#include <linux/audit.h>
 
 #include "xfrm_hash.h"
 
@@ -804,7 +805,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
 
-void xfrm_policy_flush(u8 type)
+void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 {
 	int dir;
 
@@ -824,6 +825,9 @@ void xfrm_policy_flush(u8 type)
 			hlist_del(&pol->byidx);
 			write_unlock_bh(&xfrm_policy_lock);
 
+			xfrm_audit_log(audit_info->loginuid, audit_info->secid,
+				       AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL);
+
 			xfrm_policy_kill(pol);
 			killed++;
 
@@ -842,6 +846,11 @@ void xfrm_policy_flush(u8 type)
 				hlist_del(&pol->byidx);
 				write_unlock_bh(&xfrm_policy_lock);
 
+				xfrm_audit_log(audit_info->loginuid,
+					       audit_info->secid,
+					       AUDIT_MAC_IPSEC_DELSPD, 1,
+					       pol, NULL);
+
 				xfrm_policy_kill(pol);
 				killed++;
 
@@ -1977,6 +1986,115 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 
 EXPORT_SYMBOL(xfrm_bundle_ok);
 
+/* Audit addition and deletion of SAs and ipsec policy */
+
+void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
+		    struct xfrm_policy *xp, struct xfrm_state *x)
+{
+
+	char *secctx;
+	u32 secctx_len;
+	struct xfrm_sec_ctx *sctx = NULL;
+	struct audit_buffer *audit_buf;
+	int family;
+	extern int audit_enabled;
+
+	if (audit_enabled == 0)
+		return;
+
+	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
+	if (audit_buf == NULL)
+	return;
+
+	switch(type) {
+	case AUDIT_MAC_IPSEC_ADDSA:
+		audit_log_format(audit_buf, "SAD add: auid=%u", auid);
+		break;
+	case AUDIT_MAC_IPSEC_DELSA:
+		audit_log_format(audit_buf, "SAD delete: auid=%u", auid);
+		break;
+	case AUDIT_MAC_IPSEC_ADDSPD:
+		audit_log_format(audit_buf, "SPD add: auid=%u", auid);
+		break;
+	case AUDIT_MAC_IPSEC_DELSPD:
+		audit_log_format(audit_buf, "SPD delete: auid=%u", auid);
+		break;
+	default:
+		return;
+	}
+
+	if (sid != 0 &&
+		security_secid_to_secctx(sid, &secctx, &secctx_len) == 0)
+		audit_log_format(audit_buf, " subj=%s", secctx);
+	else
+		audit_log_task_context(audit_buf);
+
+	if (xp) {
+		family = xp->selector.family;
+		if (xp->security)
+			sctx = xp->security;
+	} else {
+		family = x->props.family;
+		if (x->security)
+			sctx = x->security;
+	}
+
+	if (sctx)
+		audit_log_format(audit_buf,
+				" sec_alg=%u sec_doi=%u sec_obj=%s",
+				sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str);
+
+	switch(family) {
+	case AF_INET:
+		{
+			struct in_addr saddr, daddr;
+			if (xp) {
+				saddr.s_addr = xp->selector.saddr.a4;
+				daddr.s_addr = xp->selector.daddr.a4;
+			} else {
+				saddr.s_addr = x->props.saddr.a4;
+				daddr.s_addr = x->id.daddr.a4;
+			}
+			audit_log_format(audit_buf,
+					 " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
+					 NIPQUAD(saddr), NIPQUAD(daddr));
+		}
+			break;
+	case AF_INET6:
+		{
+			struct in6_addr saddr6, daddr6;
+			if (xp) {
+				memcpy(&saddr6, xp->selector.saddr.a6,
+					sizeof(struct in6_addr));
+				memcpy(&daddr6, xp->selector.daddr.a6,
+					sizeof(struct in6_addr));
+			} else {
+				memcpy(&saddr6, x->props.saddr.a6,
+					sizeof(struct in6_addr));
+				memcpy(&daddr6, x->id.daddr.a6,
+					sizeof(struct in6_addr));
+			}
+			audit_log_format(audit_buf,
+					 " src=" NIP6_FMT "dst=" NIP6_FMT,
+					 NIP6(saddr6), NIP6(daddr6));
+		}
+		break;
+	}
+
+	if (x)
+		audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s",
+				(unsigned long)ntohl(x->id.spi),
+				(unsigned long)ntohl(x->id.spi),
+				x->id.proto == IPPROTO_AH ? "AH" :
+				(x->id.proto == IPPROTO_ESP ?
+				"ESP" : "IPCOMP"));
+
+	audit_log_format(audit_buf, " res=%u", result);
+	audit_log_end(audit_buf);
+}
+
+EXPORT_SYMBOL(xfrm_audit_log);
+
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	int err = 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a14c88bf17f0..d5d3a6f1f609 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cache.h>
 #include <asm/uaccess.h>
+#include <linux/audit.h>
 
 #include "xfrm_hash.h"
 
@@ -238,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data)
 	unsigned long now = (unsigned long)xtime.tv_sec;
 	long next = LONG_MAX;
 	int warn = 0;
+	int err = 0;
 
 	spin_lock(&x->lock);
 	if (x->km.state == XFRM_STATE_DEAD)
@@ -295,9 +297,14 @@ expired:
 		next = 2;
 		goto resched;
 	}
-	if (!__xfrm_state_delete(x) && x->id.spi)
+
+	err = __xfrm_state_delete(x);
+	if (!err && x->id.spi)
 		km_state_expired(x, 1, 0);
 
+	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
 out:
 	spin_unlock(&x->lock);
 }
@@ -384,9 +391,10 @@ int xfrm_state_delete(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete);
 
-void xfrm_state_flush(u8 proto)
+void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
 {
 	int i;
+	int err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
 	for (i = 0; i <= xfrm_state_hmask; i++) {
@@ -400,6 +408,11 @@ restart:
 				spin_unlock_bh(&xfrm_state_lock);
 
 				xfrm_state_delete(x);
+				err = xfrm_state_delete(x);
+				xfrm_audit_log(audit_info->loginuid,
+					       audit_info->secid,
+					       AUDIT_MAC_IPSEC_DELSA,
+					       err ? 0 : 1, NULL, x);
 				xfrm_state_put(x);
 
 				spin_lock_bh(&xfrm_state_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 311205ffa775..e5372b11fc8f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,6 +31,7 @@
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #include <linux/in6.h>
 #endif
+#include <linux/audit.h>
 
 static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 {
@@ -454,6 +455,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	else
 		err = xfrm_state_update(x);
 
+	xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+		       AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
 		__xfrm_state_put(x);
@@ -523,6 +527,10 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	}
 
 	err = xfrm_state_delete(x);
+
+	xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
 	if (err < 0)
 		goto out;
 
@@ -1030,6 +1038,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
+	xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+		       AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
+
 	if (err) {
 		security_xfrm_policy_free(xp);
 		kfree(xp);
@@ -1257,6 +1268,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
 		security_xfrm_policy_free(&tmp);
 	}
+	if (delete)
+		xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+			       AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
+
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -1291,8 +1306,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma
 {
 	struct km_event c;
 	struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
+	struct xfrm_audit audit_info;
 
-	xfrm_state_flush(p->proto);
+	audit_info.loginuid = NETLINK_CB(skb).loginuid;
+	audit_info.secid = NETLINK_CB(skb).sid;
+	xfrm_state_flush(p->proto, &audit_info);
 	c.data.proto = p->proto;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
@@ -1442,12 +1460,15 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x
 	struct km_event c;
 	u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
+	struct xfrm_audit audit_info;
 
 	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
 	if (err)
 		return err;
 
-	xfrm_policy_flush(type);
+	audit_info.loginuid = NETLINK_CB(skb).loginuid;
+	audit_info.secid = NETLINK_CB(skb).sid;
+	xfrm_policy_flush(type, &audit_info);
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
@@ -1502,6 +1523,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 	err = 0;
 	if (up->hard) {
 		xfrm_policy_delete(xp, p->dir);
+		xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+				AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL);
+
 	} else {
 		// reset the timers here?
 		printk("Dont know what to do with soft policy expire\n");
@@ -1533,8 +1557,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **
 		goto out;
 	km_state_expired(x, ue->hard, current->pid);
 
-	if (ue->hard)
+	if (ue->hard) {
 		__xfrm_state_delete(x);
+		xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+			       AUDIT_MAC_IPSEC_DELSA, 1, NULL, x);
+	}
 out:
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
-- 
cgit v1.2.3


From 7602bdf2fd14a40dd9b104e516fdc05e1bd17952 Mon Sep 17 00:00:00 2001
From: Ashwin Chaugule <ashwin.chaugule@celunite.com>
Date: Wed, 6 Dec 2006 20:31:57 -0800
Subject: [PATCH] new scheme to preempt swap token

The new swap token patches replace the current token traversal algo.  The old
algo had a crude timeout parameter that was used to handover the token from
one task to another.  This algo, transfers the token to the tasks that are in
need of the token.  The urgency for the token is based on the number of times
a task is required to swap-in pages.  Accordingly, the priority of a task is
incremented if it has been badly affected due to swap-outs.  To ensure that
the token doesnt bounce around rapidly, the token holders are given a priority
boost.  The priority of tasks is also decremented, if their rate of swap-in's
keeps reducing.  This way, the condition to check whether to pre-empt the swap
token, is a matter of comparing two task's priority fields.

[akpm@osdl.org: cleanups]
Signed-off-by: Ashwin Chaugule <ashwin.chaugule@celunite.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  13 ++++--
 include/linux/swap.h  |   1 -
 kernel/fork.c         |   8 ++++
 kernel/sysctl.c       |  11 -----
 mm/thrash.c           | 116 ++++++++++++++++++++------------------------------
 5 files changed, 63 insertions(+), 86 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eafe4a7b8237..cad6a16260f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -344,9 +344,16 @@ struct mm_struct {
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
-	/* Token based thrashing protection. */
-	unsigned long swap_token_time;
-	char recent_pagein;
+	/* Swap token stuff */
+	/*
+	 * Last value of global fault stamp as seen by this process.
+	 * In other words, this value gives an indication of how long
+	 * it has been since this task got the token.
+	 * Look at mm/thrash.c
+	 */
+	unsigned int faultstamp;
+	unsigned int token_priority;
+	unsigned int last_interval;
 
 	/* coredumping support */
 	int core_waiters;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e7c36ba2a2db..89f8a39773bf 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -259,7 +259,6 @@ extern spinlock_t swap_lock;
 
 /* linux/mm/thrash.c */
 extern struct mm_struct * swap_token_mm;
-extern unsigned long swap_token_default_timeout;
 extern void grab_swap_token(void);
 extern void __put_swap_token(struct mm_struct *);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 8cdd3e72ba55..5678e6c61ef2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -479,6 +479,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 
 	memcpy(mm, oldmm, sizeof(*mm));
 
+	/* Initializing for Swap token stuff */
+	mm->token_priority = 0;
+	mm->last_interval = 0;
+
 	if (!mm_init(mm))
 		goto fail_nomem;
 
@@ -542,6 +546,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 		goto fail_nomem;
 
 good_mm:
+	/* Initializing for Swap token stuff */
+	mm->token_priority = 0;
+	mm->last_interval = 0;
+
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 09e569f4792b..7abe9704e75a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -977,17 +977,6 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 #endif
-#ifdef CONFIG_SWAP
-	{
-		.ctl_name	= VM_SWAP_TOKEN_TIMEOUT,
-		.procname	= "swap_token_timeout",
-		.data		= &swap_token_default_timeout,
-		.maxlen		= sizeof(swap_token_default_timeout),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies,
-	},
-#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
diff --git a/mm/thrash.c b/mm/thrash.c
index f4c560b4a2b7..19e428ca8b03 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -7,100 +7,74 @@
  *
  * Simple token based thrashing protection, using the algorithm
  * described in:  http://www.cs.wm.edu/~sjiang/token.pdf
+ *
+ * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
+ * Improved algorithm to pass token:
+ * Each task has a priority which is incremented if it contended
+ * for the token in an interval less than its previous attempt.
+ * If the token is acquired, that task's priority is boosted to prevent
+ * the token from bouncing around too often and to let the task make
+ * some progress in its execution.
  */
+
 #include <linux/jiffies.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/swap.h>
 
 static DEFINE_SPINLOCK(swap_token_lock);
-static unsigned long swap_token_timeout;
-static unsigned long swap_token_check;
-struct mm_struct * swap_token_mm = &init_mm;
-
-#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2)
-#define SWAP_TOKEN_TIMEOUT	(300 * HZ)
-/*
- * Currently disabled; Needs further code to work at HZ * 300.
- */
-unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT;
-
-/*
- * Take the token away if the process had no page faults
- * in the last interval, or if it has held the token for
- * too long.
- */
-#define SWAP_TOKEN_ENOUGH_RSS 1
-#define SWAP_TOKEN_TIMED_OUT 2
-static int should_release_swap_token(struct mm_struct *mm)
-{
-	int ret = 0;
-	if (!mm->recent_pagein)
-		ret = SWAP_TOKEN_ENOUGH_RSS;
-	else if (time_after(jiffies, swap_token_timeout))
-		ret = SWAP_TOKEN_TIMED_OUT;
-	mm->recent_pagein = 0;
-	return ret;
-}
+struct mm_struct *swap_token_mm;
+unsigned int global_faults;
 
-/*
- * Try to grab the swapout protection token.  We only try to
- * grab it once every TOKEN_CHECK_INTERVAL, both to prevent
- * SMP lock contention and to check that the process that held
- * the token before is no longer thrashing.
- */
 void grab_swap_token(void)
 {
-	struct mm_struct *mm;
-	int reason;
+	int current_interval;
 
-	/* We have the token. Let others know we still need it. */
-	if (has_swap_token(current->mm)) {
-		current->mm->recent_pagein = 1;
-		if (unlikely(!swap_token_default_timeout))
-			disable_swap_token();
-		return;
-	}
-
-	if (time_after(jiffies, swap_token_check)) {
+	global_faults++;
 
-		if (!swap_token_default_timeout) {
-			swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
-			return;
-		}
-
-		/* ... or if we recently held the token. */
-		if (time_before(jiffies, current->mm->swap_token_time))
-			return;
+	current_interval = global_faults - current->mm->faultstamp;
 
-		if (!spin_trylock(&swap_token_lock))
-			return;
+	if (!spin_trylock(&swap_token_lock))
+		return;
 
-		swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+	/* First come first served */
+	if (swap_token_mm == NULL) {
+		current->mm->token_priority = current->mm->token_priority + 2;
+		swap_token_mm = current->mm;
+		goto out;
+	}
 
-		mm = swap_token_mm;
-		if ((reason = should_release_swap_token(mm))) {
-			unsigned long eligible = jiffies;
-			if (reason == SWAP_TOKEN_TIMED_OUT) {
-				eligible += swap_token_default_timeout;
-			}
-			mm->swap_token_time = eligible;
-			swap_token_timeout = jiffies + swap_token_default_timeout;
+	if (current->mm != swap_token_mm) {
+		if (current_interval < current->mm->last_interval)
+			current->mm->token_priority++;
+		else {
+			current->mm->token_priority--;
+			if (unlikely(current->mm->token_priority < 0))
+				current->mm->token_priority = 0;
+		}
+		/* Check if we deserve the token */
+		if (current->mm->token_priority >
+				swap_token_mm->token_priority) {
+			current->mm->token_priority += 2;
 			swap_token_mm = current->mm;
 		}
-		spin_unlock(&swap_token_lock);
+	} else {
+		/* Token holder came in again! */
+		current->mm->token_priority += 2;
 	}
-	return;
+
+out:
+	current->mm->faultstamp = global_faults;
+	current->mm->last_interval = current_interval;
+	spin_unlock(&swap_token_lock);
+return;
 }
 
 /* Called on process exit. */
 void __put_swap_token(struct mm_struct *mm)
 {
 	spin_lock(&swap_token_lock);
-	if (likely(mm == swap_token_mm)) {
-		mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
-		swap_token_mm = &init_mm;
-		swap_token_check = jiffies;
-	}
+	if (likely(mm == swap_token_mm))
+		swap_token_mm = NULL;
 	spin_unlock(&swap_token_lock);
 }
-- 
cgit v1.2.3


From a866374aecc90c7d90619727ccd851ac096b2fc7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Dec 2006 20:32:20 -0800
Subject: [PATCH] mm: pagefault_{disable,enable}()

Introduce pagefault_{disable,enable}() and use these where previously we did
manual preempt increments/decrements to make the pagefault handler do the
atomic thing.

Currently they still rely on the increased preempt count, but do not rely on
the disabled preemption, this might go away in the future.

(NOTE: the extra barrier() in pagefault_disable might fix some holes on
       machines which have too many registers for their own good)

[heiko.carstens@de.ibm.com: s390 fix]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <npiggin@suse.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/kernel/futex.c     |  4 ++--
 arch/i386/mm/highmem.c      | 10 ++++------
 arch/mips/mm/highmem.c      | 10 ++++------
 arch/s390/lib/uaccess_std.c |  6 +++---
 arch/sparc/mm/highmem.c     |  8 +++-----
 include/asm-frv/highmem.h   |  5 ++---
 include/asm-generic/futex.h |  4 ++--
 include/asm-i386/futex.h    |  4 ++--
 include/asm-ia64/futex.h    |  4 ++--
 include/asm-mips/futex.h    |  4 ++--
 include/asm-parisc/futex.h  |  4 ++--
 include/asm-powerpc/futex.h |  4 ++--
 include/asm-ppc/highmem.h   |  8 +++-----
 include/asm-sparc64/futex.h |  4 ++--
 include/asm-x86_64/futex.h  |  4 ++--
 include/linux/uaccess.h     | 39 +++++++++++++++++++++++++++++++++++++--
 kernel/futex.c              | 28 ++++++++++++++--------------
 17 files changed, 88 insertions(+), 62 deletions(-)

(limited to 'kernel')

diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index eae874a970c6..53dc5ed1ebda 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -200,7 +200,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -223,7 +223,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 		break;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index f9f647cdbc7b..178bbfe6cbac 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -32,7 +32,7 @@ void *kmap_atomic(struct page *page, enum km_type type)
 	unsigned long vaddr;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
-	inc_preempt_count();
+	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
@@ -52,8 +52,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 
 #ifdef CONFIG_DEBUG_HIGHMEM
 	if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
-		dec_preempt_count();
-		preempt_check_resched();
+		pagefault_enable();
 		return;
 	}
 
@@ -68,8 +67,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	 */
 	kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 /* This is the same as kmap_atomic() but can map memory that doesn't
@@ -80,7 +78,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
index 99ebf3ccc222..675502ada5a2 100644
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -39,7 +39,7 @@ void *__kmap_atomic(struct page *page, enum km_type type)
 	unsigned long vaddr;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
-	inc_preempt_count();
+	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
@@ -62,8 +62,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type)
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
 	if (vaddr < FIXADDR_START) { // FIXME
-		dec_preempt_count();
-		preempt_check_resched();
+		pagefault_enable();
 		return;
 	}
 
@@ -78,8 +77,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type)
 	local_flush_tlb_one(vaddr);
 #endif
 
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 #ifndef CONFIG_LIMITED_DMA
@@ -92,7 +90,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index 2d549ed2e113..bbaca66fa293 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -11,7 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/mm.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/futex.h>
 
 #ifndef __s390x__
@@ -258,7 +258,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
 {
 	int oldval = 0, newval, ret;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -284,7 +284,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
 	default:
 		ret = -ENOSYS;
 	}
-	dec_preempt_count();
+	pagefault_enable();
 	*old = oldval;
 	return ret;
 }
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 4d8ed9c65182..01fc6c254292 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -35,7 +35,7 @@ void *kmap_atomic(struct page *page, enum km_type type)
 	unsigned long vaddr;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
-	inc_preempt_count();
+	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
@@ -70,8 +70,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	unsigned long idx = type + KM_TYPE_NR*smp_processor_id();
 
 	if (vaddr < FIXADDR_START) { // FIXME
-		dec_preempt_count();
-		preempt_check_resched();
+		pagefault_enable();
 		return;
 	}
 
@@ -97,8 +96,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
 #endif
 
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 /* We may be fed a pagetable here by ptep_to_xxx and others. */
diff --git a/include/asm-frv/highmem.h b/include/asm-frv/highmem.h
index 0f390f41f816..ff4d6cdeb152 100644
--- a/include/asm-frv/highmem.h
+++ b/include/asm-frv/highmem.h
@@ -115,7 +115,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
 {
 	unsigned long paddr;
 
-	inc_preempt_count();
+	pagefault_disable();
 	paddr = page_to_phys(page);
 
 	switch (type) {
@@ -170,8 +170,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 	default:
 		BUG();
 	}
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 #endif /* !__ASSEMBLY__ */
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index df893c160318..f422df0956a2 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 946d97cfea23..438ef0ec7101 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	if (op == FUTEX_OP_SET)
 		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
@@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		}
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h
index 07d77f3a8cbe..8a98a2654139 100644
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -59,7 +59,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -83,7 +83,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index 927a216bd530..47e5679c2353 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -115,7 +115,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h
index d84bbb283fd1..dbee6e60aa81 100644
--- a/include/asm-parisc/futex.h
+++ b/include/asm-parisc/futex.h
@@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h
index 936422e54891..3f3673fd3ff3 100644
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -43,7 +43,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-ppc/highmem.h b/include/asm-ppc/highmem.h
index 1d2c4ef81c22..f7b21ee302b4 100644
--- a/include/asm-ppc/highmem.h
+++ b/include/asm-ppc/highmem.h
@@ -79,7 +79,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
 	unsigned long vaddr;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
-	inc_preempt_count();
+	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
@@ -101,8 +101,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 	unsigned int idx = type + KM_TYPE_NR*smp_processor_id();
 
 	if (vaddr < KMAP_FIX_BEGIN) { // FIXME
-		dec_preempt_count();
-		preempt_check_resched();
+		pagefault_enable();
 		return;
 	}
 
@@ -115,8 +114,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 	pte_clear(&init_mm, vaddr, kmap_pte+idx);
 	flush_tlb_page(NULL, vaddr);
 #endif
-	dec_preempt_count();
-	preempt_check_resched();
+	pagefault_enable();
 }
 
 static inline struct page *kmap_atomic_to_page(void *ptr)
diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index 7392fc4a954e..876312fe82cc 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -45,7 +45,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -67,7 +67,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h
index 9804bf07b092..5cdfb08013c3 100644
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -55,7 +55,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
+	pagefault_disable();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -78,7 +78,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 		ret = -ENOSYS;
 	}
 
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (!ret) {
 		switch (cmp) {
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index a48d7f11c7be..67918c22339c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -1,8 +1,43 @@
 #ifndef __LINUX_UACCESS_H__
 #define __LINUX_UACCESS_H__
 
+#include <linux/preempt.h>
 #include <asm/uaccess.h>
 
+/*
+ * These routines enable/disable the pagefault handler in that
+ * it will not take any locks and go straight to the fixup table.
+ *
+ * They have great resemblance to the preempt_disable/enable calls
+ * and in fact they are identical; this is because currently there is
+ * no other way to make the pagefault handlers do this. So we do
+ * disable preemption but we don't necessarily care about that.
+ */
+static inline void pagefault_disable(void)
+{
+	inc_preempt_count();
+	/*
+	 * make sure to have issued the store before a pagefault
+	 * can hit.
+	 */
+	barrier();
+}
+
+static inline void pagefault_enable(void)
+{
+	/*
+	 * make sure to issue those last loads/stores before enabling
+	 * the pagefault handler again.
+	 */
+	barrier();
+	dec_preempt_count();
+	/*
+	 * make sure we do..
+	 */
+	barrier();
+	preempt_check_resched();
+}
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
@@ -35,9 +70,9 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 	({						\
 		long ret;				\
 							\
-		inc_preempt_count();			\
+		pagefault_disable();			\
 		ret = __get_user(retval, addr);		\
-		dec_preempt_count();			\
+		pagefault_enable();			\
 		ret;					\
 	})
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 93ef30ba209f..af7b81cbde30 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -282,9 +282,9 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
 {
 	int ret;
 
-	inc_preempt_count();
+	pagefault_disable();
 	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
-	dec_preempt_count();
+	pagefault_enable();
 
 	return ret ? -EFAULT : 0;
 }
@@ -585,9 +585,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		newval = FUTEX_WAITERS | new_owner->pid;
 
-		inc_preempt_count();
+		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-		dec_preempt_count();
+		pagefault_enable();
 		if (curval == -EFAULT)
 			return -EFAULT;
 		if (curval != uval)
@@ -618,9 +618,9 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
-	inc_preempt_count();
+	pagefault_disable();
 	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (oldval == -EFAULT)
 		return oldval;
@@ -1158,9 +1158,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
 	 */
 	newval = current->pid;
 
-	inc_preempt_count();
+	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
@@ -1183,9 +1183,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
 	uval = curval;
 	newval = uval | FUTEX_WAITERS;
 
-	inc_preempt_count();
+	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
@@ -1215,10 +1215,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
 			newval = current->pid |
 				FUTEX_OWNER_DIED | FUTEX_WAITERS;
 
-			inc_preempt_count();
+			pagefault_disable();
 			curval = futex_atomic_cmpxchg_inatomic(uaddr,
 							       uval, newval);
-			dec_preempt_count();
+			pagefault_enable();
 
 			if (unlikely(curval == -EFAULT))
 				goto uaddr_faulted;
@@ -1390,9 +1390,9 @@ retry_locked:
 	 * anyone else up:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
-		inc_preempt_count();
+		pagefault_disable();
 		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-		dec_preempt_count();
+		pagefault_enable();
 	}
 
 	if (unlikely(uval == -EFAULT))
-- 
cgit v1.2.3


From e94b1766097d53e6f3ccfb36c8baa562ffeda3fc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:17 -0800
Subject: [PATCH] slab: remove SLAB_KERNEL

SLAB_KERNEL is an alias of GFP_KERNEL.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/sysenter.c               |  2 +-
 arch/ia64/ia32/binfmt_elf32.c             |  8 ++++----
 arch/ia64/kernel/perfmon.c                |  2 +-
 arch/ia64/mm/init.c                       |  4 ++--
 arch/powerpc/kernel/vdso.c                |  2 +-
 arch/powerpc/platforms/cell/spufs/inode.c |  2 +-
 arch/sh/kernel/vsyscall/vsyscall.c        |  2 +-
 arch/x86_64/ia32/ia32_binfmt.c            |  2 +-
 arch/x86_64/ia32/syscall32.c              |  2 +-
 drivers/atm/he.c                          |  4 ++--
 drivers/base/dmapool.c                    |  2 +-
 drivers/dma/ioatdma.c                     |  4 ++--
 drivers/ieee1394/hosts.c                  |  2 +-
 drivers/ieee1394/ohci1394.c               |  8 ++++----
 drivers/ieee1394/pcilynx.c                |  2 +-
 drivers/ieee1394/raw1394.c                | 10 +++++-----
 drivers/infiniband/hw/ehca/ehca_av.c      |  2 +-
 drivers/infiniband/hw/ehca/ehca_cq.c      |  2 +-
 drivers/infiniband/hw/ehca/ehca_main.c    |  2 +-
 drivers/infiniband/hw/ehca/ehca_mrmw.c    |  4 ++--
 drivers/infiniband/hw/ehca/ehca_pd.c      |  2 +-
 drivers/infiniband/hw/ehca/ehca_qp.c      |  2 +-
 drivers/input/touchscreen/ads7846.c       |  2 +-
 drivers/isdn/gigaset/bas-gigaset.c        | 14 +++++++-------
 drivers/isdn/gigaset/usb-gigaset.c        |  6 +++---
 drivers/media/dvb/cinergyT2/cinergyT2.c   |  2 +-
 drivers/mtd/devices/m25p80.c              |  2 +-
 drivers/scsi/ipr.c                        |  2 +-
 drivers/spi/spi.c                         |  4 ++--
 drivers/spi/spi_bitbang.c                 |  2 +-
 drivers/usb/core/hub.c                    |  4 ++--
 drivers/usb/gadget/gmidi.c                |  2 +-
 drivers/usb/gadget/goku_udc.c             |  2 +-
 drivers/usb/gadget/inode.c                |  6 +++---
 drivers/usb/gadget/net2280.c              |  2 +-
 drivers/usb/gadget/omap_udc.c             |  2 +-
 drivers/usb/gadget/zero.c                 |  2 +-
 drivers/usb/host/hc_crisv10.c             |  2 +-
 drivers/usb/host/ohci-pnx4008.c           |  2 +-
 drivers/usb/input/acecad.c                |  2 +-
 drivers/usb/input/usbtouchscreen.c        |  2 +-
 drivers/usb/misc/usbtest.c                | 28 ++++++++++++++--------------
 drivers/usb/net/rndis_host.c              |  2 +-
 drivers/usb/net/usbnet.c                  |  4 ++--
 fs/adfs/super.c                           |  2 +-
 fs/affs/super.c                           |  2 +-
 fs/afs/super.c                            |  2 +-
 fs/befs/linuxvfs.c                        |  2 +-
 fs/bfs/inode.c                            |  2 +-
 fs/block_dev.c                            |  2 +-
 fs/cifs/cifsfs.c                          |  2 +-
 fs/cifs/misc.c                            |  4 ++--
 fs/cifs/transport.c                       |  4 ++--
 fs/coda/inode.c                           |  2 +-
 fs/dnotify.c                              |  2 +-
 fs/ecryptfs/crypto.c                      |  2 +-
 fs/ecryptfs/file.c                        |  2 +-
 fs/ecryptfs/inode.c                       |  4 ++--
 fs/ecryptfs/keystore.c                    |  2 +-
 fs/ecryptfs/main.c                        |  4 ++--
 fs/ecryptfs/super.c                       |  2 +-
 fs/efs/super.c                            |  2 +-
 fs/eventpoll.c                            |  4 ++--
 fs/exec.c                                 |  2 +-
 fs/ext2/super.c                           |  2 +-
 fs/fat/cache.c                            |  2 +-
 fs/fat/inode.c                            |  2 +-
 fs/fcntl.c                                |  2 +-
 fs/freevxfs/vxfs_inode.c                  |  4 ++--
 fs/fuse/dev.c                             |  2 +-
 fs/fuse/inode.c                           |  2 +-
 fs/hfs/super.c                            |  2 +-
 fs/hfsplus/super.c                        |  2 +-
 fs/hugetlbfs/inode.c                      |  2 +-
 fs/inode.c                                |  2 +-
 fs/isofs/inode.c                          |  2 +-
 fs/jffs2/super.c                          |  2 +-
 fs/locks.c                                |  2 +-
 fs/minix/inode.c                          |  2 +-
 fs/ncpfs/inode.c                          |  2 +-
 fs/nfs/direct.c                           |  2 +-
 fs/nfs/inode.c                            |  2 +-
 fs/nfs/pagelist.c                         |  2 +-
 fs/openpromfs/inode.c                     |  2 +-
 fs/proc/inode.c                           |  2 +-
 fs/qnx4/inode.c                           |  2 +-
 fs/reiserfs/super.c                       |  2 +-
 fs/romfs/inode.c                          |  2 +-
 fs/smbfs/inode.c                          |  2 +-
 fs/smbfs/request.c                        |  2 +-
 fs/sysv/inode.c                           |  2 +-
 fs/udf/super.c                            |  2 +-
 fs/ufs/super.c                            |  2 +-
 include/linux/fs.h                        |  2 +-
 include/linux/rmap.h                      |  2 +-
 include/linux/slab.h                      |  1 -
 include/linux/taskstats_kern.h            |  2 +-
 ipc/mqueue.c                              |  2 +-
 kernel/delayacct.c                        |  2 +-
 kernel/fork.c                             |  6 +++---
 kernel/taskstats.c                        |  2 +-
 kernel/user.c                             |  2 +-
 mm/mempolicy.c                            |  2 +-
 mm/mmap.c                                 |  4 ++--
 mm/shmem.c                                |  2 +-
 mm/slab.c                                 |  2 +-
 net/decnet/dn_table.c                     |  2 +-
 net/ipv4/fib_hash.c                       |  4 ++--
 net/ipv4/fib_trie.c                       |  4 ++--
 net/socket.c                              |  2 +-
 net/sunrpc/rpc_pipe.c                     |  2 +-
 security/keys/key.c                       |  2 +-
 security/selinux/hooks.c                  |  2 +-
 security/selinux/ss/avtab.c               |  2 +-
 114 files changed, 164 insertions(+), 165 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 713ba39d32c6..0bbacd0ec175 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -132,7 +132,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 		goto up_fail;
 	}
 
-	vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		ret = -ENOMEM;
 		goto up_fail;
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index daa6b91bc921..578737ec7629 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -91,7 +91,7 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * it with privilege level 3 because the IVE uses non-privileged accesses to these
 	 * tables.  IA-32 segmentation is used to protect against IA-32 accesses to them.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
 		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
@@ -117,7 +117,7 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * code is locked in specific gate page, which is pointed by pretcode
 	 * when setup_frame_ia32
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
 		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
@@ -142,7 +142,7 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
 		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
@@ -214,7 +214,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 3aaede0d6981..e2321536ee4c 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2302,7 +2302,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ff87a5cba399..56dc2024220e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -156,7 +156,7 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
 		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
@@ -175,7 +175,7 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
 			memset(vma, 0, sizeof(*vma));
 			vma->vm_mm = current->mm;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index c913ad5cad29..a4b28c73bba0 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -264,7 +264,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 
 
 	/* Allocate a VMA structure and fill it up */
-	vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma == NULL) {
 		rc = -ENOMEM;
 		goto fail_mmapsem;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index c7d010749a18..7edfcc9d2853 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -48,7 +48,7 @@ spufs_alloc_inode(struct super_block *sb)
 {
 	struct spufs_inode_info *ei;
 
-	ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL);
+	ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 075d6cc1a2d7..deb46941f315 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -97,7 +97,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 		goto up_fail;
 	}
 
-	vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		ret = -ENOMEM;
 		goto up_fail;
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 82ef182de6ae..932a62ad6c83 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -351,7 +351,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt) 
 		return -ENOMEM; 
 
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3a01329473ab..3e5ed20cba45 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 	struct mm_struct *mm = current->mm;
 	int ret;
 
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma)
 		return -ENOMEM;
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 2a2f0fc2288f..ec8a7a633e68 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -820,7 +820,7 @@ he_init_group(struct he_dev *he_dev, int group)
 		void *cpuaddr;
 
 #ifdef USE_RBPS_POOL 
-		cpuaddr = pci_pool_alloc(he_dev->rbps_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle);
+		cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|SLAB_DMA, &dma_handle);
 		if (cpuaddr == NULL)
 			return -ENOMEM;
 #else
@@ -884,7 +884,7 @@ he_init_group(struct he_dev *he_dev, int group)
 		void *cpuaddr;
 
 #ifdef USE_RBPL_POOL
-		cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle);
+		cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|SLAB_DMA, &dma_handle);
 		if (cpuaddr == NULL)
 			return -ENOMEM;
 #else
diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c
index fa4675254f67..dbe0735f8c9e 100644
--- a/drivers/base/dmapool.c
+++ b/drivers/base/dmapool.c
@@ -126,7 +126,7 @@ dma_pool_create (const char *name, struct device *dev,
 	} else if (allocation < size)
 		return NULL;
 
-	if (!(retval = kmalloc (sizeof *retval, SLAB_KERNEL)))
+	if (!(retval = kmalloc (sizeof *retval, GFP_KERNEL)))
 		return retval;
 
 	strlcpy (retval->name, name, sizeof retval->name);
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 0358419a0e48..8e8726104619 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -636,10 +636,10 @@ static int ioat_self_test(struct ioat_device *device)
 	dma_cookie_t cookie;
 	int err = 0;
 
-	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL);
+	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
 	if (!src)
 		return -ENOMEM;
-	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL);
+	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
 	if (!dest) {
 		kfree(src);
 		return -ENOMEM;
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index 8f4378a1631c..b935e08695a9 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -123,7 +123,7 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 	int i;
 	int hostnum = 0;
 
-	h = kzalloc(sizeof(*h) + extra, SLAB_KERNEL);
+	h = kzalloc(sizeof(*h) + extra, GFP_KERNEL);
 	if (!h)
 		return NULL;
 
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index 6e8ea9110c46..eae97d8dcf03 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -1225,7 +1225,7 @@ static int ohci_iso_recv_init(struct hpsb_iso *iso)
 	int ctx;
 	int ret = -ENOMEM;
 
-	recv = kmalloc(sizeof(*recv), SLAB_KERNEL);
+	recv = kmalloc(sizeof(*recv), GFP_KERNEL);
 	if (!recv)
 		return -ENOMEM;
 
@@ -1918,7 +1918,7 @@ static int ohci_iso_xmit_init(struct hpsb_iso *iso)
 	int ctx;
 	int ret = -ENOMEM;
 
-	xmit = kmalloc(sizeof(*xmit), SLAB_KERNEL);
+	xmit = kmalloc(sizeof(*xmit), GFP_KERNEL);
 	if (!xmit)
 		return -ENOMEM;
 
@@ -3021,7 +3021,7 @@ alloc_dma_rcv_ctx(struct ti_ohci *ohci, struct dma_rcv_ctx *d,
 			return -ENOMEM;
 		}
 
-		d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i);
+		d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i);
 		OHCI_DMA_ALLOC("pool dma_rcv prg[%d]", i);
 
                 if (d->prg_cpu[i] != NULL) {
@@ -3117,7 +3117,7 @@ alloc_dma_trm_ctx(struct ti_ohci *ohci, struct dma_trm_ctx *d,
 	OHCI_DMA_ALLOC("dma_rcv prg pool");
 
 	for (i = 0; i < d->num_desc; i++) {
-		d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i);
+		d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i);
 		OHCI_DMA_ALLOC("pool dma_trm prg[%d]", i);
 
                 if (d->prg_cpu[i] != NULL) {
diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c
index 0a7412e27eb4..9cab1d661472 100644
--- a/drivers/ieee1394/pcilynx.c
+++ b/drivers/ieee1394/pcilynx.c
@@ -1428,7 +1428,7 @@ static int __devinit add_card(struct pci_dev *dev,
         	struct i2c_algo_bit_data i2c_adapter_data;
 
         	error = -ENOMEM;
-		i2c_ad = kmalloc(sizeof(*i2c_ad), SLAB_KERNEL);
+		i2c_ad = kmalloc(sizeof(*i2c_ad), GFP_KERNEL);
         	if (!i2c_ad) FAIL("failed to allocate I2C adapter memory");
 
 		memcpy(i2c_ad, &bit_ops, sizeof(struct i2c_adapter));
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index 47f6a4e29b40..bf71e069eaf5 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -112,7 +112,7 @@ static struct pending_request *__alloc_pending_request(gfp_t flags)
 
 static inline struct pending_request *alloc_pending_request(void)
 {
-	return __alloc_pending_request(SLAB_KERNEL);
+	return __alloc_pending_request(GFP_KERNEL);
 }
 
 static void free_pending_request(struct pending_request *req)
@@ -1737,7 +1737,7 @@ static int arm_register(struct file_info *fi, struct pending_request *req)
 		return (-EINVAL);
 	}
 	/* addr-list-entry for fileinfo */
-	addr = kmalloc(sizeof(*addr), SLAB_KERNEL);
+	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
 	if (!addr) {
 		req->req.length = 0;
 		return (-ENOMEM);
@@ -2103,7 +2103,7 @@ static int write_phypacket(struct file_info *fi, struct pending_request *req)
 static int get_config_rom(struct file_info *fi, struct pending_request *req)
 {
 	int ret = sizeof(struct raw1394_request);
-	quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL);
+	quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL);
 	int status;
 
 	if (!data)
@@ -2133,7 +2133,7 @@ static int get_config_rom(struct file_info *fi, struct pending_request *req)
 static int update_config_rom(struct file_info *fi, struct pending_request *req)
 {
 	int ret = sizeof(struct raw1394_request);
-	quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL);
+	quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 	if (copy_from_user(data, int2ptr(req->req.sendb), req->req.length)) {
@@ -2779,7 +2779,7 @@ static int raw1394_open(struct inode *inode, struct file *file)
 {
 	struct file_info *fi;
 
-	fi = kzalloc(sizeof(*fi), SLAB_KERNEL);
+	fi = kzalloc(sizeof(*fi), GFP_KERNEL);
 	if (!fi)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 214e2fdddeef..0d6e2c4bb245 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
 					      ib_device);
 
-	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	av = kmem_cache_alloc(av_cache, GFP_KERNEL);
 	if (!av) {
 		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
 			 pd, ah_attr);
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 458fe19648a1..93995b658d94 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -134,7 +134,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
 		return ERR_PTR(-EINVAL);
 
-	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL);
 	if (!my_cq) {
 		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
 			 device);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 3d1c1c535038..cc47e4c13a18 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -108,7 +108,7 @@ static struct kmem_cache *ctblk_cache = NULL;
 
 void *ehca_alloc_fw_ctrlblock(void)
 {
-	void *ret = kmem_cache_zalloc(ctblk_cache, SLAB_KERNEL);
+	void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL);
 	if (!ret)
 		ehca_gen_err("Out of memory for ctblk");
 	return ret;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index abce676c0ae0..0a5e2214cc5f 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -53,7 +53,7 @@ static struct ehca_mr *ehca_mr_new(void)
 {
 	struct ehca_mr *me;
 
-	me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
+	me = kmem_cache_alloc(mr_cache, GFP_KERNEL);
 	if (me) {
 		memset(me, 0, sizeof(struct ehca_mr));
 		spin_lock_init(&me->mrlock);
@@ -72,7 +72,7 @@ static struct ehca_mw *ehca_mw_new(void)
 {
 	struct ehca_mw *me;
 
-	me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
+	me = kmem_cache_alloc(mw_cache, GFP_KERNEL);
 	if (me) {
 		memset(me, 0, sizeof(struct ehca_mw));
 		spin_lock_init(&me->mwlock);
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index 2c3cdc6f7b39..d5345e5b3cd6 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -50,7 +50,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
 {
 	struct ehca_pd *pd;
 
-	pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+	pd = kmem_cache_alloc(pd_cache, GFP_KERNEL);
 	if (!pd) {
 		ehca_err(device, "device=%p context=%p out of memory",
 			 device, context);
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 8682aa50c707..c6c9cef203e3 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -450,7 +450,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 	if (pd->uobject && udata)
 		context = pd->uobject->context;
 
-	my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+	my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index f56d6a0f0624..0517c7387d67 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -189,7 +189,7 @@ static int ads7846_read12_ser(struct device *dev, unsigned command)
 {
 	struct spi_device	*spi = to_spi_device(dev);
 	struct ads7846		*ts = dev_get_drvdata(dev);
-	struct ser_req		*req = kzalloc(sizeof *req, SLAB_KERNEL);
+	struct ser_req		*req = kzalloc(sizeof *req, GFP_KERNEL);
 	int			status;
 	int			sample;
 	int			i;
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 5857e7e23f84..63b629b1cdb2 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -2218,21 +2218,21 @@ static int gigaset_probe(struct usb_interface *interface,
 	 * - three for the different uses of the default control pipe
 	 * - three for each isochronous pipe
 	 */
-	if (!(ucs->urb_int_in = usb_alloc_urb(0, SLAB_KERNEL)) ||
-	    !(ucs->urb_cmd_in = usb_alloc_urb(0, SLAB_KERNEL)) ||
-	    !(ucs->urb_cmd_out = usb_alloc_urb(0, SLAB_KERNEL)) ||
-	    !(ucs->urb_ctrl = usb_alloc_urb(0, SLAB_KERNEL)))
+	if (!(ucs->urb_int_in = usb_alloc_urb(0, GFP_KERNEL)) ||
+	    !(ucs->urb_cmd_in = usb_alloc_urb(0, GFP_KERNEL)) ||
+	    !(ucs->urb_cmd_out = usb_alloc_urb(0, GFP_KERNEL)) ||
+	    !(ucs->urb_ctrl = usb_alloc_urb(0, GFP_KERNEL)))
 		goto allocerr;
 
 	for (j = 0; j < 2; ++j) {
 		ubc = cs->bcs[j].hw.bas;
 		for (i = 0; i < BAS_OUTURBS; ++i)
 			if (!(ubc->isoouturbs[i].urb =
-			      usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL)))
+			      usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL)))
 				goto allocerr;
 		for (i = 0; i < BAS_INURBS; ++i)
 			if (!(ubc->isoinurbs[i] =
-			      usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL)))
+			      usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL)))
 				goto allocerr;
 	}
 
@@ -2246,7 +2246,7 @@ static int gigaset_probe(struct usb_interface *interface,
 					(endpoint->bEndpointAddress) & 0x0f),
 			 ucs->int_in_buf, 3, read_int_callback, cs,
 			 endpoint->bInterval);
-	if ((rc = usb_submit_urb(ucs->urb_int_in, SLAB_KERNEL)) != 0) {
+	if ((rc = usb_submit_urb(ucs->urb_int_in, GFP_KERNEL)) != 0) {
 		dev_err(cs->dev, "could not submit interrupt URB: %s\n",
 			get_usb_rcmsg(rc));
 		goto error;
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index af89ce188f2a..04f2ad7ba8b0 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -763,7 +763,7 @@ static int gigaset_probe(struct usb_interface *interface,
 		goto error;
 	}
 
-	ucs->bulk_out_urb = usb_alloc_urb(0, SLAB_KERNEL);
+	ucs->bulk_out_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!ucs->bulk_out_urb) {
 		dev_err(cs->dev, "Couldn't allocate bulk_out_urb\n");
 		retval = -ENOMEM;
@@ -774,7 +774,7 @@ static int gigaset_probe(struct usb_interface *interface,
 
 	atomic_set(&ucs->busy, 0);
 
-	ucs->read_urb = usb_alloc_urb(0, SLAB_KERNEL);
+	ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!ucs->read_urb) {
 		dev_err(cs->dev, "No free urbs available\n");
 		retval = -ENOMEM;
@@ -797,7 +797,7 @@ static int gigaset_probe(struct usb_interface *interface,
 			 gigaset_read_int_callback,
 			 cs->inbuf + 0, endpoint->bInterval);
 
-	retval = usb_submit_urb(ucs->read_urb, SLAB_KERNEL);
+	retval = usb_submit_urb(ucs->read_urb, GFP_KERNEL);
 	if (retval) {
 		dev_err(cs->dev, "Could not submit URB (error %d)\n", -retval);
 		goto error;
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index 206c13e47a06..9123147e376f 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -287,7 +287,7 @@ static int cinergyt2_alloc_stream_urbs (struct cinergyt2 *cinergyt2)
 	int i;
 
 	cinergyt2->streambuf = usb_buffer_alloc(cinergyt2->udev, STREAM_URB_COUNT*STREAM_BUF_SIZE,
-					      SLAB_KERNEL, &cinergyt2->streambuf_dmahandle);
+					      GFP_KERNEL, &cinergyt2->streambuf_dmahandle);
 	if (!cinergyt2->streambuf) {
 		dprintk(1, "failed to alloc consistent stream memory area, bailing out!\n");
 		return -ENOMEM;
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index ef4a731ca5c2..334e078ffaff 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -451,7 +451,7 @@ static int __devinit m25p_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
-	flash = kzalloc(sizeof *flash, SLAB_KERNEL);
+	flash = kzalloc(sizeof *flash, GFP_KERNEL);
 	if (!flash)
 		return -ENOMEM;
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index ccd4dafce8e2..b318500785e5 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6940,7 +6940,7 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
 		return -ENOMEM;
 
 	for (i = 0; i < IPR_NUM_CMD_BLKS; i++) {
-		ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, SLAB_KERNEL, &dma_addr);
+		ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr);
 
 		if (!ipr_cmd) {
 			ipr_free_cmd_blks(ioa_cfg);
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c3c0626f550b..09f2c74a40c5 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -360,7 +360,7 @@ spi_alloc_master(struct device *dev, unsigned size)
 	if (!dev)
 		return NULL;
 
-	master = kzalloc(size + sizeof *master, SLAB_KERNEL);
+	master = kzalloc(size + sizeof *master, GFP_KERNEL);
 	if (!master)
 		return NULL;
 
@@ -607,7 +607,7 @@ static int __init spi_init(void)
 {
 	int	status;
 
-	buf = kmalloc(SPI_BUFSIZ, SLAB_KERNEL);
+	buf = kmalloc(SPI_BUFSIZ, GFP_KERNEL);
 	if (!buf) {
 		status = -ENOMEM;
 		goto err0;
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 08c1c57c6128..57289b61d0be 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -196,7 +196,7 @@ int spi_bitbang_setup(struct spi_device *spi)
 		return -EINVAL;
 
 	if (!cs) {
-		cs = kzalloc(sizeof *cs, SLAB_KERNEL);
+		cs = kzalloc(sizeof *cs, GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
 		spi->controller_state = cs;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 0a46acf557ac..77c05be5241a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2371,7 +2371,7 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1)
 	struct usb_qualifier_descriptor	*qual;
 	int				status;
 
-	qual = kmalloc (sizeof *qual, SLAB_KERNEL);
+	qual = kmalloc (sizeof *qual, GFP_KERNEL);
 	if (qual == NULL)
 		return;
 
@@ -2922,7 +2922,7 @@ static int config_descriptors_changed(struct usb_device *udev)
 		if (len < le16_to_cpu(udev->config[index].desc.wTotalLength))
 			len = le16_to_cpu(udev->config[index].desc.wTotalLength);
 	}
-	buf = kmalloc (len, SLAB_KERNEL);
+	buf = kmalloc (len, GFP_KERNEL);
 	if (buf == NULL) {
 		dev_err(&udev->dev, "no mem to re-read configs after reset\n");
 		/* assume the worst */
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 64554acad63f..31351826f2ba 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -1236,7 +1236,7 @@ autoconf_fail:
 
 
 	/* ok, we made sense of the hardware ... */
-	dev = kzalloc(sizeof(*dev), SLAB_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev) {
 		return -ENOMEM;
 	}
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index a3076da3f4eb..805a9826842d 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1864,7 +1864,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* alloc, and start init */
-	dev = kmalloc (sizeof *dev, SLAB_KERNEL);
+	dev = kmalloc (sizeof *dev, GFP_KERNEL);
 	if (dev == NULL){
 		pr_debug("enomem %s\n", pci_name(pdev));
 		retval = -ENOMEM;
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 86924f9cdd7e..3fb1044a4db0 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -412,7 +412,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 	/* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */
 
 	value = -ENOMEM;
-	kbuf = kmalloc (len, SLAB_KERNEL);
+	kbuf = kmalloc (len, GFP_KERNEL);
 	if (unlikely (!kbuf))
 		goto free1;
 
@@ -456,7 +456,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 	/* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */
 
 	value = -ENOMEM;
-	kbuf = kmalloc (len, SLAB_KERNEL);
+	kbuf = kmalloc (len, GFP_KERNEL);
 	if (!kbuf)
 		goto free1;
 	if (copy_from_user (kbuf, buf, len)) {
@@ -1898,7 +1898,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 	buf += 4;
 	length -= 4;
 
-	kbuf = kmalloc (length, SLAB_KERNEL);
+	kbuf = kmalloc (length, GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
 	if (copy_from_user (kbuf, buf, length)) {
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index 0b590831582c..3024c679e38e 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -2861,7 +2861,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* alloc, and start init */
-	dev = kzalloc (sizeof *dev, SLAB_KERNEL);
+	dev = kzalloc (sizeof *dev, GFP_KERNEL);
 	if (dev == NULL){
 		retval = -ENOMEM;
 		goto done;
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 48a09fd89d18..030d87c28c2f 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -2581,7 +2581,7 @@ omap_udc_setup(struct platform_device *odev, struct otg_transceiver *xceiv)
 	/* UDC_PULLUP_EN gates the chip clock */
 	// OTG_SYSCON_1_REG |= DEV_IDLE_EN;
 
-	udc = kzalloc(sizeof(*udc), SLAB_KERNEL);
+	udc = kzalloc(sizeof(*udc), GFP_KERNEL);
 	if (!udc)
 		return -ENOMEM;
 
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index 0f809dd68492..40710ea1b490 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -1190,7 +1190,7 @@ autoconf_fail:
 
 
 	/* ok, we made sense of the hardware ... */
-	dev = kzalloc(sizeof(*dev), SLAB_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
 	spin_lock_init (&dev->lock);
diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c
index 396dc69d4b4c..7fd872aa654a 100644
--- a/drivers/usb/host/hc_crisv10.c
+++ b/drivers/usb/host/hc_crisv10.c
@@ -188,7 +188,7 @@ static DEFINE_TIMER(bulk_eot_timer, NULL, 0, 0);
 #define CHECK_ALIGN(x) if (((__u32)(x)) & 0x00000003) \
 {panic("Alignment check (DWORD) failed at %s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);}
 
-#define SLAB_FLAG     (in_interrupt() ? GFP_ATOMIC : SLAB_KERNEL)
+#define SLAB_FLAG     (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
 #define KMALLOC_FLAG  (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
 
 /* Most helpful debugging aid */
diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c
index 2dbb77414905..7f26f9bdbaf1 100644
--- a/drivers/usb/host/ohci-pnx4008.c
+++ b/drivers/usb/host/ohci-pnx4008.c
@@ -134,7 +134,7 @@ static int isp1301_attach(struct i2c_adapter *adap, int addr, int kind)
 {
 	struct i2c_client *c;
 
-	c = (struct i2c_client *)kzalloc(sizeof(*c), SLAB_KERNEL);
+	c = (struct i2c_client *)kzalloc(sizeof(*c), GFP_KERNEL);
 
 	if (!c)
 		return -ENOMEM;
diff --git a/drivers/usb/input/acecad.c b/drivers/usb/input/acecad.c
index 0096373b5f98..909138e5aa04 100644
--- a/drivers/usb/input/acecad.c
+++ b/drivers/usb/input/acecad.c
@@ -152,7 +152,7 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_
 	if (!acecad || !input_dev)
 		goto fail1;
 
-	acecad->data = usb_buffer_alloc(dev, 8, SLAB_KERNEL, &acecad->data_dma);
+	acecad->data = usb_buffer_alloc(dev, 8, GFP_KERNEL, &acecad->data_dma);
 	if (!acecad->data)
 		goto fail1;
 
diff --git a/drivers/usb/input/usbtouchscreen.c b/drivers/usb/input/usbtouchscreen.c
index 49704d4ed0e2..7f3c57da9bc0 100644
--- a/drivers/usb/input/usbtouchscreen.c
+++ b/drivers/usb/input/usbtouchscreen.c
@@ -680,7 +680,7 @@ static int usbtouch_probe(struct usb_interface *intf,
 		type->process_pkt = usbtouch_process_pkt;
 
 	usbtouch->data = usb_buffer_alloc(udev, type->rept_size,
-	                                  SLAB_KERNEL, &usbtouch->data_dma);
+	                                  GFP_KERNEL, &usbtouch->data_dma);
 	if (!usbtouch->data)
 		goto out_free;
 
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index ea04dccdc651..fb321864a92d 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -213,7 +213,7 @@ static struct urb *simple_alloc_urb (
 
 	if (bytes < 0)
 		return NULL;
-	urb = usb_alloc_urb (0, SLAB_KERNEL);
+	urb = usb_alloc_urb (0, GFP_KERNEL);
 	if (!urb)
 		return urb;
 	usb_fill_bulk_urb (urb, udev, pipe, NULL, bytes, simple_callback, NULL);
@@ -223,7 +223,7 @@ static struct urb *simple_alloc_urb (
 	urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP;
 	if (usb_pipein (pipe))
 		urb->transfer_flags |= URB_SHORT_NOT_OK;
-	urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL,
+	urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL,
 			&urb->transfer_dma);
 	if (!urb->transfer_buffer) {
 		usb_free_urb (urb);
@@ -315,7 +315,7 @@ static int simple_io (
 		init_completion (&completion);
 		if (usb_pipeout (urb->pipe))
 			simple_fill_buf (urb);
-		if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0)
+		if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0)
 			break;
 
 		/* NOTE:  no timeouts; can't be broken out of by interrupt */
@@ -374,7 +374,7 @@ alloc_sglist (int nents, int max, int vary)
 	unsigned		i;
 	unsigned		size = max;
 
-	sg = kmalloc (nents * sizeof *sg, SLAB_KERNEL);
+	sg = kmalloc (nents * sizeof *sg, GFP_KERNEL);
 	if (!sg)
 		return NULL;
 
@@ -382,7 +382,7 @@ alloc_sglist (int nents, int max, int vary)
 		char		*buf;
 		unsigned	j;
 
-		buf = kzalloc (size, SLAB_KERNEL);
+		buf = kzalloc (size, GFP_KERNEL);
 		if (!buf) {
 			free_sglist (sg, i);
 			return NULL;
@@ -428,7 +428,7 @@ static int perform_sglist (
 				(udev->speed == USB_SPEED_HIGH)
 					? (INTERRUPT_RATE << 3)
 					: INTERRUPT_RATE,
-				sg, nents, 0, SLAB_KERNEL);
+				sg, nents, 0, GFP_KERNEL);
 		
 		if (retval)
 			break;
@@ -855,7 +855,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param)
 	 * as with bulk/intr sglists, sglen is the queue depth; it also
 	 * controls which subtests run (more tests than sglen) or rerun.
 	 */
-	urb = kcalloc(param->sglen, sizeof(struct urb *), SLAB_KERNEL);
+	urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL);
 	if (!urb)
 		return -ENOMEM;
 	for (i = 0; i < param->sglen; i++) {
@@ -981,7 +981,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param)
 		if (!u)
 			goto cleanup;
 
-		reqp = usb_buffer_alloc (udev, sizeof *reqp, SLAB_KERNEL,
+		reqp = usb_buffer_alloc (udev, sizeof *reqp, GFP_KERNEL,
 				&u->setup_dma);
 		if (!reqp)
 			goto cleanup;
@@ -1067,7 +1067,7 @@ static int unlink1 (struct usbtest_dev *dev, int pipe, int size, int async)
 	 * FIXME want additional tests for when endpoint is STALLing
 	 * due to errors, or is just NAKing requests.
 	 */
-	if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0) {
+	if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0) {
 		dev_dbg (&dev->intf->dev, "submit fail %d\n", retval);
 		return retval;
 	}
@@ -1251,7 +1251,7 @@ static int ctrl_out (struct usbtest_dev *dev,
 	if (length < 1 || length > 0xffff || vary >= length)
 		return -EINVAL;
 
-	buf = kmalloc(length, SLAB_KERNEL);
+	buf = kmalloc(length, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -1403,7 +1403,7 @@ static struct urb *iso_alloc_urb (
 	maxp *= 1 + (0x3 & (le16_to_cpu(desc->wMaxPacketSize) >> 11));
 	packets = (bytes + maxp - 1) / maxp;
 
-	urb = usb_alloc_urb (packets, SLAB_KERNEL);
+	urb = usb_alloc_urb (packets, GFP_KERNEL);
 	if (!urb)
 		return urb;
 	urb->dev = udev;
@@ -1411,7 +1411,7 @@ static struct urb *iso_alloc_urb (
 
 	urb->number_of_packets = packets;
 	urb->transfer_buffer_length = bytes;
-	urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL,
+	urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL,
 			&urb->transfer_dma);
 	if (!urb->transfer_buffer) {
 		usb_free_urb (urb);
@@ -1900,7 +1900,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
 	}
 #endif
 
-	dev = kzalloc(sizeof(*dev), SLAB_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
 	info = (struct usbtest_info *) id->driver_info;
@@ -1910,7 +1910,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
 	dev->intf = intf;
 
 	/* cacheline-aligned scratch for i/o */
-	if ((dev->buf = kmalloc (TBUF_SIZE, SLAB_KERNEL)) == NULL) {
+	if ((dev->buf = kmalloc (TBUF_SIZE, GFP_KERNEL)) == NULL) {
 		kfree (dev);
 		return -ENOMEM;
 	}
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index c2a28d88ef3c..99f26b3e502f 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -469,7 +469,7 @@ static void rndis_unbind(struct usbnet *dev, struct usb_interface *intf)
 	struct rndis_halt	*halt;
 
 	/* try to clear any rndis state/activity (no i/o from stack!) */
-	halt = kcalloc(1, sizeof *halt, SLAB_KERNEL);
+	halt = kcalloc(1, sizeof *halt, GFP_KERNEL);
 	if (halt) {
 		halt->msg_type = RNDIS_MSG_HALT;
 		halt->msg_len = ccpu2(sizeof *halt);
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index 327f97555679..6e39e9988259 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -179,9 +179,9 @@ static int init_status (struct usbnet *dev, struct usb_interface *intf)
 	period = max ((int) dev->status->desc.bInterval,
 		(dev->udev->speed == USB_SPEED_HIGH) ? 7 : 3);
 
-	buf = kmalloc (maxp, SLAB_KERNEL);
+	buf = kmalloc (maxp, GFP_KERNEL);
 	if (buf) {
-		dev->interrupt = usb_alloc_urb (0, SLAB_KERNEL);
+		dev->interrupt = usb_alloc_urb (0, GFP_KERNEL);
 		if (!dev->interrupt) {
 			kfree (buf);
 			return -ENOMEM;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9ade139086fc..52eb10ca654e 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -217,7 +217,7 @@ static kmem_cache_t *adfs_inode_cachep;
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
 	struct adfs_inode_info *ei;
-	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5ea72c3a16c3..81c73ec09f66 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,7 +71,7 @@ static kmem_cache_t * affs_inode_cachep;
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
 	struct affs_inode_info *ei;
-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, SLAB_KERNEL);
+	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 67d1f5c819ec..c6ead009bf78 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -412,7 +412,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	struct afs_vnode *vnode;
 
 	vnode = (struct afs_vnode *)
-		kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL);
+		kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
 	if (!vnode)
 		return NULL;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 07f7144f0e2e..995348df94ad 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb)
 {
         struct befs_inode_info *bi;
         bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep,
-							SLAB_KERNEL);
+							GFP_KERNEL);
         if (!bi)
                 return NULL;
         return &bi->vfs_inode;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index ed27ffb3459e..2e45123c8f7a 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -233,7 +233,7 @@ static kmem_cache_t * bfs_inode_cachep;
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
 	struct bfs_inode_info *bi;
-	bi = kmem_cache_alloc(bfs_inode_cachep, SLAB_KERNEL);
+	bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL);
 	if (!bi)
 		return NULL;
 	return &bi->vfs_inode;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c0e7af9d0f..063506705f27 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -239,7 +239,7 @@ static kmem_cache_t * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
-	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL);
+	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84976cdbe713..84168629cea3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -245,7 +245,7 @@ static struct inode *
 cifs_alloc_inode(struct super_block *sb)
 {
 	struct cifsInodeInfo *cifs_inode;
-	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, SLAB_KERNEL);
+	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
 	if (!cifs_inode)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8355daff504c..aedf683f011f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -153,7 +153,7 @@ cifs_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS);
 
 	/* clear the first few header bytes */
 	/* for most paths, more is cleared in header_assemble */
@@ -192,7 +192,7 @@ cifs_small_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS);
 	if (ret_buf) {
 	/* No need to clear memory here, cleared in header assemble */
 	/*	memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7514237cf31a..1f727765a8ea 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	}
 	
 	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
-						    SLAB_KERNEL | GFP_NOFS);
+						    GFP_KERNEL | GFP_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
@@ -118,7 +118,7 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon)
 		return NULL;
 	}
 	temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep,
-						       SLAB_KERNEL);
+						       GFP_KERNEL);
 	if (temp == NULL)
 		return temp;
 	else {
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 88d123321164..50cedd2617d6 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -43,7 +43,7 @@ static kmem_cache_t * coda_inode_cachep;
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
 	struct coda_inode_info *ei;
-	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, SLAB_KERNEL);
+	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	memset(&ei->c_fid, 0, sizeof(struct CodaFid));
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 2b0442db67e0..e778b1737b79 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -77,7 +77,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	inode = filp->f_dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL);
+	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
 	if (dn == NULL)
 		return -ENOMEM;
 	spin_lock(&inode->i_lock);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 776b2eed371e..7196f50fe152 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -628,7 +628,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page)
 	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
 	base_extent = (page->index * num_extents_per_page);
 	lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
-					   SLAB_KERNEL);
+					   GFP_KERNEL);
 	if (!lower_page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a92ef05eff8f..42099e779a56 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -250,7 +250,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	int lower_flags;
 
 	/* Released in ecryptfs_release or end of function if failure */
-	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
+	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
 	if (!file_info) {
 		ecryptfs_printk(KERN_ERR,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 70911412044d..8a1945a84c36 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -369,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	BUG_ON(!atomic_read(&lower_dentry->d_count));
 	ecryptfs_set_dentry_private(dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(dentry)) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
@@ -795,7 +795,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	/* Released at out_free: label */
 	ecryptfs_set_file_private(&fake_ecryptfs_file,
 				  kmem_cache_alloc(ecryptfs_file_info_cache,
-						   SLAB_KERNEL));
+						   GFP_KERNEL));
 	if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c3746f56d162..745c0f1bfbbd 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -207,7 +207,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
 	 * at end of function upon failure */
 	auth_tok_list_item =
-	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
+	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
 	if (!auth_tok_list_item) {
 		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
 		rc = -ENOMEM;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a78d87d14baf..a2c6ccbce300 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -378,7 +378,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
 					kmem_cache_alloc(ecryptfs_sb_info_cache,
-							 SLAB_KERNEL));
+							 GFP_KERNEL));
 	if (!ecryptfs_superblock_to_private(sb)) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
@@ -402,7 +402,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* through deactivate_super(sb) from get_sb_nodev() */
 	ecryptfs_set_dentry_private(sb->s_root,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(sb->s_root)) {
 		ecryptfs_printk(KERN_ERR,
 				"dentry_info_cache alloc failed\n");
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 825757ae4867..eaa5daaf106e 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -50,7 +50,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 	struct inode *inode = NULL;
 
 	ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
-					  SLAB_KERNEL);
+					  GFP_KERNEL);
 	if (unlikely(!ecryptfs_inode))
 		goto out;
 	ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index b3f50651eb6b..69b15a996cfc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -57,7 +57,7 @@ static kmem_cache_t * efs_inode_cachep;
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
 	struct efs_inode_info *ei;
-	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, SLAB_KERNEL);
+	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae228ec54e94..f5c88435c6be 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -961,7 +961,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
+	if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
diff --git a/fs/exec.c b/fs/exec.c
index d993ea1a81ae..2092bd207463 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -404,7 +404,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8b9abd95d07..85c237e73853 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -140,7 +140,7 @@ static kmem_cache_t * ext2_inode_cachep;
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
 	struct ext2_inode_info *ei;
-	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL);
+	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 82cc4f59e3ba..8c272278455c 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -63,7 +63,7 @@ void fat_cache_destroy(void)
 
 static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
 {
-	return kmem_cache_alloc(fat_cache_cachep, SLAB_KERNEL);
+	return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL);
 }
 
 static inline void fat_cache_free(struct fat_cache *cache)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 78945b53b0f8..b58fd0c9f3cd 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -482,7 +482,7 @@ static kmem_cache_t *fat_inode_cachep;
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
 	struct msdos_inode_info *ei;
-	ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e4f26165f12a..c03dc9cb21cb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -567,7 +567,7 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 	int result = 0;
 
 	if (on) {
-		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
+		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 		if (!new)
 			return -ENOMEM;
 	}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 4786d51ad3bd..d2dd0d700077 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -103,7 +103,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
 		struct vxfs_inode_info	*vip;
 		struct vxfs_dinode	*dip;
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(bp->b_data + offset);
 		memcpy(vip, dip, sizeof(*vip));
@@ -145,7 +145,7 @@ __vxfs_iget(ino_t ino, struct inode *ilistp)
 		struct vxfs_dinode	*dip;
 		caddr_t			kaddr = (char *)page_address(pp);
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(kaddr + offset);
 		memcpy(vip, dip, sizeof(*vip));
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 66571eafbb1e..8c15139f2756 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -41,7 +41,7 @@ static void fuse_request_init(struct fuse_req *req)
 
 struct fuse_req *fuse_request_alloc(void)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
 	if (req)
 		fuse_request_init(req);
 	return req;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fc4203570370..e039e2047cce 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -46,7 +46,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	struct inode *inode;
 	struct fuse_inode *fi;
 
-	inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
 	if (!inode)
 		return NULL;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 85b17b3fa4a0..ffc6409132c8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -145,7 +145,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 {
 	struct hfs_inode_info *i;
 
-	i = kmem_cache_alloc(hfs_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 194eede52fa4..4a0c70c76c8a 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -440,7 +440,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
 	struct hfsplus_inode_info *i;
 
-	i = kmem_cache_alloc(hfsplus_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7f4756963d05..36e52173a54a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -522,7 +522,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 
 	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
 		return NULL;
-	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
 	if (unlikely(!p)) {
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 26cdb115ce67..dd15984d51a8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -109,7 +109,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 	if (sb->s_op->alloc_inode)
 		inode = sb->s_op->alloc_inode(sb);
 	else
-		inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
+		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
 	if (inode) {
 		struct address_space * const mapping = &inode->i_data;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index c34b862cdbf2..4b6381cd2cf4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -62,7 +62,7 @@ static kmem_cache_t *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 	struct iso_inode_info *ei;
-	ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bc4b8106a490..77be534ce422 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -33,7 +33,7 @@ static kmem_cache_t *jffs2_inode_cachep;
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
 	struct jffs2_inode_info *ei;
-	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, SLAB_KERNEL);
+	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/locks.c b/fs/locks.c
index e0b6a80649a0..a7b97d50c1e0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -147,7 +147,7 @@ static kmem_cache_t *filelock_cache __read_mostly;
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
 
 static void locks_release_private(struct file_lock *fl)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1e36bae4d0eb..ce532c2deda8 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -56,7 +56,7 @@ static kmem_cache_t * minix_inode_cachep;
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
 	struct minix_inode_info *ei;
-	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, SLAB_KERNEL);
+	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 72dad552aa00..ed84d899220c 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -45,7 +45,7 @@ static kmem_cache_t * ncp_inode_cachep;
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
 	struct ncp_inode_info *ei;
-	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, SLAB_KERNEL);
+	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bdfabf854a51..769fd0a0c772 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -143,7 +143,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
 	if (!dreq)
 		return NULL;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 08cc4c5919ab..6b53aae4ed2c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1080,7 +1080,7 @@ void nfs4_clear_inode(struct inode *inode)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
 	struct nfs_inode *nfsi;
-	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
+	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0UL;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 829af323f288..a1561a820abe 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,7 +26,7 @@ static inline struct nfs_page *
 nfs_page_alloc(void)
 {
 	struct nfs_page	*p;
-	p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->wb_list);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 592a6402e851..911d1bcfc567 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -336,7 +336,7 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
 	struct op_inode_info *oi;
 
-	oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
+	oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL);
 	if (!oi)
 		return NULL;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 49dfb2ab783e..b24cdb2f17c1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -88,7 +88,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
+	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->pid = NULL;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5a41db2a218d..5b943eb11d76 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -520,7 +520,7 @@ static kmem_cache_t *qnx4_inode_cachep;
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
 	struct qnx4_inode_info *ei;
-	ei = kmem_cache_alloc(qnx4_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 17249994110f..32332516d656 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -496,7 +496,7 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
 	struct reiserfs_inode_info *ei;
 	ei = (struct reiserfs_inode_info *)
-	    kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL);
+	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index ddcd9e1ef282..d1b455f9b669 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -555,7 +555,7 @@ static kmem_cache_t * romfs_inode_cachep;
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
 	struct romfs_inode_info *ei;
-	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 2c122ee83adb..221617103484 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -55,7 +55,7 @@ static kmem_cache_t *smb_inode_cachep;
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
 	struct smb_inode_info *ei;
-	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, SLAB_KERNEL);
+	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 0fb74697abc4..3eb1402191b9 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
 	struct smb_request *req;
 	unsigned char *buf = NULL;
 
-	req = kmem_cache_alloc(req_cachep, SLAB_KERNEL);
+	req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
 	VERBOSE("allocating request: %p\n", req);
 	if (!req)
 		goto out;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d63c5e48b050..a6ca12b747cf 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -307,7 +307,7 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
 	struct sysv_inode_info *si;
 
-	si = kmem_cache_alloc(sysv_inode_cachep, SLAB_KERNEL);
+	si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL);
 	if (!si)
 		return NULL;
 	return &si->vfs_inode;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1aea6a4f9a4a..e50f24221dea 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -112,7 +112,7 @@ static kmem_cache_t * udf_inode_cachep;
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
 	struct udf_inode_info *ei;
-	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
+	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ec79e3091d1b..85a88c0c5e68 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1209,7 +1209,7 @@ static kmem_cache_t * ufs_inode_cachep;
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
 	struct ufs_inode_info *ei;
-	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, SLAB_KERNEL);
+	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a8039c8d8cbb..94b831b8157c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1483,7 +1483,7 @@ extern void __init vfs_caches_init(unsigned long);
 
 extern struct kmem_cache *names_cachep;
 
-#define __getname()	kmem_cache_alloc(names_cachep, SLAB_KERNEL)
+#define __getname()	kmem_cache_alloc(names_cachep, GFP_KERNEL)
 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
 #ifndef CONFIG_AUDITSYSCALL
 #define putname(name)   __putname(name)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index db2c1df4fef9..61c2ab634b00 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -34,7 +34,7 @@ extern kmem_cache_t *anon_vma_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
-	return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL);
+	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 }
 
 static inline void anon_vma_free(struct anon_vma *anon_vma)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 34b046ea88f1..639f65efa46e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -19,7 +19,6 @@ typedef struct kmem_cache kmem_cache_t;
 #include	<asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 
 /* flags for kmem_cache_alloc() */
-#define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
 /* flags to pass to kmem_cache_create().
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index 6562a2050a25..f81a5af8a4f8 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -35,7 +35,7 @@ static inline void taskstats_tgid_alloc(struct task_struct *tsk)
 		return;
 
 	/* No problem if kmem_cache_zalloc() fails */
-	stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (!sig->stats) {
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 7c274002c9f5..813bb941342b 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -224,7 +224,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
 {
 	struct mqueue_inode_info *ei;
 
-	ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 66a0ea48751d..70e9ec603082 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -41,7 +41,7 @@ void delayacct_init(void)
 
 void __delayacct_tsk_init(struct task_struct *tsk)
 {
-	tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
 	if (tsk->delays)
 		spin_lock_init(&tsk->delays->lock);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 5678e6c61ef2..711aa5f10da7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -237,7 +237,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -319,7 +319,7 @@ static inline void mm_free_pgd(struct mm_struct * mm)
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
-#define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
 #include <linux/init_task.h>
@@ -621,7 +621,7 @@ static struct files_struct *alloc_files(void)
 	struct files_struct *newf;
 	struct fdtable *fdt;
 
-	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
 	if (!newf)
 		goto out;
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d3d28919d4b4..1b2b326cf703 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -425,7 +425,7 @@ void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
 	*mycpu = raw_smp_processor_id();
 
 	*ptidstats = NULL;
-	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	tmp = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
 	if (!tmp)
 		return;
 
diff --git a/kernel/user.c b/kernel/user.c
index 220e586127a0..c1f93c164c93 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -132,7 +132,7 @@ struct user_struct * alloc_uid(uid_t uid)
 	if (!up) {
 		struct user_struct *new;
 
-		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
+		new = kmem_cache_alloc(uid_cachep, GFP_KERNEL);
 		if (!new)
 			return NULL;
 		new->uid = uid;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e7b69c90cfd6..ad864f8708b0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1326,7 +1326,7 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
 	atomic_set(&new->refcnt, 1);
 	if (new->policy == MPOL_BIND) {
 		int sz = ksize(old->v.zonelist);
-		new->v.zonelist = kmemdup(old->v.zonelist, sz, SLAB_KERNEL);
+		new->v.zonelist = kmemdup(old->v.zonelist, sz, GFP_KERNEL);
 		if (!new->v.zonelist) {
 			kmem_cache_free(policy_cache, new);
 			return ERR_PTR(-ENOMEM);
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b40abd7cba2..7be110e98d4c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1736,7 +1736,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	if (mm->map_count >= sysctl_max_map_count)
 		return -ENOMEM;
 
-	new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
@@ -2057,7 +2057,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		    vma_start < new_vma->vm_end)
 			*vmap = new_vma;
 	} else {
-		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
 			*new_vma = *vma;
 			pol = mpol_copy(vma_policy(vma));
diff --git a/mm/shmem.c b/mm/shmem.c
index 4959535fc14c..bdaecfdaabd4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2263,7 +2263,7 @@ static struct kmem_cache *shmem_inode_cachep;
 static struct inode *shmem_alloc_inode(struct super_block *sb)
 {
 	struct shmem_inode_info *p;
-	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
+	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
 	if (!p)
 		return NULL;
 	return &p->vfs_inode;
diff --git a/mm/slab.c b/mm/slab.c
index 9f34b4946fba..1f374c1df018 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2237,7 +2237,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	align = ralign;
 
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
 	if (!cachep)
 		goto oops;
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index bdbc3f431668..101e5ccaf096 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -590,7 +590,7 @@ create:
 
 replace:
 	err = -ENOBUFS;
-	new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL);
+	new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL);
 	if (new_f == NULL)
 		goto out;
 
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 107bb6cbb0b3..4463443e42cd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -485,13 +485,13 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 		goto out;
 
 	err = -ENOBUFS;
-	new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 	if (new_fa == NULL)
 		goto out;
 
 	new_f = NULL;
 	if (!f) {
-		new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
+		new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL);
 		if (new_f == NULL)
 			goto out_free_new_fa;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d17990ec724f..6be6caf1af37 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1187,7 +1187,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 			u8 state;
 
 			err = -ENOBUFS;
-			new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 			if (new_fa == NULL)
 				goto out;
 
@@ -1232,7 +1232,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 		goto out;
 
 	err = -ENOBUFS;
-	new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 	if (new_fa == NULL)
 		goto out;
 
diff --git a/net/socket.c b/net/socket.c
index e8db54702a69..4f417c2ddc15 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -236,7 +236,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
 
-	ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	init_waitqueue_head(&ei->socket.wait);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 49dba5febbbd..df753d0a884b 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -143,7 +143,7 @@ static struct inode *
 rpc_alloc_inode(struct super_block *sb)
 {
 	struct rpc_inode *rpci;
-	rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL);
+	rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
 	if (!rpci)
 		return NULL;
 	return &rpci->vfs_inode;
diff --git a/security/keys/key.c b/security/keys/key.c
index 70eacbe5abde..157bac658bf9 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -285,7 +285,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	}
 
 	/* allocate and initialise the key and its description */
-	key = kmem_cache_alloc(key_jar, SLAB_KERNEL);
+	key = kmem_cache_alloc(key_jar, GFP_KERNEL);
 	if (!key)
 		goto no_memory_2;
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 78f98fe084eb..ac1aeed0b289 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -181,7 +181,7 @@ static int inode_alloc_security(struct inode *inode)
 	struct task_security_struct *tsec = current->security;
 	struct inode_security_struct *isec;
 
-	isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL);
+	isec = kmem_cache_alloc(sel_inode_cache, GFP_KERNEL);
 	if (!isec)
 		return -ENOMEM;
 
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index d049c7acbc8b..2dfc6134c2c2 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -36,7 +36,7 @@ avtab_insert_node(struct avtab *h, int hvalue,
 		  struct avtab_key *key, struct avtab_datum *datum)
 {
 	struct avtab_node * newnode;
-	newnode = kmem_cache_alloc(avtab_node_cachep, SLAB_KERNEL);
+	newnode = kmem_cache_alloc(avtab_node_cachep, GFP_KERNEL);
 	if (newnode == NULL)
 		return NULL;
 	memset(newnode, 0, sizeof(struct avtab_node));
-- 
cgit v1.2.3


From e18b890bb0881bbab6f4f1a6cd20d9c60d66b003 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:20 -0800
Subject: [PATCH] slab: remove kmem_cache_t

Replace all uses of kmem_cache_t with struct kmem_cache.

The patch was generated using the following script:

	#!/bin/sh
	#
	# Replace one string by another in all the kernel sources.
	#

	set -e

	for file in `find * -name "*.c" -o -name "*.h"|xargs grep -l $1`; do
		quilt add $file
		sed -e "1,\$s/$1/$2/g" $file >/tmp/$$
		mv /tmp/$$ $file
		quilt refresh
	done

The script was run like this

	sh replace kmem_cache_t "struct kmem_cache"

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/DMA-API.txt                   |  4 ++--
 arch/arm/mach-s3c2410/dma.c                 |  4 ++--
 arch/arm26/mm/memc.c                        |  6 +++---
 arch/frv/mm/pgalloc.c                       |  6 +++---
 arch/i386/mm/init.c                         |  4 ++--
 arch/i386/mm/pgtable.c                      |  6 +++---
 arch/ia64/ia32/ia32_support.c               |  2 +-
 arch/ia64/ia32/sys_ia32.c                   |  2 +-
 arch/powerpc/kernel/rtas_flash.c            |  4 ++--
 arch/powerpc/mm/hugetlbpage.c               |  2 +-
 arch/powerpc/mm/init_64.c                   |  6 +++---
 arch/powerpc/platforms/cell/spufs/inode.c   |  4 ++--
 arch/sh/kernel/cpu/sh4/sq.c                 |  2 +-
 arch/sh/mm/pmb.c                            |  6 +++---
 arch/sparc64/mm/init.c                      |  4 ++--
 arch/sparc64/mm/tsb.c                       |  2 +-
 block/cfq-iosched.c                         |  4 ++--
 block/ll_rw_blk.c                           |  6 +++---
 drivers/block/aoe/aoeblk.c                  |  2 +-
 drivers/ieee1394/eth1394.c                  |  2 +-
 drivers/md/dm-crypt.c                       |  2 +-
 drivers/md/dm-mpath.c                       |  2 +-
 drivers/md/dm-snap.c                        |  6 +++---
 drivers/md/dm.c                             |  4 ++--
 drivers/md/kcopyd.c                         |  2 +-
 drivers/md/raid5.c                          |  4 ++--
 drivers/message/i2o/i2o_block.h             |  2 +-
 drivers/pci/msi.c                           |  2 +-
 drivers/s390/block/dasd_devmap.c            |  2 +-
 drivers/s390/block/dasd_int.h               |  2 +-
 drivers/s390/scsi/zfcp_def.h                |  6 +++---
 drivers/scsi/aic94xx/aic94xx.h              |  4 ++--
 drivers/scsi/aic94xx/aic94xx_hwi.c          |  2 +-
 drivers/scsi/aic94xx/aic94xx_init.c         |  4 ++--
 drivers/scsi/libsas/sas_init.c              |  2 +-
 drivers/scsi/qla2xxx/qla_os.c               |  2 +-
 drivers/scsi/qla4xxx/ql4_os.c               |  2 +-
 drivers/scsi/scsi.c                         |  2 +-
 drivers/scsi/scsi_lib.c                     |  4 ++--
 drivers/scsi/scsi_tgt_lib.c                 |  2 +-
 drivers/usb/host/hc_crisv10.c               |  6 +++---
 drivers/usb/host/uhci-hcd.c                 |  2 +-
 drivers/usb/mon/mon_text.c                  |  6 +++---
 fs/adfs/super.c                             |  4 ++--
 fs/affs/super.c                             |  4 ++--
 fs/afs/super.c                              |  6 +++---
 fs/aio.c                                    |  4 ++--
 fs/befs/linuxvfs.c                          |  4 ++--
 fs/bfs/inode.c                              |  4 ++--
 fs/bio.c                                    |  4 ++--
 fs/block_dev.c                              |  4 ++--
 fs/buffer.c                                 |  4 ++--
 fs/cifs/cifsfs.c                            | 14 +++++++-------
 fs/cifs/transport.c                         |  2 +-
 fs/coda/inode.c                             |  4 ++--
 fs/configfs/configfs_internal.h             |  2 +-
 fs/configfs/mount.c                         |  2 +-
 fs/dcache.c                                 |  6 +++---
 fs/dcookies.c                               |  2 +-
 fs/dlm/memory.c                             |  2 +-
 fs/dnotify.c                                |  2 +-
 fs/dquot.c                                  |  2 +-
 fs/ecryptfs/main.c                          |  2 +-
 fs/efs/super.c                              |  4 ++--
 fs/eventpoll.c                              |  4 ++--
 fs/ext2/super.c                             |  4 ++--
 fs/ext3/super.c                             |  4 ++--
 fs/ext4/super.c                             |  4 ++--
 fs/fat/cache.c                              |  4 ++--
 fs/fat/inode.c                              |  4 ++--
 fs/fcntl.c                                  |  2 +-
 fs/freevxfs/vxfs_inode.c                    |  2 +-
 fs/fuse/dev.c                               |  2 +-
 fs/fuse/inode.c                             |  4 ++--
 fs/gfs2/main.c                              |  4 ++--
 fs/gfs2/util.c                              |  6 +++---
 fs/gfs2/util.h                              |  6 +++---
 fs/hfs/super.c                              |  4 ++--
 fs/hfsplus/super.c                          |  4 ++--
 fs/hpfs/super.c                             |  4 ++--
 fs/hugetlbfs/inode.c                        |  4 ++--
 fs/inode.c                                  |  4 ++--
 fs/inotify_user.c                           |  4 ++--
 fs/isofs/inode.c                            |  4 ++--
 fs/jbd/journal.c                            |  6 +++---
 fs/jbd/revoke.c                             |  4 ++--
 fs/jbd2/journal.c                           |  6 +++---
 fs/jbd2/revoke.c                            |  4 ++--
 fs/jffs/inode-v23.c                         |  4 ++--
 fs/jffs/jffs_fm.c                           |  4 ++--
 fs/jffs2/malloc.c                           | 18 +++++++++---------
 fs/jffs2/super.c                            |  4 ++--
 fs/jfs/jfs_metapage.c                       |  4 ++--
 fs/jfs/super.c                              |  4 ++--
 fs/locks.c                                  |  4 ++--
 fs/mbcache.c                                |  2 +-
 fs/minix/inode.c                            |  4 ++--
 fs/namespace.c                              |  2 +-
 fs/ncpfs/inode.c                            |  4 ++--
 fs/nfs/direct.c                             |  2 +-
 fs/nfs/inode.c                              |  4 ++--
 fs/nfs/pagelist.c                           |  2 +-
 fs/nfs/read.c                               |  2 +-
 fs/nfs/write.c                              |  2 +-
 fs/nfsd/nfs4state.c                         | 10 +++++-----
 fs/ocfs2/dlm/dlmfs.c                        |  4 ++--
 fs/ocfs2/dlm/dlmmaster.c                    |  2 +-
 fs/ocfs2/extent_map.c                       |  2 +-
 fs/ocfs2/inode.h                            |  2 +-
 fs/ocfs2/super.c                            |  4 ++--
 fs/ocfs2/uptodate.c                         |  2 +-
 fs/openpromfs/inode.c                       |  4 ++--
 fs/proc/inode.c                             |  4 ++--
 fs/qnx4/inode.c                             |  4 ++--
 fs/reiserfs/super.c                         |  4 ++--
 fs/romfs/inode.c                            |  4 ++--
 fs/smbfs/inode.c                            |  4 ++--
 fs/smbfs/request.c                          |  2 +-
 fs/sysfs/mount.c                            |  2 +-
 fs/sysfs/sysfs.h                            |  2 +-
 fs/sysv/inode.c                             |  4 ++--
 fs/udf/super.c                              |  4 ++--
 fs/ufs/super.c                              |  4 ++--
 include/acpi/platform/aclinux.h             |  2 +-
 include/asm-arm26/pgalloc.h                 |  2 +-
 include/asm-i386/pgtable.h                  | 10 +++++-----
 include/asm-powerpc/pgalloc.h               |  2 +-
 include/asm-sparc64/pgalloc.h               |  2 +-
 include/linux/delayacct.h                   |  2 +-
 include/linux/i2o.h                         |  2 +-
 include/linux/jbd.h                         |  2 +-
 include/linux/jbd2.h                        |  2 +-
 include/linux/raid/raid5.h                  |  2 +-
 include/linux/rmap.h                        |  2 +-
 include/linux/skbuff.h                      |  2 +-
 include/linux/taskstats_kern.h              |  2 +-
 include/net/dst.h                           |  2 +-
 include/net/inet_hashtables.h               |  6 +++---
 include/net/neighbour.h                     |  2 +-
 include/net/netfilter/nf_conntrack_expect.h |  2 +-
 include/net/request_sock.h                  |  2 +-
 include/net/sock.h                          |  2 +-
 include/net/timewait_sock.h                 |  2 +-
 include/scsi/libsas.h                       |  4 ++--
 ipc/mqueue.c                                |  4 ++--
 kernel/delayacct.c                          |  2 +-
 kernel/fork.c                               | 16 ++++++++--------
 kernel/pid.c                                |  2 +-
 kernel/posix-timers.c                       |  2 +-
 kernel/signal.c                             |  2 +-
 kernel/taskstats.c                          |  2 +-
 kernel/user.c                               |  2 +-
 lib/idr.c                                   |  4 ++--
 lib/radix-tree.c                            |  4 ++--
 net/bridge/br_fdb.c                         |  2 +-
 net/core/flow.c                             |  2 +-
 net/core/skbuff.c                           |  8 ++++----
 net/core/sock.c                             |  2 +-
 net/dccp/ackvec.c                           |  4 ++--
 net/dccp/ccid.c                             |  6 +++---
 net/dccp/ccid.h                             |  4 ++--
 net/dccp/ccids/lib/loss_interval.h          |  2 +-
 net/dccp/ccids/lib/packet_history.h         |  4 ++--
 net/decnet/dn_table.c                       |  2 +-
 net/ipv4/fib_hash.c                         |  4 ++--
 net/ipv4/fib_trie.c                         |  2 +-
 net/ipv4/inet_hashtables.c                  |  4 ++--
 net/ipv4/inetpeer.c                         |  2 +-
 net/ipv4/ipmr.c                             |  2 +-
 net/ipv4/ipvs/ip_vs_conn.c                  |  2 +-
 net/ipv4/netfilter/ip_conntrack_core.c      |  4 ++--
 net/ipv6/ip6_fib.c                          |  2 +-
 net/ipv6/xfrm6_tunnel.c                     |  2 +-
 net/netfilter/nf_conntrack_core.c           |  6 +++---
 net/netfilter/nf_conntrack_expect.c         |  2 +-
 net/netfilter/xt_hashlimit.c                |  2 +-
 net/sctp/protocol.c                         |  4 ++--
 net/sctp/sm_make_chunk.c                    |  2 +-
 net/sctp/socket.c                           |  2 +-
 net/socket.c                                |  4 ++--
 net/sunrpc/rpc_pipe.c                       |  4 ++--
 net/sunrpc/sched.c                          |  4 ++--
 net/tipc/handler.c                          |  2 +-
 net/xfrm/xfrm_input.c                       |  2 +-
 net/xfrm/xfrm_policy.c                      |  2 +-
 security/keys/key.c                         |  2 +-
 security/selinux/avc.c                      |  2 +-
 security/selinux/hooks.c                    |  2 +-
 security/selinux/ss/avtab.c                 |  2 +-
 189 files changed, 332 insertions(+), 332 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 05431621c861..8621a064f7e1 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -77,7 +77,7 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h>
 Many drivers need lots of small dma-coherent memory regions for DMA
 descriptors or I/O buffers.  Rather than allocating in units of a page
 or more using dma_alloc_coherent(), you can use DMA pools.  These work
-much like a kmem_cache_t, except that they use the dma-coherent allocator
+much like a struct kmem_cache, except that they use the dma-coherent allocator
 not __get_free_pages().  Also, they understand common hardware constraints
 for alignment, like queue heads needing to be aligned on N byte boundaries.
 
@@ -94,7 +94,7 @@ The pool create() routines initialize a pool of dma-coherent buffers
 for use with a given device.  It must be called in a context which
 can sleep.
 
-The "name" is for diagnostics (like a kmem_cache_t name); dev and size
+The "name" is for diagnostics (like a struct kmem_cache name); dev and size
 are like what you'd pass to dma_alloc_coherent().  The device's hardware
 alignment requirement for this type of data is "align" (which is expressed
 in bytes, and must be a power of two).  If your device has no boundary
diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 3d211dc2f2f9..01abb0ace234 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -40,7 +40,7 @@
 
 /* io map for dma */
 static void __iomem *dma_base;
-static kmem_cache_t *dma_kmem;
+static struct kmem_cache *dma_kmem;
 
 struct s3c24xx_dma_selection dma_sel;
 
@@ -1271,7 +1271,7 @@ struct sysdev_class dma_sysclass = {
 
 /* kmem cache implementation */
 
-static void s3c2410_dma_cache_ctor(void *p, kmem_cache_t *c, unsigned long f)
+static void s3c2410_dma_cache_ctor(void *p, struct kmem_cache *c, unsigned long f)
 {
 	memset(p, 0, sizeof(struct s3c2410_dma_buf));
 }
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
index 34def6397c3c..f2901581d4da 100644
--- a/arch/arm26/mm/memc.c
+++ b/arch/arm26/mm/memc.c
@@ -24,7 +24,7 @@
 
 #define MEMC_TABLE_SIZE (256*sizeof(unsigned long))
 
-kmem_cache_t *pte_cache, *pgd_cache;
+struct kmem_cache *pte_cache, *pgd_cache;
 int page_nr;
 
 /*
@@ -162,12 +162,12 @@ void __init create_memmap_holes(struct meminfo *mi)
 {
 }
 
-static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void pte_cache_ctor(void *pte, struct kmem_cache *cache, unsigned long flags)
 {
 	memzero(pte, sizeof(pte_t) * PTRS_PER_PTE);
 }
 
-static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags)
+static void pgd_cache_ctor(void *pgd, struct kmem_cache *cache, unsigned long flags)
 {
 	memzero(pgd + MEMC_TABLE_SIZE, USER_PTRS_PER_PGD * sizeof(pgd_t));
 }
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index f76dd03ddd99..19b13be114a2 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -18,7 +18,7 @@
 #include <asm/cacheflush.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));
-kmem_cache_t *pgd_cache;
+struct kmem_cache *pgd_cache;
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
@@ -100,7 +100,7 @@ static inline void pgd_list_del(pgd_t *pgd)
 		set_page_private(next, (unsigned long) pprev);
 }
 
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags;
 
@@ -120,7 +120,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 }
 
 /* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 167416155ee4..a6a8e397dd88 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -699,8 +699,8 @@ int remove_memory(u64 start, u64 size)
 #endif
 #endif
 
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
+struct kmem_cache *pgd_cache;
+struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 10126e3f8174..33be236fc6af 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -193,7 +193,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
 {
 	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
 }
@@ -233,7 +233,7 @@ static inline void pgd_list_del(pgd_t *pgd)
 		set_page_private(next, (unsigned long)pprev);
 }
 
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags;
 
@@ -253,7 +253,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 }
 
 /* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
index c187743965a0..6af400a12ca1 100644
--- a/arch/ia64/ia32/ia32_support.c
+++ b/arch/ia64/ia32/ia32_support.c
@@ -249,7 +249,7 @@ ia32_init (void)
 
 #if PAGE_SHIFT > IA32_PAGE_SHIFT
 	{
-		extern kmem_cache_t *partial_page_cachep;
+		extern struct kmem_cache *partial_page_cachep;
 
 		partial_page_cachep = kmem_cache_create("partial_page_cache",
 							sizeof(struct partial_page), 0, 0,
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 9d6a3f210148..a4a6e1463af8 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -254,7 +254,7 @@ mmap_subpage (struct file *file, unsigned long start, unsigned long end, int pro
 }
 
 /* SLAB cache for partial_page structures */
-kmem_cache_t *partial_page_cachep;
+struct kmem_cache *partial_page_cachep;
 
 /*
  * init partial_page_list.
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index b9561d300516..7d0f13fecc0e 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -101,7 +101,7 @@ struct flash_block_list_header { /* just the header of flash_block_list */
 static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL};
 
 /* Use slab cache to guarantee 4k alignment */
-static kmem_cache_t *flash_block_cache = NULL;
+static struct kmem_cache *flash_block_cache = NULL;
 
 #define FLASH_BLOCK_LIST_VERSION (1UL)
 
@@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf,
 }
 
 /* constructor for flash_block_cache */
-void rtas_block_ctor(void *ptr, kmem_cache_t *cache, unsigned long flags)
+void rtas_block_ctor(void *ptr, struct kmem_cache *cache, unsigned long flags)
 {
 	memset(ptr, 0, RTAS_BLK_SIZE);
 }
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 424a8f57e155..89c836d54809 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1047,7 +1047,7 @@ repeat:
 	return err;
 }
 
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
 {
 	memset(addr, 0, kmem_cache_size(cache));
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9a178549cbcf..d12a87ec5ae9 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -141,7 +141,7 @@ static int __init setup_kcore(void)
 }
 module_init(setup_kcore);
 
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
 {
 	memset(addr, 0, kmem_cache_size(cache));
 }
@@ -166,9 +166,9 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 /* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
  * can't put into the tables above, because HPAGE_SHIFT is not compile
  * time constant. */
-kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
 #else
-kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
 #endif
 
 void pgtable_cache_init(void)
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 7edfcc9d2853..e3af9112c026 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -40,7 +40,7 @@
 
 #include "spufs.h"
 
-static kmem_cache_t *spufs_inode_cache;
+static struct kmem_cache *spufs_inode_cache;
 char *isolated_loader;
 
 static struct inode *
@@ -65,7 +65,7 @@ spufs_destroy_inode(struct inode *inode)
 }
 
 static void
-spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags)
+spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct spufs_inode_info *ei = p;
 
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 55f43506995a..0c9ea38d2caa 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -38,7 +38,7 @@ struct sq_mapping {
 
 static struct sq_mapping *sq_mapping_list;
 static DEFINE_SPINLOCK(sq_mapping_lock);
-static kmem_cache_t *sq_cache;
+static struct kmem_cache *sq_cache;
 static unsigned long *sq_bitmap;
 
 #define store_queue_barrier()			\
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index 92e745341e4d..b60ad83a7635 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -30,7 +30,7 @@
 
 #define NR_PMB_ENTRIES	16
 
-static kmem_cache_t *pmb_cache;
+static struct kmem_cache *pmb_cache;
 static unsigned long pmb_map;
 
 static struct pmb_entry pmb_init_map[] = {
@@ -283,7 +283,7 @@ void pmb_unmap(unsigned long addr)
 	} while (pmbe);
 }
 
-static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags)
+static void pmb_cache_ctor(void *pmb, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct pmb_entry *pmbe = pmb;
 
@@ -297,7 +297,7 @@ static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags)
 	spin_unlock_irq(&pmb_list_lock);
 }
 
-static void pmb_cache_dtor(void *pmb, kmem_cache_t *cachep, unsigned long flags)
+static void pmb_cache_dtor(void *pmb, struct kmem_cache *cachep, unsigned long flags)
 {
 	spin_lock_irq(&pmb_list_lock);
 	pmb_list_del(pmb);
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 09cb7fccc03a..a8e8802eed4d 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -176,9 +176,9 @@ unsigned long sparc64_kern_sec_context __read_mostly;
 
 int bigkernel = 0;
 
-kmem_cache_t *pgtable_cache __read_mostly;
+struct kmem_cache *pgtable_cache __read_mostly;
 
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
 {
 	clear_page(addr);
 }
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index beaa02810f0e..236d02f41a01 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -239,7 +239,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 	}
 }
 
-static kmem_cache_t *tsb_caches[8] __read_mostly;
+static struct kmem_cache *tsb_caches[8] __read_mostly;
 
 static const char *tsb_cache_names[8] = {
 	"tsb_8KB",
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 84e9be073180..78c6b312bd30 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -43,8 +43,8 @@ static int cfq_slice_idle = HZ / 125;
 #define RQ_CIC(rq)		((struct cfq_io_context*)(rq)->elevator_private)
 #define RQ_CFQQ(rq)		((rq)->elevator_private2)
 
-static kmem_cache_t *cfq_pool;
-static kmem_cache_t *cfq_ioc_pool;
+static struct kmem_cache *cfq_pool;
+static struct kmem_cache *cfq_ioc_pool;
 
 static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index cc6e95f8e5d9..a4ff3271d4a8 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -44,17 +44,17 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node);
 /*
  * For the allocated request tables
  */
-static kmem_cache_t *request_cachep;
+static struct kmem_cache *request_cachep;
 
 /*
  * For queue allocation
  */
-static kmem_cache_t *requestq_cachep;
+static struct kmem_cache *requestq_cachep;
 
 /*
  * For io context allocations
  */
-static kmem_cache_t *iocontext_cachep;
+static struct kmem_cache *iocontext_cachep;
 
 /*
  * Controlling structure to kblockd
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index aa25f8b09fe3..478489c568a4 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -12,7 +12,7 @@
 #include <linux/netdevice.h>
 #include "aoe.h"
 
-static kmem_cache_t *buf_pool_cache;
+static struct kmem_cache *buf_pool_cache;
 
 static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
 {
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 31e5cc49d61a..27d6c642415d 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -133,7 +133,7 @@ struct eth1394_node_info {
 #define ETH1394_DRIVER_NAME "eth1394"
 static const char driver_name[] = ETH1394_DRIVER_NAME;
 
-static kmem_cache_t *packet_task_cache;
+static struct kmem_cache *packet_task_cache;
 
 static struct hpsb_highlevel eth1394_highlevel;
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ed2d4ef27fd8..c7bee4f2eedb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -101,7 +101,7 @@ struct crypt_config {
 #define MIN_POOL_PAGES 32
 #define MIN_BIO_PAGES  8
 
-static kmem_cache_t *_crypt_io_pool;
+static struct kmem_cache *_crypt_io_pool;
 
 /*
  * Different IV generation algorithms:
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e77ee6fd1044..cf8bf052138e 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -101,7 +101,7 @@ typedef int (*action_fn) (struct pgpath *pgpath);
 
 #define MIN_IOS 256	/* Mempool size */
 
-static kmem_cache_t *_mpio_cache;
+static struct kmem_cache *_mpio_cache;
 
 struct workqueue_struct *kmultipathd;
 static void process_queued_ios(struct work_struct *work);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 91c7aa1fed0e..b0ce2ce82278 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -88,8 +88,8 @@ struct pending_exception {
  * Hash table mapping origin volumes to lists of snapshots and
  * a lock to protect it
  */
-static kmem_cache_t *exception_cache;
-static kmem_cache_t *pending_cache;
+static struct kmem_cache *exception_cache;
+static struct kmem_cache *pending_cache;
 static mempool_t *pending_pool;
 
 /*
@@ -228,7 +228,7 @@ static int init_exception_table(struct exception_table *et, uint32_t size)
 	return 0;
 }
 
-static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
+static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem)
 {
 	struct list_head *slot;
 	struct exception *ex, *next;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fc4f743f3b53..7ec1b112a6d5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -121,8 +121,8 @@ struct mapped_device {
 };
 
 #define MIN_IOS 256
-static kmem_cache_t *_io_cache;
-static kmem_cache_t *_tio_cache;
+static struct kmem_cache *_io_cache;
+static struct kmem_cache *_tio_cache;
 
 static int __init local_init(void)
 {
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index b3c01496c737..b46f6c575f7e 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -203,7 +203,7 @@ struct kcopyd_job {
 /* FIXME: this should scale with the number of pages */
 #define MIN_JOBS 512
 
-static kmem_cache_t *_job_cache;
+static struct kmem_cache *_job_cache;
 static mempool_t *_job_pool;
 
 /*
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 69c3e201fa3b..52914d5cec76 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -348,7 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
 
 static int grow_stripes(raid5_conf_t *conf, int num)
 {
-	kmem_cache_t *sc;
+	struct kmem_cache *sc;
 	int devs = conf->raid_disks;
 
 	sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
@@ -397,7 +397,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
 	int err = 0;
-	kmem_cache_t *sc;
+	struct kmem_cache *sc;
 	int i;
 
 	if (newsize <= conf->pool_size)
diff --git a/drivers/message/i2o/i2o_block.h b/drivers/message/i2o/i2o_block.h
index d9fdc95b440d..67f921b4419b 100644
--- a/drivers/message/i2o/i2o_block.h
+++ b/drivers/message/i2o/i2o_block.h
@@ -64,7 +64,7 @@
 
 /* I2O Block OSM mempool struct */
 struct i2o_block_mempool {
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 	mempool_t *pool;
 };
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 9fc9a34ef24a..9168401401bc 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -26,7 +26,7 @@
 
 static DEFINE_SPINLOCK(msi_lock);
 static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
-static kmem_cache_t* msi_cachep;
+static struct kmem_cache* msi_cachep;
 
 static int pci_msi_enable = 1;
 
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 17fdd8c9f740..cf28ccc57948 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -25,7 +25,7 @@
 
 #include "dasd_int.h"
 
-kmem_cache_t *dasd_page_cache;
+struct kmem_cache *dasd_page_cache;
 EXPORT_SYMBOL_GPL(dasd_page_cache);
 
 /*
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 9f52004f6fc2..dc5dd509434d 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -474,7 +474,7 @@ extern struct dasd_profile_info_t dasd_global_profile;
 extern unsigned int dasd_profile_level;
 extern struct block_device_operations dasd_device_operations;
 
-extern kmem_cache_t *dasd_page_cache;
+extern struct kmem_cache *dasd_page_cache;
 
 struct dasd_ccw_req *
 dasd_kmalloc_request(char *, int, int, struct dasd_device *);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 74c0eac083e4..32933ed54b8a 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -1032,9 +1032,9 @@ struct zfcp_data {
 	wwn_t                   init_wwpn;
 	fcp_lun_t               init_fcp_lun;
 	char 			*driver_version;
-	kmem_cache_t		*fsf_req_qtcb_cache;
-	kmem_cache_t		*sr_buffer_cache;
-	kmem_cache_t		*gid_pn_cache;
+	struct kmem_cache		*fsf_req_qtcb_cache;
+	struct kmem_cache		*sr_buffer_cache;
+	struct kmem_cache		*gid_pn_cache;
 };
 
 /**
diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h
index 71a031df7a34..32f513b1b78a 100644
--- a/drivers/scsi/aic94xx/aic94xx.h
+++ b/drivers/scsi/aic94xx/aic94xx.h
@@ -56,8 +56,8 @@
 /* 2*ITNL timeout + 1 second */
 #define AIC94XX_SCB_TIMEOUT  (5*HZ)
 
-extern kmem_cache_t *asd_dma_token_cache;
-extern kmem_cache_t *asd_ascb_cache;
+extern struct kmem_cache *asd_dma_token_cache;
+extern struct kmem_cache *asd_ascb_cache;
 extern char sas_addr_str[2*SAS_ADDR_SIZE + 1];
 
 static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr)
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
index af7e01134364..da94e126ca83 100644
--- a/drivers/scsi/aic94xx/aic94xx_hwi.c
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -1047,7 +1047,7 @@ irqreturn_t asd_hw_isr(int irq, void *dev_id)
 static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha,
 					      gfp_t gfp_flags)
 {
-	extern kmem_cache_t *asd_ascb_cache;
+	extern struct kmem_cache *asd_ascb_cache;
 	struct asd_seq_data *seq = &asd_ha->seq;
 	struct asd_ascb *ascb;
 	unsigned long flags;
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 42302ef05ee5..fbc82b00a418 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -450,8 +450,8 @@ static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha)
 	asd_ha->scb_pool = NULL;
 }
 
-kmem_cache_t *asd_dma_token_cache;
-kmem_cache_t *asd_ascb_cache;
+struct kmem_cache *asd_dma_token_cache;
+struct kmem_cache *asd_ascb_cache;
 
 static int asd_create_global_caches(void)
 {
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index d65bc4e0f214..2f0c07fc3f48 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -36,7 +36,7 @@
 
 #include "../scsi_sas_internal.h"
 
-kmem_cache_t *sas_task_cache;
+struct kmem_cache *sas_task_cache;
 
 /*------------ SAS addr hash -----------*/
 void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index cbe0cad83b68..d03523d3bf38 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -24,7 +24,7 @@ char qla2x00_version_str[40];
 /*
  * SRB allocation cache
  */
-static kmem_cache_t *srb_cachep;
+static struct kmem_cache *srb_cachep;
 
 /*
  * Ioctl related information.
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 969c9e431028..9ef693c8809a 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -19,7 +19,7 @@ char qla4xxx_version_str[40];
 /*
  * SRB allocation cache
  */
-static kmem_cache_t *srb_cachep;
+static struct kmem_cache *srb_cachep;
 
 /*
  * Module parameter information and variables
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index fafc00deaade..24cffd98ee63 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -136,7 +136,7 @@ const char * scsi_device_type(unsigned type)
 EXPORT_SYMBOL(scsi_device_type);
 
 struct scsi_host_cmd_pool {
-	kmem_cache_t	*slab;
+	struct kmem_cache	*slab;
 	unsigned int	users;
 	char		*name;
 	unsigned int	slab_flags;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fb616c69151f..1748e27501cd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -36,7 +36,7 @@
 struct scsi_host_sg_pool {
 	size_t		size;
 	char		*name; 
-	kmem_cache_t	*slab;
+	struct kmem_cache	*slab;
 	mempool_t	*pool;
 };
 
@@ -241,7 +241,7 @@ struct scsi_io_context {
 	char sense[SCSI_SENSE_BUFFERSIZE];
 };
 
-static kmem_cache_t *scsi_io_context_cache;
+static struct kmem_cache *scsi_io_context_cache;
 
 static void scsi_end_async(struct request *req, int uptodate)
 {
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 386dbae17b44..d402aff5f314 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -33,7 +33,7 @@
 #include "scsi_tgt_priv.h"
 
 static struct workqueue_struct *scsi_tgtd;
-static kmem_cache_t *scsi_tgt_cmd_cache;
+static struct kmem_cache *scsi_tgt_cmd_cache;
 
 /*
  * TODO: this struct will be killed when the block layer supports large bios
diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c
index 7fd872aa654a..9325e46a68c0 100644
--- a/drivers/usb/host/hc_crisv10.c
+++ b/drivers/usb/host/hc_crisv10.c
@@ -275,13 +275,13 @@ static volatile USB_SB_Desc_t TxIntrSB_zout __attribute__ ((aligned (4)));
 static int zout_buffer[4] __attribute__ ((aligned (4)));
 
 /* Cache for allocating new EP and SB descriptors. */
-static kmem_cache_t *usb_desc_cache;
+static struct kmem_cache *usb_desc_cache;
 
 /* Cache for the registers allocated in the top half. */
-static kmem_cache_t *top_half_reg_cache;
+static struct kmem_cache *top_half_reg_cache;
 
 /* Cache for the data allocated in the isoc descr top half. */
-static kmem_cache_t *isoc_compl_cache;
+static struct kmem_cache *isoc_compl_cache;
 
 static struct usb_bus *etrax_usb_bus;
 
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 226bf3de8edd..e87692c31be4 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -81,7 +81,7 @@ MODULE_PARM_DESC(debug, "Debug level");
 static char *errbuf;
 #define ERRBUF_LEN    (32 * 1024)
 
-static kmem_cache_t *uhci_up_cachep;	/* urb_priv */
+static struct kmem_cache *uhci_up_cachep;	/* urb_priv */
 
 static void suspend_rh(struct uhci_hcd *uhci, enum uhci_rh_state new_state);
 static void wakeup_rh(struct uhci_hcd *uhci);
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index 46aecc8754f1..05cf2c9a8f84 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -50,7 +50,7 @@ struct mon_event_text {
 
 #define SLAB_NAME_SZ  30
 struct mon_reader_text {
-	kmem_cache_t *e_slab;
+	struct kmem_cache *e_slab;
 	int nevents;
 	struct list_head e_list;
 	struct mon_reader r;	/* In C, parent class can be placed anywhere */
@@ -63,7 +63,7 @@ struct mon_reader_text {
 	char slab_name[SLAB_NAME_SZ];
 };
 
-static void mon_text_ctor(void *, kmem_cache_t *, unsigned long);
+static void mon_text_ctor(void *, struct kmem_cache *, unsigned long);
 
 /*
  * mon_text_submit
@@ -450,7 +450,7 @@ const struct file_operations mon_fops_text = {
 /*
  * Slab interface: constructor.
  */
-static void mon_text_ctor(void *mem, kmem_cache_t *slab, unsigned long sflags)
+static void mon_text_ctor(void *mem, struct kmem_cache *slab, unsigned long sflags)
 {
 	/*
 	 * Nothing to initialize. No, really!
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 52eb10ca654e..e5a205c9f42f 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -212,7 +212,7 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t *adfs_inode_cachep;
+static struct kmem_cache *adfs_inode_cachep;
 
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
@@ -228,7 +228,7 @@ static void adfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 81c73ec09f66..3de93e799949 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -66,7 +66,7 @@ affs_write_super(struct super_block *sb)
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
-static kmem_cache_t * affs_inode_cachep;
+static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
@@ -83,7 +83,7 @@ static void affs_destroy_inode(struct inode *inode)
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c6ead009bf78..9a351c4c7564 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -35,7 +35,7 @@ struct afs_mount_params {
 	struct afs_volume	*volume;
 };
 
-static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
+static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
 			    unsigned long flags);
 
 static int afs_get_sb(struct file_system_type *fs_type,
@@ -65,7 +65,7 @@ static struct super_operations afs_super_ops = {
 	.put_super	= afs_put_super,
 };
 
-static kmem_cache_t *afs_inode_cachep;
+static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
 
 /*****************************************************************************/
@@ -384,7 +384,7 @@ static void afs_put_super(struct super_block *sb)
 /*
  * initialise an inode cache slab element prior to any use
  */
-static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep,
+static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct afs_vnode *vnode = (struct afs_vnode *) _vnode;
diff --git a/fs/aio.c b/fs/aio.c
index 287a1bc7a182..13aa9298abf4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -47,8 +47,8 @@ unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
 /*----end sysctl variables---*/
 
-static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+static struct kmem_cache	*kiocb_cachep;
+static struct kmem_cache	*kioctx_cachep;
 
 static struct workqueue_struct *aio_wq;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 995348df94ad..bce402eee554 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -61,7 +61,7 @@ static const struct super_operations befs_sops = {
 };
 
 /* slab cache for befs_inode_info objects */
-static kmem_cache_t *befs_inode_cachep;
+static struct kmem_cache *befs_inode_cachep;
 
 static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
 	
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 2e45123c8f7a..eac175ed9f44 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -228,7 +228,7 @@ static void bfs_write_super(struct super_block *s)
 	unlock_kernel();
 }
 
-static kmem_cache_t * bfs_inode_cachep;
+static struct kmem_cache * bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
@@ -244,7 +244,7 @@ static void bfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bfs_inode_info *bi = foo;
 
diff --git a/fs/bio.c b/fs/bio.c
index 50c40ce2cead..7ec737eda72b 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab __read_mostly;
+static struct kmem_cache *bio_slab __read_mostly;
 
 #define BIOVEC_NR_POOLS 6
 
@@ -44,7 +44,7 @@ mempool_t *bio_split_pool __read_mostly;
 struct biovec_slab {
 	int nr_vecs;
 	char *name; 
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 };
 
 /*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 063506705f27..13816b4d76f6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
  */
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
-static kmem_cache_t * bdev_cachep __read_mostly;
+static struct kmem_cache * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
@@ -253,7 +253,7 @@ static void bdev_destroy_inode(struct inode *inode)
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
 	struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index 35527dca1dbc..a8ca0ac21488 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2908,7 +2908,7 @@ asmlinkage long sys_bdflush(int func, long data)
 /*
  * Buffer-head allocation
  */
-static kmem_cache_t *bh_cachep;
+static struct kmem_cache *bh_cachep;
 
 /*
  * Once the number of bh's in the machine exceeds this level, we start
@@ -2961,7 +2961,7 @@ void free_buffer_head(struct buffer_head *bh)
 EXPORT_SYMBOL(free_buffer_head);
 
 static void
-init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
+init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 			    SLAB_CTOR_CONSTRUCTOR) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84168629cea3..e6b5866e5001 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -81,7 +81,7 @@ extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static int
 cifs_read_super(struct super_block *sb, void *data,
@@ -232,11 +232,11 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
 		return generic_permission(inode, mask, NULL);
 }
 
-static kmem_cache_t *cifs_inode_cachep;
-static kmem_cache_t *cifs_req_cachep;
-static kmem_cache_t *cifs_mid_cachep;
-kmem_cache_t *cifs_oplock_cachep;
-static kmem_cache_t *cifs_sm_req_cachep;
+static struct kmem_cache *cifs_inode_cachep;
+static struct kmem_cache *cifs_req_cachep;
+static struct kmem_cache *cifs_mid_cachep;
+struct kmem_cache *cifs_oplock_cachep;
+static struct kmem_cache *cifs_sm_req_cachep;
 mempool_t *cifs_sm_req_poolp;
 mempool_t *cifs_req_poolp;
 mempool_t *cifs_mid_poolp;
@@ -668,7 +668,7 @@ const struct file_operations cifs_dir_ops = {
 };
 
 static void
-cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags)
+cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct cifsInodeInfo *cifsi = inode;
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1f727765a8ea..f80007eaebf4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -34,7 +34,7 @@
 #include "cifs_debug.h"
   
 extern mempool_t *cifs_mid_poolp;
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static struct mid_q_entry *
 AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 50cedd2617d6..b64659fa82d0 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -38,7 +38,7 @@ static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
 static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
-static kmem_cache_t * coda_inode_cachep;
+static struct kmem_cache * coda_inode_cachep;
 
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
 
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 3f4ff7a242b9..f92cd303d2c9 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,7 +49,7 @@ struct configfs_dirent {
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
 extern struct vfsmount * configfs_mount;
-extern kmem_cache_t *configfs_dir_cachep;
+extern struct kmem_cache *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 68bd5c93ca52..ed678529ebb2 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -38,7 +38,7 @@
 
 struct vfsmount * configfs_mount = NULL;
 struct super_block * configfs_sb = NULL;
-kmem_cache_t *configfs_dir_cachep;
+struct kmem_cache *configfs_dir_cachep;
 static int configfs_mnt_count = 0;
 
 static struct super_operations configfs_ops = {
diff --git a/fs/dcache.c b/fs/dcache.c
index fd4a428998ef..a7c67cee5d83 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+static struct kmem_cache *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -2072,10 +2072,10 @@ static void __init dcache_init(unsigned long mempages)
 }
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep __read_mostly;
+struct kmem_cache *names_cachep __read_mostly;
 
 /* SLAB cache for file structures */
-kmem_cache_t *filp_cachep __read_mostly;
+struct kmem_cache *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0c4b0674854b..21af1629f9bc 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -37,7 +37,7 @@ struct dcookie_struct {
 
 static LIST_HEAD(dcookie_users);
 static DEFINE_MUTEX(dcookie_mutex);
-static kmem_cache_t *dcookie_cache __read_mostly;
+static struct kmem_cache *dcookie_cache __read_mostly;
 static struct list_head *dcookie_hashtable __read_mostly;
 static size_t hash_size __read_mostly;
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 989b608fd836..5352b03ff5aa 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -15,7 +15,7 @@
 #include "config.h"
 #include "memory.h"
 
-static kmem_cache_t *lkb_cache;
+static struct kmem_cache *lkb_cache;
 
 
 int dlm_memory_init(void)
diff --git a/fs/dnotify.c b/fs/dnotify.c
index e778b1737b79..1f26a2b9eee1 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -23,7 +23,7 @@
 
 int dir_notify_enable __read_mostly = 1;
 
-static kmem_cache_t *dn_cache __read_mostly;
+static struct kmem_cache *dn_cache __read_mostly;
 
 static void redo_inode_mask(struct inode *inode)
 {
diff --git a/fs/dquot.c b/fs/dquot.c
index c6ae6c0e0cfc..f9cd5e23ebdf 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -131,7 +131,7 @@ static struct quota_format_type *quota_formats;	/* List of registered formats */
 static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
 
 /* SLAB cache for dquot structures */
-static kmem_cache_t *dquot_cachep;
+static struct kmem_cache *dquot_cachep;
 
 int register_quota_format(struct quota_format_type *fmt)
 {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a2c6ccbce300..306f8fbd1a08 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -546,7 +546,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
 }
 
 static struct ecryptfs_cache_info {
-	kmem_cache_t **cache;
+	struct kmem_cache **cache;
 	const char *name;
 	size_t size;
 	void (*ctor)(void*, struct kmem_cache *, unsigned long);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 69b15a996cfc..dfebf21289f4 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -52,7 +52,7 @@ static struct pt_types sgi_pt_types[] = {
 };
 
 
-static kmem_cache_t * efs_inode_cachep;
+static struct kmem_cache * efs_inode_cachep;
 
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
@@ -68,7 +68,7 @@ static void efs_destroy_inode(struct inode *inode)
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f5c88435c6be..88a6f8d0b88e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -283,10 +283,10 @@ static struct mutex epmutex;
 static struct poll_safewake psw;
 
 /* Slab cache used to allocate "struct epitem" */
-static kmem_cache_t *epi_cache __read_mostly;
+static struct kmem_cache *epi_cache __read_mostly;
 
 /* Slab cache used to allocate "struct eppoll_entry" */
-static kmem_cache_t *pwq_cache __read_mostly;
+static struct kmem_cache *pwq_cache __read_mostly;
 
 /* Virtual fs used to allocate inodes for eventpoll files */
 static struct vfsmount *eventpoll_mnt __read_mostly;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85c237e73853..3aafb1dd917a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -135,7 +135,7 @@ static void ext2_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t * ext2_inode_cachep;
+static struct kmem_cache * ext2_inode_cachep;
 
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
@@ -156,7 +156,7 @@ static void ext2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0cf633f0cfa3..9856565d4ddc 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -436,7 +436,7 @@ static void ext3_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext3_inode_cachep;
+static struct kmem_cache *ext3_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -462,7 +462,7 @@ static void ext3_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c730cbc84030..f2e8c4aa0fd2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -486,7 +486,7 @@ static void ext4_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext4_inode_cachep;
+static struct kmem_cache *ext4_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -513,7 +513,7 @@ static void ext4_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 8c272278455c..05c2941c74f2 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -34,9 +34,9 @@ static inline int fat_max_cache(struct inode *inode)
 	return FAT_MAX_CACHE;
 }
 
-static kmem_cache_t *fat_cache_cachep;
+static struct kmem_cache *fat_cache_cachep;
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct fat_cache *cache = (struct fat_cache *)foo;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b58fd0c9f3cd..a9e4688582a2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -477,7 +477,7 @@ static void fat_put_super(struct super_block *sb)
 	kfree(sbi);
 }
 
-static kmem_cache_t *fat_inode_cachep;
+static struct kmem_cache *fat_inode_cachep;
 
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
@@ -493,7 +493,7 @@ static void fat_destroy_inode(struct inode *inode)
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
 
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c03dc9cb21cb..4740d35e52cd 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -553,7 +553,7 @@ int send_sigurg(struct fown_struct *fown)
 }
 
 static DEFINE_RWLOCK(fasync_lock);
-static kmem_cache_t *fasync_cache __read_mostly;
+static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
  * fasync_helper() is used by some character device drivers (mainly mice)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index d2dd0d700077..0b7ae897cb78 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -46,7 +46,7 @@ extern const struct address_space_operations vxfs_immed_aops;
 
 extern struct inode_operations vxfs_immed_symlink_iops;
 
-kmem_cache_t		*vxfs_inode_cachep;
+struct kmem_cache		*vxfs_inode_cachep;
 
 
 #ifdef DIAGNOSTIC
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c15139f2756..357764d85ff1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,7 @@
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
-static kmem_cache_t *fuse_req_cachep;
+static struct kmem_cache *fuse_req_cachep;
 
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e039e2047cce..2bdc652b8b46 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -22,7 +22,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *fuse_inode_cachep;
+static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
@@ -601,7 +601,7 @@ static struct file_system_type fuse_fs_type = {
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
-static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep,
 				 unsigned long flags)
 {
 	struct inode * inode = foo;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 9889c1eacec1..7c1a9e22a526 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -25,7 +25,7 @@
 #include "util.h"
 #include "glock.h"
 
-static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_inode *ip = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
@@ -37,7 +37,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long
 	}
 }
 
-static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_glock *gl = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 196c604faadc..e5707a9f78c2 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -23,9 +23,9 @@
 #include "lm.h"
 #include "util.h"
 
-kmem_cache_t *gfs2_glock_cachep __read_mostly;
-kmem_cache_t *gfs2_inode_cachep __read_mostly;
-kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
+struct kmem_cache *gfs2_glock_cachep __read_mostly;
+struct kmem_cache *gfs2_inode_cachep __read_mostly;
+struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 76a50899fe9e..7984dcf89ad0 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -146,9 +146,9 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
 
 
-extern kmem_cache_t *gfs2_glock_cachep;
-extern kmem_cache_t *gfs2_inode_cachep;
-extern kmem_cache_t *gfs2_bufdata_cachep;
+extern struct kmem_cache *gfs2_glock_cachep;
+extern struct kmem_cache *gfs2_inode_cachep;
+extern struct kmem_cache *gfs2_bufdata_cachep;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 					   unsigned int *p)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ffc6409132c8..a36987966004 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -24,7 +24,7 @@
 #include "hfs_fs.h"
 #include "btree.h"
 
-static kmem_cache_t *hfs_inode_cachep;
+static struct kmem_cache *hfs_inode_cachep;
 
 MODULE_LICENSE("GPL");
 
@@ -430,7 +430,7 @@ static struct file_system_type hfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfs_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfs_inode_info *i = p;
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4a0c70c76c8a..0f513c6bf843 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -434,7 +434,7 @@ MODULE_AUTHOR("Brad Boyer");
 MODULE_DESCRIPTION("Extended Macintosh Filesystem");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *hfsplus_inode_cachep;
+static struct kmem_cache *hfsplus_inode_cachep;
 
 static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
@@ -467,7 +467,7 @@ static struct file_system_type hfsplus_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfsplus_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfsplus_inode_info *i = p;
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 46ceadd6f16a..34d68e211714 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -160,7 +160,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * hpfs_inode_cachep;
+static struct kmem_cache * hpfs_inode_cachep;
 
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
@@ -177,7 +177,7 @@ static void hpfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 36e52173a54a..0706f5aac6a2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -513,7 +513,7 @@ static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
 }
 
 
-static kmem_cache_t *hugetlbfs_inode_cachep;
+static struct kmem_cache *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
@@ -545,7 +545,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 };
 
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
 
diff --git a/fs/inode.c b/fs/inode.c
index dd15984d51a8..699aa4f7aa74 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -97,7 +97,7 @@ static DEFINE_MUTEX(iprune_mutex);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep __read_mostly;
+static struct kmem_cache * inode_cachep __read_mostly;
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
 
 EXPORT_SYMBOL(inode_init_once);
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct inode * inode = (struct inode *) foo;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 017cb0f134d6..e1956e6f116c 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -34,8 +34,8 @@
 
 #include <asm/ioctls.h>
 
-static kmem_cache_t *watch_cachep __read_mostly;
-static kmem_cache_t *event_cachep __read_mostly;
+static struct kmem_cache *watch_cachep __read_mostly;
+static struct kmem_cache *event_cachep __read_mostly;
 
 static struct vfsmount *inotify_mnt __read_mostly;
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4b6381cd2cf4..ea55b6c469ec 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -57,7 +57,7 @@ static void isofs_put_super(struct super_block *sb)
 static void isofs_read_inode(struct inode *);
 static int isofs_statfs (struct dentry *, struct kstatfs *);
 
-static kmem_cache_t *isofs_inode_cachep;
+static struct kmem_cache *isofs_inode_cachep;
 
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct iso_inode_info *ei = foo;
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b85c686b60db..a8774bed20b6 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
 };
@@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *journal_head_cache;
+static struct kmem_cache *journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd_handle_cache;
+struct kmem_cache *jbd_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c532429d8d9b..d204ab394f36 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *revoke_record_cache;
-static kmem_cache_t *revoke_table_cache;
+static struct kmem_cache *revoke_record_cache;
+static struct kmem_cache *revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c60f378b0f76..50356019ae30 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1641,7 +1641,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
 };
@@ -1704,7 +1704,7 @@ void jbd2_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *jbd2_journal_head_cache;
+static struct kmem_cache *jbd2_journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -2007,7 +2007,7 @@ static void __exit jbd2_remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 380d19917f37..f506646ad0ff 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *jbd2_revoke_record_cache;
-static kmem_cache_t *jbd2_revoke_table_cache;
+static struct kmem_cache *jbd2_revoke_record_cache;
+static struct kmem_cache *jbd2_revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3f7899ea4cba..9f15bce92022 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -61,8 +61,8 @@ static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
 static const struct address_space_operations jffs_address_operations;
 
-kmem_cache_t     *node_cache = NULL;
-kmem_cache_t     *fm_cache = NULL;
+struct kmem_cache     *node_cache = NULL;
+struct kmem_cache     *fm_cache = NULL;
 
 /* Called by the VFS at mount time to initialize the whole file system.  */
 static int jffs_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 29b68d939bd9..077258b2103e 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -29,8 +29,8 @@ static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset);
 static struct jffs_fm *jffs_alloc_fm(void);
 static void jffs_free_fm(struct jffs_fm *n);
 
-extern kmem_cache_t     *fm_cache;
-extern kmem_cache_t     *node_cache;
+extern struct kmem_cache     *fm_cache;
+extern struct kmem_cache     *node_cache;
 
 #if CONFIG_JFFS_FS_VERBOSE > 0
 void
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 33f291005012..83f9881ec4cc 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -19,16 +19,16 @@
 
 /* These are initialised to NULL in the kernel startup code.
    If you're porting to other operating systems, beware */
-static kmem_cache_t *full_dnode_slab;
-static kmem_cache_t *raw_dirent_slab;
-static kmem_cache_t *raw_inode_slab;
-static kmem_cache_t *tmp_dnode_info_slab;
-static kmem_cache_t *raw_node_ref_slab;
-static kmem_cache_t *node_frag_slab;
-static kmem_cache_t *inode_cache_slab;
+static struct kmem_cache *full_dnode_slab;
+static struct kmem_cache *raw_dirent_slab;
+static struct kmem_cache *raw_inode_slab;
+static struct kmem_cache *tmp_dnode_info_slab;
+static struct kmem_cache *raw_node_ref_slab;
+static struct kmem_cache *node_frag_slab;
+static struct kmem_cache *inode_cache_slab;
 #ifdef CONFIG_JFFS2_FS_XATTR
-static kmem_cache_t *xattr_datum_cache;
-static kmem_cache_t *xattr_ref_cache;
+static struct kmem_cache *xattr_datum_cache;
+static struct kmem_cache *xattr_ref_cache;
 #endif
 
 int __init jffs2_create_slab_caches(void)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 77be534ce422..7deb78254021 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -28,7 +28,7 @@
 
 static void jffs2_put_super(struct super_block *);
 
-static kmem_cache_t *jffs2_inode_cachep;
+static struct kmem_cache *jffs2_inode_cachep;
 
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
-static void jffs2_i_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 0cccd1c39d75..b1a1c7296014 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -74,7 +74,7 @@ static inline void lock_metapage(struct metapage *mp)
 }
 
 #define METAPOOL_MIN_PAGES 32
-static kmem_cache_t *metapage_cache;
+static struct kmem_cache *metapage_cache;
 static mempool_t *metapage_mempool;
 
 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
@@ -180,7 +180,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct metapage *mp = (struct metapage *)foo;
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 9c1c6e0e633d..ca3e19128532 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
 MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t * jfs_inode_cachep;
+static struct kmem_cache * jfs_inode_cachep;
 
 static struct super_operations jfs_super_operations;
 static struct export_operations jfs_export_operations;
@@ -748,7 +748,7 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
 
diff --git a/fs/locks.c b/fs/locks.c
index a7b97d50c1e0..1cb0c57fedbd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,7 +142,7 @@ int lease_break_time = 45;
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
-static kmem_cache_t *filelock_cache __read_mostly;
+static struct kmem_cache *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(locks_init_lock);
  * Initialises the fields of the file lock which are invariant for
  * free file_locks.
  */
-static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags)
 {
 	struct file_lock *lock = (struct file_lock *) foo;
 
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 0ff71256e65b..deeb9dc062d9 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -85,7 +85,7 @@ struct mb_cache {
 #ifndef MB_CACHE_INDEXES_COUNT
 	int				c_indexes_count;
 #endif
-	kmem_cache_t			*c_entry_cache;
+	struct kmem_cache			*c_entry_cache;
 	struct list_head		*c_block_hash;
 	struct list_head		*c_indexes_hash[0];
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ce532c2deda8..629e09b38c5c 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -51,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
 	return;
 }
 
-static kmem_cache_t * minix_inode_cachep;
+static struct kmem_cache * minix_inode_cachep;
 
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
@@ -67,7 +67,7 @@ static void minix_destroy_inode(struct inode *inode)
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 55442a6cf221..b00ac84ebbdd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -36,7 +36,7 @@ static int event;
 
 static struct list_head *mount_hashtable __read_mostly;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
-static kmem_cache_t *mnt_cache __read_mostly;
+static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
 /* /sys/fs */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index ed84d899220c..fae53243bb92 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -40,7 +40,7 @@ static void ncp_delete_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
 
-static kmem_cache_t * ncp_inode_cachep;
+static struct kmem_cache * ncp_inode_cachep;
 
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
@@ -56,7 +56,7 @@ static void ncp_destroy_inode(struct inode *inode)
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 769fd0a0c772..2f488e1d9b6c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -58,7 +58,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static kmem_cache_t *nfs_direct_cachep;
+static struct kmem_cache *nfs_direct_cachep;
 
 /*
  * This represents a set of asynchronous requests that we're waiting on
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6b53aae4ed2c..15afa460e629 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *);
 
 static void nfs_zap_acl_cache(struct inode *);
 
-static kmem_cache_t * nfs_inode_cachep;
+static struct kmem_cache * nfs_inode_cachep;
 
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
@@ -1111,7 +1111,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a1561a820abe..3fbfc2f03307 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,7 +20,7 @@
 
 #define NFS_PARANOIA 1
 
-static kmem_cache_t *nfs_page_cachep;
+static struct kmem_cache *nfs_page_cachep;
 
 static inline struct nfs_page *
 nfs_page_alloc(void)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 56f66f0ccb6a..244a8c45b68e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -38,7 +38,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *);
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
-static kmem_cache_t *nfs_rdata_cachep;
+static struct kmem_cache *nfs_rdata_cachep;
 static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f7dd0d005957..41b07288f99e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
-static kmem_cache_t *nfs_wdata_cachep;
+static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e431e93ab503..640c92b2a9f7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -84,10 +84,10 @@ static void nfs4_set_recdir(char *recdir);
  */
 static DEFINE_MUTEX(client_mutex);
 
-static kmem_cache_t *stateowner_slab = NULL;
-static kmem_cache_t *file_slab = NULL;
-static kmem_cache_t *stateid_slab = NULL;
-static kmem_cache_t *deleg_slab = NULL;
+static struct kmem_cache *stateowner_slab = NULL;
+static struct kmem_cache *file_slab = NULL;
+static struct kmem_cache *stateid_slab = NULL;
+static struct kmem_cache *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -1003,7 +1003,7 @@ alloc_init_file(struct inode *ino)
 }
 
 static void
-nfsd4_free_slab(kmem_cache_t **slab)
+nfsd4_free_slab(struct kmem_cache **slab)
 {
 	if (*slab == NULL)
 		return;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 01f91501f70a..941acf14e61f 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -66,7 +66,7 @@ static struct file_operations dlmfs_file_operations;
 static struct inode_operations dlmfs_dir_inode_operations;
 static struct inode_operations dlmfs_root_inode_operations;
 static struct inode_operations dlmfs_file_inode_operations;
-static kmem_cache_t *dlmfs_inode_cache;
+static struct kmem_cache *dlmfs_inode_cache;
 
 struct workqueue_struct *user_dlm_worker;
 
@@ -257,7 +257,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 }
 
 static void dlmfs_init_once(void *foo,
-			    kmem_cache_t *cachep,
+			    struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct dlmfs_inode_private *ip =
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f784177b6241..856012b4fa49 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dlm_dump_all_mles);
 #endif  /*  0  */
 
 
-static kmem_cache_t *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_mle_cache = NULL;
 
 
 static void dlm_mle_release(struct kref *kref);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index fcd4475d1f89..80ac69f11d9f 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -61,7 +61,7 @@ struct ocfs2_em_insert_context {
 	struct ocfs2_extent_map_entry *right_ent;
 };
 
-static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
+static struct kmem_cache *ocfs2_em_ent_cachep = NULL;
 
 
 static struct ocfs2_extent_map_entry *
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 46a378fbc40b..1a7dd2945b34 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -106,7 +106,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 #define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
 #define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
 
-extern kmem_cache_t *ocfs2_inode_cache;
+extern struct kmem_cache *ocfs2_inode_cache;
 
 extern const struct address_space_operations ocfs2_aops;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7574d26ee0ff..0524f8a04ad6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,7 +68,7 @@
 
 #include "buffer_head_io.h"
 
-static kmem_cache_t *ocfs2_inode_cachep = NULL;
+static struct kmem_cache *ocfs2_inode_cachep = NULL;
 
 /* OCFS2 needs to schedule several differnt types of work which
  * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -914,7 +914,7 @@ bail:
 }
 
 static void ocfs2_inode_init_once(void *data,
-				  kmem_cache_t *cachep,
+				  struct kmem_cache *cachep,
 				  unsigned long flags)
 {
 	struct ocfs2_inode_info *oi = data;
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 9707ed7a3206..39814b900fc0 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -69,7 +69,7 @@ struct ocfs2_meta_cache_item {
 	sector_t	c_block;
 };
 
-static kmem_cache_t *ocfs2_uptodate_cachep = NULL;
+static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
 void ocfs2_metadata_cache_init(struct inode *inode)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911d1bcfc567..26f44e0074ec 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -330,7 +330,7 @@ out:
 	return 0;
 }
 
-static kmem_cache_t *op_inode_cachep;
+static struct kmem_cache *op_inode_cachep;
 
 static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
@@ -415,7 +415,7 @@ static struct file_system_type openprom_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
+static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct op_inode_info *oi = (struct op_inode_info *) data;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b24cdb2f17c1..e26945ba685b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -81,7 +81,7 @@ static void proc_read_inode(struct inode * inode)
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 }
 
-static kmem_cache_t * proc_inode_cachep;
+static struct kmem_cache * proc_inode_cachep;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -105,7 +105,7 @@ static void proc_destroy_inode(struct inode *inode)
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5b943eb11d76..c047dc654d5c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -515,7 +515,7 @@ static void qnx4_read_inode(struct inode *inode)
 	brelse(bh);
 }
 
-static kmem_cache_t *qnx4_inode_cachep;
+static struct kmem_cache *qnx4_inode_cachep;
 
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
@@ -531,7 +531,7 @@ static void qnx4_destroy_inode(struct inode *inode)
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep,
+static void init_once(void *foo, struct kmem_cache * cachep,
 		      unsigned long flags)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 32332516d656..745bc714ba91 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -490,7 +490,7 @@ static void reiserfs_put_super(struct super_block *s)
 	return;
 }
 
-static kmem_cache_t *reiserfs_inode_cachep;
+static struct kmem_cache *reiserfs_inode_cachep;
 
 static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
@@ -507,7 +507,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
 
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index d1b455f9b669..c5af088d4a4c 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -550,7 +550,7 @@ romfs_read_inode(struct inode *i)
 	}
 }
 
-static kmem_cache_t * romfs_inode_cachep;
+static struct kmem_cache * romfs_inode_cachep;
 
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
@@ -566,7 +566,7 @@ static void romfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct romfs_inode_info *ei = (struct romfs_inode_info *) foo;
 
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 221617103484..4af4cd729a5a 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -50,7 +50,7 @@ static void smb_put_super(struct super_block *);
 static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
 
-static kmem_cache_t *smb_inode_cachep;
+static struct kmem_cache *smb_inode_cachep;
 
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
@@ -66,7 +66,7 @@ static void smb_destroy_inode(struct inode *inode)
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
 	unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 3eb1402191b9..a4bcae8a9aff 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -25,7 +25,7 @@
 #define ROUND_UP(x) (((x)+3) & ~3)
 
 /* cache for request structures */
-static kmem_cache_t *req_cachep;
+static struct kmem_cache *req_cachep;
 
 static int smb_request_send_req(struct smb_request *req);
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 20551a1b8a56..e503f858fba8 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,7 +16,7 @@
 
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
-kmem_cache_t *sysfs_dir_cachep;
+struct kmem_cache *sysfs_dir_cachep;
 
 static struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6f3d6bd52887..bd7cec295dab 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,6 +1,6 @@
 
 extern struct vfsmount * sysfs_mount;
-extern kmem_cache_t *sysfs_dir_cachep;
+extern struct kmem_cache *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index a6ca12b747cf..ead9864567e3 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -301,7 +301,7 @@ static void sysv_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static kmem_cache_t *sysv_inode_cachep;
+static struct kmem_cache *sysv_inode_cachep;
 
 static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
@@ -318,7 +318,7 @@ static void sysv_destroy_inode(struct inode *inode)
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e50f24221dea..397f54aaf667 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,7 +107,7 @@ static struct file_system_type udf_fstype = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static kmem_cache_t * udf_inode_cachep;
+static struct kmem_cache * udf_inode_cachep;
 
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
@@ -130,7 +130,7 @@ static void udf_destroy_inode(struct inode *inode)
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *) foo;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85a88c0c5e68..0b18d2cc2efa 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1204,7 +1204,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * ufs_inode_cachep;
+static struct kmem_cache * ufs_inode_cachep;
 
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
@@ -1221,7 +1221,7 @@ static void ufs_destroy_inode(struct inode *inode)
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
 
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 47faf27913a5..7f1e92930b62 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -64,7 +64,7 @@
 /* Host-dependent types and defines */
 
 #define ACPI_MACHINE_WIDTH          BITS_PER_LONG
-#define acpi_cache_t                        kmem_cache_t
+#define acpi_cache_t                        struct kmem_cache
 #define acpi_spinlock                   spinlock_t *
 #define ACPI_EXPORT_SYMBOL(symbol)  EXPORT_SYMBOL(symbol);
 #define strtoul                     simple_strtoul
diff --git a/include/asm-arm26/pgalloc.h b/include/asm-arm26/pgalloc.h
index 6437167b1ffe..7725af3ddb4d 100644
--- a/include/asm-arm26/pgalloc.h
+++ b/include/asm-arm26/pgalloc.h
@@ -15,7 +15,7 @@
 #include <asm/tlbflush.h>
 #include <linux/slab.h>
 
-extern kmem_cache_t *pte_cache;
+extern struct kmem_cache *pte_cache;
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr){
 	return kmem_cache_alloc(pte_cache, GFP_KERNEL);
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 7d398f493dde..bfee7ddfff53 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -34,14 +34,14 @@ struct vm_area_struct;
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 extern unsigned long empty_zero_page[1024];
 extern pgd_t swapper_pg_dir[1024];
-extern kmem_cache_t *pgd_cache;
-extern kmem_cache_t *pmd_cache;
+extern struct kmem_cache *pgd_cache;
+extern struct kmem_cache *pmd_cache;
 extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 
-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pmd_ctor(void *, struct kmem_cache *, unsigned long);
+void pgd_ctor(void *, struct kmem_cache *, unsigned long);
+void pgd_dtor(void *, struct kmem_cache *, unsigned long);
 void pgtable_cache_init(void);
 void paging_init(void);
 
diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
index ae63db7b3e7d..b0830db68f8a 100644
--- a/include/asm-powerpc/pgalloc.h
+++ b/include/asm-powerpc/pgalloc.h
@@ -11,7 +11,7 @@
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
 
-extern kmem_cache_t *pgtable_cache[];
+extern struct kmem_cache *pgtable_cache[];
 
 #ifdef CONFIG_PPC_64K_PAGES
 #define PTE_CACHE_NUM	0
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index 010f9cd0a672..5891ff7ba760 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -13,7 +13,7 @@
 #include <asm/page.h>
 
 /* Page table allocation/freeing. */
-extern kmem_cache_t *pgtable_cache;
+extern struct kmem_cache *pgtable_cache;
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 561e2a77805c..55d1ca5e60f5 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -30,7 +30,7 @@
 #ifdef CONFIG_TASK_DELAY_ACCT
 
 extern int delayacct_on;	/* Delay accounting turned on/off */
-extern kmem_cache_t *delayacct_cache;
+extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 1fb02e17f6f6..2514f4e286b7 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -490,7 +490,7 @@ struct i2o_dma {
  */
 struct i2o_pool {
 	char *name;
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 	mempool_t *mempool;
 };
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index fe89444b1c6f..dacd566c9f6c 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -949,7 +949,7 @@ void journal_put_journal_head(struct journal_head *jh);
 /*
  * handle management
  */
-extern kmem_cache_t *jbd_handle_cache;
+extern struct kmem_cache *jbd_handle_cache;
 
 static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags)
 {
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index ddb128795781..98a0ae52660f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -958,7 +958,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
 /*
  * handle management
  */
-extern kmem_cache_t *jbd2_handle_cache;
+extern struct kmem_cache *jbd2_handle_cache;
 
 static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags)
 {
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index f13299a15591..03636d7918fe 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -235,7 +235,7 @@ struct raid5_private_data {
 	 */
 	int			active_name;
 	char			cache_name[2][20];
-	kmem_cache_t		*slab_cache; /* for allocating stripes */
+	struct kmem_cache		*slab_cache; /* for allocating stripes */
 
 	int			seq_flush, seq_write;
 	int			quiesce;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 61c2ab634b00..36f850373d2c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -30,7 +30,7 @@ struct anon_vma {
 
 #ifdef CONFIG_MMU
 
-extern kmem_cache_t *anon_vma_cachep;
+extern struct kmem_cache *anon_vma_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1d649f3eb006..4ff3940210d8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -345,7 +345,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, 1, -1);
 }
 
-extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+extern struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
 					    unsigned int size,
 					    gfp_t priority);
 extern void	       kfree_skbmem(struct sk_buff *skb);
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index f81a5af8a4f8..ce8a912e5426 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -12,7 +12,7 @@
 #include <net/genetlink.h>
 
 #ifdef CONFIG_TASKSTATS
-extern kmem_cache_t *taskstats_cache;
+extern struct kmem_cache *taskstats_cache;
 extern struct mutex taskstats_exit_mutex;
 
 static inline void taskstats_exit_free(struct taskstats *tidstats)
diff --git a/include/net/dst.h b/include/net/dst.h
index e156e38e4ac3..62b7e7598e9a 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -98,7 +98,7 @@ struct dst_ops
 	int			entry_size;
 
 	atomic_t		entries;
-	kmem_cache_t 		*kmem_cachep;
+	struct kmem_cache 		*kmem_cachep;
 };
 
 #ifdef __KERNEL__
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a9eb2eaf094e..34cc76e3ddb4 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -125,7 +125,7 @@ struct inet_hashinfo {
 	rwlock_t			lhash_lock ____cacheline_aligned;
 	atomic_t			lhash_users;
 	wait_queue_head_t		lhash_wait;
-	kmem_cache_t			*bind_bucket_cachep;
+	struct kmem_cache			*bind_bucket_cachep;
 };
 
 static inline struct inet_ehash_bucket *inet_ehash_bucket(
@@ -136,10 +136,10 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 }
 
 extern struct inet_bind_bucket *
-		    inet_bind_bucket_create(kmem_cache_t *cachep,
+		    inet_bind_bucket_create(struct kmem_cache *cachep,
 					    struct inet_bind_hashbucket *head,
 					    const unsigned short snum);
-extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
+extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
 				     struct inet_bind_bucket *tb);
 
 static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index c8aacbd2e333..23967031ddb7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -160,7 +160,7 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
-	kmem_cache_t		*kmem_cachep;
+	struct kmem_cache		*kmem_cachep;
 	struct neigh_statistics	*stats;
 	struct neighbour	**hash_buckets;
 	unsigned int		hash_mask;
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cef3136e22a3..41bcc9eb4206 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -7,7 +7,7 @@
 #include <net/netfilter/nf_conntrack.h>
 
 extern struct list_head nf_conntrack_expect_list;
-extern kmem_cache_t *nf_conntrack_expect_cachep;
+extern struct kmem_cache *nf_conntrack_expect_cachep;
 extern struct file_operations exp_file_ops;
 
 struct nf_conntrack_expect
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 426f0fe774ef..7aed02ce2b65 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -29,7 +29,7 @@ struct proto;
 struct request_sock_ops {
 	int		family;
 	int		obj_size;
-	kmem_cache_t	*slab;
+	struct kmem_cache	*slab;
 	int		(*rtx_syn_ack)(struct sock *sk,
 				       struct request_sock *req,
 				       struct dst_entry *dst);
diff --git a/include/net/sock.h b/include/net/sock.h
index fe3a33fad03f..730899ce5162 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -571,7 +571,7 @@ struct proto {
 	int			*sysctl_rmem;
 	int			max_header;
 
-	kmem_cache_t		*slab;
+	struct kmem_cache		*slab;
 	unsigned int		obj_size;
 
 	atomic_t		*orphan_count;
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index d7a306ea560d..1e1ee3253fd8 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -15,7 +15,7 @@
 #include <net/sock.h>
 
 struct timewait_sock_ops {
-	kmem_cache_t	*twsk_slab;
+	struct kmem_cache	*twsk_slab;
 	unsigned int	twsk_obj_size;
 	int		(*twsk_unique)(struct sock *sk,
 				       struct sock *sktw, void *twp);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 9233ed5de664..0c775fceb675 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -557,7 +557,7 @@ struct sas_task {
 
 static inline struct sas_task *sas_alloc_task(gfp_t flags)
 {
-	extern kmem_cache_t *sas_task_cache;
+	extern struct kmem_cache *sas_task_cache;
 	struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags);
 
 	if (task) {
@@ -575,7 +575,7 @@ static inline struct sas_task *sas_alloc_task(gfp_t flags)
 static inline void sas_free_task(struct sas_task *task)
 {
 	if (task) {
-		extern kmem_cache_t *sas_task_cache;
+		extern struct kmem_cache *sas_task_cache;
 		BUG_ON(!list_empty(&task->list));
 		kmem_cache_free(sas_task_cache, task);
 	}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 813bb941342b..3acc1661e517 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -90,7 +90,7 @@ static struct super_operations mqueue_super_ops;
 static void remove_notification(struct mqueue_inode_info *info);
 
 static spinlock_t mq_lock;
-static kmem_cache_t *mqueue_inode_cachep;
+static struct kmem_cache *mqueue_inode_cachep;
 static struct vfsmount *mqueue_mnt;
 
 static unsigned int queues_count;
@@ -211,7 +211,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type,
 	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 70e9ec603082..766d5912b26a 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -20,7 +20,7 @@
 #include <linux/delayacct.h>
 
 int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
-kmem_cache_t *delayacct_cache;
+struct kmem_cache *delayacct_cache;
 
 static int __init delayacct_setup_disable(char *str)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 711aa5f10da7..2cf74edd3295 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -82,26 +82,26 @@ int nr_processes(void)
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
 # define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
 # define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
-static kmem_cache_t *task_struct_cachep;
+static struct kmem_cache *task_struct_cachep;
 #endif
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
-static kmem_cache_t *signal_cachep;
+static struct kmem_cache *signal_cachep;
 
 /* SLAB cache for sighand_struct structures (tsk->sighand) */
-kmem_cache_t *sighand_cachep;
+struct kmem_cache *sighand_cachep;
 
 /* SLAB cache for files_struct structures (tsk->files) */
-kmem_cache_t *files_cachep;
+struct kmem_cache *files_cachep;
 
 /* SLAB cache for fs_struct structures (tsk->fs) */
-kmem_cache_t *fs_cachep;
+struct kmem_cache *fs_cachep;
 
 /* SLAB cache for vm_area_struct structures */
-kmem_cache_t *vm_area_cachep;
+struct kmem_cache *vm_area_cachep;
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
-static kmem_cache_t *mm_cachep;
+static struct kmem_cache *mm_cachep;
 
 void free_task(struct task_struct *tsk)
 {
@@ -1421,7 +1421,7 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
-static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct sighand_struct *sighand = data;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index b914392085f9..a48879b0b921 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -31,7 +31,7 @@
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
-static kmem_cache_t *pid_cachep;
+static struct kmem_cache *pid_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9cbb5d1be06f..5fe87de10ff0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -70,7 +70,7 @@
 /*
  * Lets keep our timers in a slab cache :-)
  */
-static kmem_cache_t *posix_timers_cache;
+static struct kmem_cache *posix_timers_cache;
 static struct idr posix_timers_id;
 static DEFINE_SPINLOCK(idr_lock);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index df18c167a2a7..8e19d2785486 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -33,7 +33,7 @@
  * SLAB caches for signal bits.
  */
 
-static kmem_cache_t *sigqueue_cachep;
+static struct kmem_cache *sigqueue_cachep;
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 1b2b326cf703..f5f92014ae98 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -34,7 +34,7 @@
 
 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
 static int family_registered;
-kmem_cache_t *taskstats_cache;
+struct kmem_cache *taskstats_cache;
 
 static struct genl_family family = {
 	.id		= GENL_ID_GENERATE,
diff --git a/kernel/user.c b/kernel/user.c
index c1f93c164c93..4869563080e9 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -26,7 +26,7 @@
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
 #define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
 
-static kmem_cache_t *uid_cachep;
+static struct kmem_cache *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
 
 /*
diff --git a/lib/idr.c b/lib/idr.c
index 16d2143fea48..71853531d3b0 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -33,7 +33,7 @@
 #include <linux/string.h>
 #include <linux/idr.h>
 
-static kmem_cache_t *idr_layer_cache;
+static struct kmem_cache *idr_layer_cache;
 
 static struct idr_layer *alloc_layer(struct idr *idp)
 {
@@ -445,7 +445,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 }
 EXPORT_SYMBOL(idr_replace);
 
-static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache,
+static void idr_cache_ctor(void * idr_layer, struct kmem_cache *idr_layer_cache,
 		unsigned long flags)
 {
 	memset(idr_layer, 0, sizeof(struct idr_layer));
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index aa9bfd0bdbd1..9eb25955019f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -63,7 +63,7 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
 /*
  * Radix tree node cache.
  */
-static kmem_cache_t *radix_tree_node_cachep;
+static struct kmem_cache *radix_tree_node_cachep;
 
 /*
  * Per-cpu pool of preloaded nodes
@@ -846,7 +846,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
 EXPORT_SYMBOL(radix_tree_tagged);
 
 static void
-radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
+radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags)
 {
 	memset(node, 0, sizeof(struct radix_tree_node));
 }
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d9f04864d15d..8ca448db7a0d 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,7 +23,7 @@
 #include <asm/atomic.h>
 #include "br_private.h"
 
-static kmem_cache_t *br_fdb_cache __read_mostly;
+static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		      const unsigned char *addr);
 
diff --git a/net/core/flow.c b/net/core/flow.c
index 5df3e297f817..104c25d00a1d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
 
 #define flow_table(cpu) (per_cpu(flow_tables, cpu))
 
-static kmem_cache_t *flow_cachep __read_mostly;
+static struct kmem_cache *flow_cachep __read_mostly;
 
 static int flow_lwm, flow_hwm;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7217fb8928f2..de7801d589e7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,8 +68,8 @@
 
 #include "kmap_skb.h"
 
-static kmem_cache_t *skbuff_head_cache __read_mostly;
-static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+static struct kmem_cache *skbuff_head_cache __read_mostly;
+static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
 /*
  *	Keep out-of-line to prevent kernel bloat.
@@ -144,7 +144,7 @@ EXPORT_SYMBOL(skb_truesize_bug);
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int fclone, int node)
 {
-	kmem_cache_t *cache;
+	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
@@ -211,7 +211,7 @@ nodata:
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
 				     unsigned int size,
 				     gfp_t gfp_mask)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 419c7d3289c7..4a432da441e9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -841,7 +841,7 @@ struct sock *sk_alloc(int family, gfp_t priority,
 		      struct proto *prot, int zero_it)
 {
 	struct sock *sk = NULL;
-	kmem_cache_t *slab = prot->slab;
+	struct kmem_cache *slab = prot->slab;
 
 	if (slab != NULL)
 		sk = kmem_cache_alloc(slab, priority);
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index bdf1bb7a82c0..1f4727ddbdbf 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -21,8 +21,8 @@
 
 #include <net/sock.h>
 
-static kmem_cache_t *dccp_ackvec_slab;
-static kmem_cache_t *dccp_ackvec_record_slab;
+static struct kmem_cache *dccp_ackvec_slab;
+static struct kmem_cache *dccp_ackvec_record_slab;
 
 static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
 {
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index ff05e59043cd..d8cf92f09e68 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void)
 #define ccids_read_unlock() do { } while(0)
 #endif
 
-static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
+static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
 {
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 	char slab_name_fmt[32], *slab_name;
 	va_list args;
 
@@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
 	return slab;
 }
 
-static void ccid_kmem_cache_destroy(kmem_cache_t *slab)
+static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 {
 	if (slab != NULL) {
 		const char *name = kmem_cache_name(slab);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c7c29514dce8..bcc2d12ae81c 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -27,9 +27,9 @@ struct ccid_operations {
 	unsigned char	ccid_id;
 	const char	*ccid_name;
 	struct module	*ccid_owner;
-	kmem_cache_t	*ccid_hc_rx_slab;
+	struct kmem_cache	*ccid_hc_rx_slab;
 	__u32		ccid_hc_rx_obj_size;
-	kmem_cache_t	*ccid_hc_tx_slab;
+	struct kmem_cache	*ccid_hc_tx_slab;
 	__u32		ccid_hc_tx_obj_size;
 	int		(*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
 	int		(*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 0ae85f0340b2..eb257014dd74 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -20,7 +20,7 @@
 #define DCCP_LI_HIST_IVAL_F_LENGTH  8
 
 struct dccp_li_hist {
-	kmem_cache_t *dccplih_slab;
+	struct kmem_cache *dccplih_slab;
 };
 
 extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 067cf1c85a37..9a8bcf224aa7 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -68,14 +68,14 @@ struct dccp_rx_hist_entry {
 };
 
 struct dccp_tx_hist {
-	kmem_cache_t *dccptxh_slab;
+	struct kmem_cache *dccptxh_slab;
 };
 
 extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
 extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
 
 struct dccp_rx_hist {
-	kmem_cache_t *dccprxh_slab;
+	struct kmem_cache *dccprxh_slab;
 };
 
 extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 101e5ccaf096..13b2421991ba 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n
 static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
 static DEFINE_RWLOCK(dn_fib_tables_lock);
 
-static kmem_cache_t *dn_hash_kmem __read_mostly;
+static struct kmem_cache *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
 
 static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 4463443e42cd..648f47c1c399 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -45,8 +45,8 @@
 
 #include "fib_lookup.h"
 
-static kmem_cache_t *fn_hash_kmem __read_mostly;
-static kmem_cache_t *fn_alias_kmem __read_mostly;
+static struct kmem_cache *fn_hash_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __read_mostly;
 
 struct fib_node {
 	struct hlist_node	fn_hash;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6be6caf1af37..cfb249cc0a58 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -172,7 +172,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
 static void tnode_free(struct tnode *tn);
 
-static kmem_cache_t *fn_alias_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __read_mostly;
 static struct trie *trie_local = NULL, *trie_main = NULL;
 
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index bd6c9bc41893..8c79c8a4ea5c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -27,7 +27,7 @@
  * Allocate and initialize a new local port bind bucket.
  * The bindhash mutex for snum's hash chain must be held here.
  */
-struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
+struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 						 struct inet_bind_hashbucket *head,
 						 const unsigned short snum)
 {
@@ -45,7 +45,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
 /*
  * Caller must hold hashbucket lock for this tb with local BH disabled
  */
-void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb)
+void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
 {
 	if (hlist_empty(&tb->owners)) {
 		__hlist_del(&tb->node);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index f072f3875af8..711eb6d0285a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -73,7 +73,7 @@
 /* Exported for inet_getid inline function.  */
 DEFINE_SPINLOCK(inet_peer_idlock);
 
-static kmem_cache_t *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __read_mostly;
 
 #define node_height(x) x->avl_height
 static struct inet_peer peer_fake_node = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index efcf45ecc818..ecb5422ea237 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -105,7 +105,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
    In this case data path is free of exclusive locks at all.
  */
 
-static kmem_cache_t *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __read_mostly;
 
 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 8832eb517d52..8086787a2c51 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -44,7 +44,7 @@
 static struct list_head *ip_vs_conn_tab;
 
 /*  SLAB cache for IPVS connections */
-static kmem_cache_t *ip_vs_conn_cachep __read_mostly;
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
 
 /*  counter for current IPVS connections */
 static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index f4b0e68a16d2..8556a4f4f60a 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -65,8 +65,8 @@ static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size __read_mostly = 0;
 int ip_conntrack_max __read_mostly;
 struct list_head *ip_conntrack_hash __read_mostly;
-static kmem_cache_t *ip_conntrack_cachep __read_mostly;
-static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
+static struct kmem_cache *ip_conntrack_cachep __read_mostly;
+static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 97a8cfbb61a1..96d8310ae9c8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -50,7 +50,7 @@
 
 struct rt6_statistics	rt6_stats;
 
-static kmem_cache_t * fib6_node_kmem __read_mostly;
+static struct kmem_cache * fib6_node_kmem __read_mostly;
 
 enum fib_walk_state_t
 {
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index d4f68b0f27d5..12e426b9aacd 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -50,7 +50,7 @@ static u32 xfrm6_tunnel_spi;
 #define XFRM6_TUNNEL_SPI_MIN	1
 #define XFRM6_TUNNEL_SPI_MAX	0xffffffff
 
-static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
+static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
 
 #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eaa0f8a1adb6..a9638ff52a72 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -108,7 +108,7 @@ static struct {
 	size_t size;
 
 	/* slab cache pointer */
-	kmem_cache_t *cachep;
+	struct kmem_cache *cachep;
 
 	/* allocated slab cache + modules which uses this slab cache */
 	int use;
@@ -147,7 +147,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name,
 {
 	int ret = 0;
 	char *cache_name;
-	kmem_cache_t *cachep;
+	struct kmem_cache *cachep;
 
 	DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
 	       features, name, size);
@@ -226,7 +226,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_cache);
 /* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
 void nf_conntrack_unregister_cache(u_int32_t features)
 {
-	kmem_cache_t *cachep;
+	struct kmem_cache *cachep;
 	char *name;
 
 	/*
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 588d37937046..c20f901fa177 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -29,7 +29,7 @@
 LIST_HEAD(nf_conntrack_expect_list);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
 
-kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
+struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
 static unsigned int nf_conntrack_expect_next_id;
 
 /* nf_conntrack_expect helper functions */
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a98de0b54d65..a5a6e192ac2d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -92,7 +92,7 @@ struct xt_hashlimit_htable {
 static DEFINE_SPINLOCK(hashlimit_lock);	/* protects htables list */
 static DEFINE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
 static HLIST_HEAD(hashlimit_htables);
-static kmem_cache_t *hashlimit_cachep __read_mostly;
+static struct kmem_cache *hashlimit_cachep __read_mostly;
 
 static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
 {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 11f3b549f4a4..f2ba8615895b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -79,8 +79,8 @@ static struct sctp_pf *sctp_pf_inet_specific;
 static struct sctp_af *sctp_af_v4_specific;
 static struct sctp_af *sctp_af_v6_specific;
 
-kmem_cache_t *sctp_chunk_cachep __read_mostly;
-kmem_cache_t *sctp_bucket_cachep __read_mostly;
+struct kmem_cache *sctp_chunk_cachep __read_mostly;
+struct kmem_cache *sctp_bucket_cachep __read_mostly;
 
 /* Return the address of the control sock. */
 struct sock *sctp_get_ctl_sock(void)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8d55d10041f9..30927d3a597f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -65,7 +65,7 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-extern kmem_cache_t *sctp_chunk_cachep;
+extern struct kmem_cache *sctp_chunk_cachep;
 
 SCTP_STATIC
 struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 49607792cbd3..1e8132b8c4d9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
 			      struct sctp_association *, sctp_socket_type_t);
 static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
 
-extern kmem_cache_t *sctp_bucket_cachep;
+extern struct kmem_cache *sctp_bucket_cachep;
 
 /* Get the sndbuf space available at the time on the association.  */
 static inline int sctp_wspace(struct sctp_association *asoc)
diff --git a/net/socket.c b/net/socket.c
index 4f417c2ddc15..43eff489c878 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -230,7 +230,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
 
 #define SOCKFS_MAGIC 0x534F434B
 
-static kmem_cache_t *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __read_mostly;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
@@ -257,7 +257,7 @@ static void sock_destroy_inode(struct inode *inode)
 			container_of(inode, struct socket_alloc, vfs_inode));
 }
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct socket_alloc *ei = (struct socket_alloc *)foo;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index df753d0a884b..19703aa9659e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -33,7 +33,7 @@ static int rpc_mount_count;
 static struct file_system_type rpc_pipe_fs_type;
 
 
-static kmem_cache_t *rpc_inode_cachep __read_mostly;
+static struct kmem_cache *rpc_inode_cachep __read_mostly;
 
 #define RPC_UPCALL_TIMEOUT (30*HZ)
 
@@ -824,7 +824,7 @@ static struct file_system_type rpc_pipe_fs_type = {
 };
 
 static void
-init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct rpc_inode *rpci = (struct rpc_inode *) foo;
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index eff44bcdc95a..225e6510b523 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -34,8 +34,8 @@ static int			rpc_task_id;
 #define RPC_BUFFER_MAXSIZE	(2048)
 #define RPC_BUFFER_POOLSIZE	(8)
 #define RPC_TASK_POOLSIZE	(8)
-static kmem_cache_t	*rpc_task_slabp __read_mostly;
-static kmem_cache_t	*rpc_buffer_slabp __read_mostly;
+static struct kmem_cache	*rpc_task_slabp __read_mostly;
+static struct kmem_cache	*rpc_buffer_slabp __read_mostly;
 static mempool_t	*rpc_task_mempool __read_mostly;
 static mempool_t	*rpc_buffer_mempool __read_mostly;
 
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index ae6ddf00a1aa..eb80778d6d9c 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -42,7 +42,7 @@ struct queue_item {
 	unsigned long data;
 };
 
-static kmem_cache_t *tipc_queue_item_cache;
+static struct kmem_cache *tipc_queue_item_cache;
 static struct list_head signal_queue_head;
 static DEFINE_SPINLOCK(qitem_lock);
 static int handler_enabled = 0;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index a898a6a83a56..414f89070380 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -12,7 +12,7 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-static kmem_cache_t *secpath_cachep __read_mostly;
+static struct kmem_cache *secpath_cachep __read_mostly;
 
 void __secpath_destroy(struct sec_path *sp)
 {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f6c77bd36fdd..3f3f563eb4ab 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,7 +39,7 @@ EXPORT_SYMBOL(xfrm_policy_count);
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
-static kmem_cache_t *xfrm_dst_cache __read_mostly;
+static struct kmem_cache *xfrm_dst_cache __read_mostly;
 
 static struct work_struct xfrm_policy_gc_work;
 static HLIST_HEAD(xfrm_policy_gc_list);
diff --git a/security/keys/key.c b/security/keys/key.c
index 157bac658bf9..0db816f10f85 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -20,7 +20,7 @@
 #include <linux/err.h>
 #include "internal.h"
 
-static kmem_cache_t	*key_jar;
+static struct kmem_cache	*key_jar;
 struct rb_root		key_serial_tree; /* tree of keys indexed by serial */
 DEFINE_SPINLOCK(key_serial_lock);
 
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 65b4ec9c699a..e7c0b5e2066b 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -124,7 +124,7 @@ DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 };
 
 static struct avc_cache avc_cache;
 static struct avc_callback_node *avc_callbacks;
-static kmem_cache_t *avc_node_cachep;
+static struct kmem_cache *avc_node_cachep;
 
 static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
 {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ac1aeed0b289..44e9cd470543 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -124,7 +124,7 @@ static struct security_operations *secondary_ops = NULL;
 static LIST_HEAD(superblock_security_head);
 static DEFINE_SPINLOCK(sb_security_lock);
 
-static kmem_cache_t *sel_inode_cache;
+static struct kmem_cache *sel_inode_cache;
 
 /* Return security context for a given sid or just the context 
    length if the buffer is null or length is 0 */
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index 2dfc6134c2c2..ebb993c5c244 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -28,7 +28,7 @@
  (keyp->source_type << 9)) & \
  AVTAB_HASH_MASK)
 
-static kmem_cache_t *avtab_node_cachep;
+static struct kmem_cache *avtab_node_cachep;
 
 static struct avtab_node*
 avtab_insert_node(struct avtab *h, int hvalue,
-- 
cgit v1.2.3


From 3592695c363c3f3119621bdcf5ed852d6b9d1a5c Mon Sep 17 00:00:00 2001
From: Stefan Seyfried <seife@suse.de>
Date: Wed, 6 Dec 2006 20:34:06 -0800
Subject: [PATCH] uswsusp: add pmops->{prepare,enter,finish} support (aka
 "platform mode")

Add an ioctl to the userspace swsusp code that enables the usage of the
pmops->prepare, pmops->enter and pmops->finish methods (the in-kernel
suspend knows these as "platform method").  These are needed on many
machines to (among others) speed up resuming by letting the BIOS skip some
steps or let my hp nx5000 recognise the correct ac_adapter state after
resume again.

It also ensures on many machines, that changed hardware (unplugged AC
adapters) gets correctly detected and that kacpid does not run wild after
resume.

Signed-off-by: Stefan Seyfried <seife@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/power.h |  7 ++++++-
 kernel/power/user.c  | 28 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/power.h b/kernel/power/power.h
index bfe999f7b272..87ecb1856ee8 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -117,7 +117,12 @@ extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 #define SNAPSHOT_FREE_SWAP_PAGES	_IO(SNAPSHOT_IOC_MAGIC, 9)
 #define SNAPSHOT_SET_SWAP_FILE		_IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
 #define SNAPSHOT_S2RAM			_IO(SNAPSHOT_IOC_MAGIC, 11)
-#define SNAPSHOT_IOC_MAXNR	11
+#define SNAPSHOT_PMOPS			_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
+#define SNAPSHOT_IOC_MAXNR	12
+
+#define PMOPS_PREPARE	1
+#define PMOPS_ENTER	2
+#define PMOPS_FINISH	3
 
 /**
  *	The bitmap is used for tracing allocated swap pages
diff --git a/kernel/power/user.c b/kernel/power/user.c
index d991d3b0e5a4..4c24ca5d62e3 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -11,6 +11,7 @@
 
 #include <linux/suspend.h>
 #include <linux/syscalls.h>
+#include <linux/reboot.h>
 #include <linux/string.h>
 #include <linux/device.h>
 #include <linux/miscdevice.h>
@@ -313,6 +314,33 @@ OutS3:
 		up(&pm_sem);
 		break;
 
+	case SNAPSHOT_PMOPS:
+		switch (arg) {
+
+		case PMOPS_PREPARE:
+			if (pm_ops->prepare) {
+				error = pm_ops->prepare(PM_SUSPEND_DISK);
+			}
+			break;
+
+		case PMOPS_ENTER:
+			kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
+			error = pm_ops->enter(PM_SUSPEND_DISK);
+			break;
+
+		case PMOPS_FINISH:
+			if (pm_ops && pm_ops->finish) {
+				pm_ops->finish(PM_SUSPEND_DISK);
+			}
+			break;
+
+		default:
+			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
+			error = -EINVAL;
+
+		}
+		break;
+
 	default:
 		error = -ENOTTY;
 
-- 
cgit v1.2.3


From 915bae9ebe41e52d71ad8b06d50e4ab26189f964 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:07 -0800
Subject: [PATCH] swsusp: use partition device and offset to identify swap
 areas

The Linux kernel handles swap files almost in the same way as it handles swap
partitions and there are only two differences between these two types of swap
areas:

(1) swap files need not be contiguous,

(2) the header of a swap file is not in the first block of the partition
    that holds it.  From the swsusp's point of view (1) is not a problem,
    because it is already taken care of by the swap-handling code, but (2) has
    to be taken into consideration.

In principle the location of a swap file's header may be determined with the
help of appropriate filesystem driver.  Unfortunately, however, it requires
the filesystem holding the swap file to be mounted, and if this filesystem is
journaled, it cannot be mounted during a resume from disk.  For this reason we
need some other means by which swap areas can be identified.

For example, to identify a swap area we can use the partition that holds the
area and the offset from the beginning of this partition at which the swap
header is located.

The following patch allows swsusp to identify swap areas this way.  It changes
swap_type_of() so that it takes an additional argument representing an offset
of the swap header within the partition represented by its first argument.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h |  2 +-
 kernel/power/swap.c  |  2 +-
 kernel/power/user.c  |  5 +++--
 mm/swapfile.c        | 38 ++++++++++++++++++++++++++------------
 4 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 89f8a39773bf..d51e35e4e168 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -247,7 +247,7 @@ extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
 extern void free_swap_and_cache(swp_entry_t);
-extern int swap_type_of(dev_t);
+extern int swap_type_of(dev_t, sector_t);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1a3b0dd2c3fc..7b10da16389e 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -74,7 +74,7 @@ static int mark_swapfiles(swp_entry_t start)
 
 static int swsusp_swap_check(void) /* This is called before saving image */
 {
-	int res = swap_type_of(swsusp_resume_device);
+	int res = swap_type_of(swsusp_resume_device, 0);
 
 	if (res >= 0) {
 		root_swap = res;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 4c24ca5d62e3..a327b18a5ffd 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -55,7 +55,8 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	filp->private_data = data;
 	memset(&data->handle, 0, sizeof(struct snapshot_handle));
 	if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
-		data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1;
+		data->swap = swsusp_resume_device ?
+				swap_type_of(swsusp_resume_device, 0) : -1;
 		data->mode = O_RDONLY;
 	} else {
 		data->swap = -1;
@@ -265,7 +266,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			 * so we need to recode them
 			 */
 			if (old_decode_dev(arg)) {
-				data->swap = swap_type_of(old_decode_dev(arg));
+				data->swap = swap_type_of(old_decode_dev(arg), 0);
 				if (data->swap < 0)
 					error = -ENODEV;
 			} else {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f315131db006..2bfacbac0f4c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -427,34 +427,48 @@ void free_swap_and_cache(swp_entry_t entry)
 
 #ifdef CONFIG_SOFTWARE_SUSPEND
 /*
- * Find the swap type that corresponds to given device (if any)
+ * Find the swap type that corresponds to given device (if any).
  *
- * This is needed for software suspend and is done in such a way that inode
- * aliasing is allowed.
+ * @offset - number of the PAGE_SIZE-sized block of the device, starting
+ * from 0, in which the swap header is expected to be located.
+ *
+ * This is needed for the suspend to disk (aka swsusp).
  */
-int swap_type_of(dev_t device)
+int swap_type_of(dev_t device, sector_t offset)
 {
+	struct block_device *bdev = NULL;
 	int i;
 
+	if (device)
+		bdev = bdget(device);
+
 	spin_lock(&swap_lock);
 	for (i = 0; i < nr_swapfiles; i++) {
-		struct inode *inode;
+		struct swap_info_struct *sis = swap_info + i;
 
-		if (!(swap_info[i].flags & SWP_WRITEOK))
+		if (!(sis->flags & SWP_WRITEOK))
 			continue;
 
-		if (!device) {
+		if (!bdev) {
 			spin_unlock(&swap_lock);
 			return i;
 		}
-		inode = swap_info[i].swap_file->f_dentry->d_inode;
-		if (S_ISBLK(inode->i_mode) &&
-		    device == MKDEV(imajor(inode), iminor(inode))) {
-			spin_unlock(&swap_lock);
-			return i;
+		if (bdev == sis->bdev) {
+			struct swap_extent *se;
+
+			se = list_entry(sis->extent_list.next,
+					struct swap_extent, list);
+			if (se->start_block == offset) {
+				spin_unlock(&swap_lock);
+				bdput(bdev);
+				return i;
+			}
 		}
 	}
 	spin_unlock(&swap_lock);
+	if (bdev)
+		bdput(bdev);
+
 	return -ENODEV;
 }
 
-- 
cgit v1.2.3


From 3fc6b34f4803b959c1e30c15247e2180cd529115 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:09 -0800
Subject: [PATCH] swsusp: rearrange swap-handling code

Rearrange the code in kernel/power/swap.c so that the next patch is more
readable.

[This patch only moves the existing code.]

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swap.c | 221 ++++++++++++++++++++++++++--------------------------
 1 file changed, 112 insertions(+), 109 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7b10da16389e..7768c2ba7550 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -41,10 +41,121 @@ static struct swsusp_header {
 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
 
 /*
- * Saving part...
+ * General things
  */
 
 static unsigned short root_swap = 0xffff;
+static struct block_device *resume_bdev;
+
+/**
+ *	submit - submit BIO request.
+ *	@rw:	READ or WRITE.
+ *	@off	physical offset of page.
+ *	@page:	page we're reading or writing.
+ *	@bio_chain: list of pending biod (for async reading)
+ *
+ *	Straight from the textbook - allocate and initialize the bio.
+ *	If we're reading, make sure the page is marked as dirty.
+ *	Then submit it and, if @bio_chain == NULL, wait.
+ */
+static int submit(int rw, pgoff_t page_off, struct page *page,
+			struct bio **bio_chain)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(GFP_ATOMIC, 1);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
+	bio->bi_bdev = resume_bdev;
+	bio->bi_end_io = end_swap_bio_read;
+
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+		bio_put(bio);
+		return -EFAULT;
+	}
+
+	lock_page(page);
+	bio_get(bio);
+
+	if (bio_chain == NULL) {
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+		wait_on_page_locked(page);
+		if (rw == READ)
+			bio_set_pages_dirty(bio);
+		bio_put(bio);
+	} else {
+		if (rw == READ)
+			get_page(page);	/* These pages are freed later */
+		bio->bi_private = *bio_chain;
+		*bio_chain = bio;
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+	}
+	return 0;
+}
+
+static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+	return submit(READ, page_off, virt_to_page(addr), bio_chain);
+}
+
+static int bio_write_page(pgoff_t page_off, void *addr)
+{
+	return submit(WRITE, page_off, virt_to_page(addr), NULL);
+}
+
+static int wait_on_bio_chain(struct bio **bio_chain)
+{
+	struct bio *bio;
+	struct bio *next_bio;
+	int ret = 0;
+
+	if (bio_chain == NULL)
+		return 0;
+
+	bio = *bio_chain;
+	if (bio == NULL)
+		return 0;
+	while (bio) {
+		struct page *page;
+
+		next_bio = bio->bi_private;
+		page = bio->bi_io_vec[0].bv_page;
+		wait_on_page_locked(page);
+		if (!PageUptodate(page) || PageError(page))
+			ret = -EIO;
+		put_page(page);
+		bio_put(bio);
+		bio = next_bio;
+	}
+	*bio_chain = NULL;
+	return ret;
+}
+
+static void show_speed(struct timeval *start, struct timeval *stop,
+			unsigned nr_pages, char *msg)
+{
+	s64 elapsed_centisecs64;
+	int centisecs;
+	int k;
+	int kps;
+
+	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
+	centisecs = elapsed_centisecs64;
+	if (centisecs == 0)
+		centisecs = 1;	/* avoid div-by-zero */
+	k = nr_pages * (PAGE_SIZE / 1024);
+	kps = (k * 100) / centisecs;
+	printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
+			centisecs / 100, centisecs % 100,
+			kps / 1000, (kps % 1000) / 10);
+}
+
+/*
+ * Saving part
+ */
 
 static int mark_swapfiles(swp_entry_t start)
 {
@@ -166,26 +277,6 @@ static void release_swap_writer(struct swap_map_handle *handle)
 	handle->bitmap = NULL;
 }
 
-static void show_speed(struct timeval *start, struct timeval *stop,
-			unsigned nr_pages, char *msg)
-{
-	s64 elapsed_centisecs64;
-	int centisecs;
-	int k;
-	int kps;
-
-	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
-	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
-	centisecs = elapsed_centisecs64;
-	if (centisecs == 0)
-		centisecs = 1;	/* avoid div-by-zero */
-	k = nr_pages * (PAGE_SIZE / 1024);
-	kps = (k * 100) / centisecs;
-	printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
-			centisecs / 100, centisecs % 100,
-			kps / 1000, (kps % 1000) / 10);
-}
-
 static int get_swap_writer(struct swap_map_handle *handle)
 {
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
@@ -205,34 +296,6 @@ static int get_swap_writer(struct swap_map_handle *handle)
 	return 0;
 }
 
-static int wait_on_bio_chain(struct bio **bio_chain)
-{
-	struct bio *bio;
-	struct bio *next_bio;
-	int ret = 0;
-
-	if (bio_chain == NULL)
-		return 0;
-
-	bio = *bio_chain;
-	if (bio == NULL)
-		return 0;
-	while (bio) {
-		struct page *page;
-
-		next_bio = bio->bi_private;
-		page = bio->bi_io_vec[0].bv_page;
-		wait_on_page_locked(page);
-		if (!PageUptodate(page) || PageError(page))
-			ret = -EIO;
-		put_page(page);
-		bio_put(bio);
-		bio = next_bio;
-	}
-	*bio_chain = NULL;
-	return ret;
-}
-
 static int swap_write_page(struct swap_map_handle *handle, void *buf,
 				struct bio **bio_chain)
 {
@@ -384,66 +447,6 @@ int swsusp_write(void)
 	return error;
 }
 
-static struct block_device *resume_bdev;
-
-/**
- *	submit - submit BIO request.
- *	@rw:	READ or WRITE.
- *	@off	physical offset of page.
- *	@page:	page we're reading or writing.
- *	@bio_chain: list of pending biod (for async reading)
- *
- *	Straight from the textbook - allocate and initialize the bio.
- *	If we're reading, make sure the page is marked as dirty.
- *	Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, pgoff_t page_off, struct page *page,
-			struct bio **bio_chain)
-{
-	struct bio *bio;
-
-	bio = bio_alloc(GFP_ATOMIC, 1);
-	if (!bio)
-		return -ENOMEM;
-	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
-	bio->bi_bdev = resume_bdev;
-	bio->bi_end_io = end_swap_bio_read;
-
-	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
-		printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
-		bio_put(bio);
-		return -EFAULT;
-	}
-
-	lock_page(page);
-	bio_get(bio);
-
-	if (bio_chain == NULL) {
-		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-		wait_on_page_locked(page);
-		if (rw == READ)
-			bio_set_pages_dirty(bio);
-		bio_put(bio);
-	} else {
-		if (rw == READ)
-			get_page(page);	/* These pages are freed later */
-		bio->bi_private = *bio_chain;
-		*bio_chain = bio;
-		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-	}
-	return 0;
-}
-
-static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
-	return submit(READ, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int bio_write_page(pgoff_t page_off, void *addr)
-{
-	return submit(WRITE, page_off, virt_to_page(addr), NULL);
-}
-
 /**
  *	The following functions allow us to read data using a swap map
  *	in a file-alike way
-- 
cgit v1.2.3


From 3aef83e0ef1ffb8ea3bea97be46821a45c952173 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:10 -0800
Subject: [PATCH] swsusp: use block device offsets to identify swap locations

Make swsusp use block device offsets instead of swap offsets to identify swap
locations and make it use the same code paths for writing as well as for
reading data.

This allows us to use the same code for handling swap files and swap
partitions and to simplify the code, eg.  by dropping rw_swap_page_sync().

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h  |   3 +-
 kernel/power/power.h  |   2 +-
 kernel/power/swap.c   | 133 ++++++++++++++++++++++++++------------------------
 kernel/power/swsusp.c |  10 ++--
 kernel/power/user.c   |   7 +--
 mm/page_io.c          |  45 -----------------
 mm/swapfile.c         |  17 +++++++
 7 files changed, 98 insertions(+), 119 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d51e35e4e168..add51cebc8d9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -218,8 +218,6 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
-				struct bio **bio_chain);
 extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
 
 /* linux/mm/swap_state.c */
@@ -250,6 +248,7 @@ extern void free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
+extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 87ecb1856ee8..210ebba26020 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -146,7 +146,7 @@ struct bitmap_page {
 
 extern void free_bitmap(struct bitmap_page *bitmap);
 extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
-extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
+extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
 extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
 
 extern int swsusp_check(void);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7768c2ba7550..1b08f46bbb7e 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -34,8 +34,8 @@ extern char resume_file[];
 #define SWSUSP_SIG	"S1SUSPEND"
 
 static struct swsusp_header {
-	char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
-	swp_entry_t image;
+	char reserved[PAGE_SIZE - 20 - sizeof(sector_t)];
+	sector_t image;
 	char	orig_sig[10];
 	char	sig[10];
 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
@@ -100,9 +100,9 @@ static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
 	return submit(READ, page_off, virt_to_page(addr), bio_chain);
 }
 
-static int bio_write_page(pgoff_t page_off, void *addr)
+static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
 {
-	return submit(WRITE, page_off, virt_to_page(addr), NULL);
+	return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
 }
 
 static int wait_on_bio_chain(struct bio **bio_chain)
@@ -157,22 +157,19 @@ static void show_speed(struct timeval *start, struct timeval *stop,
  * Saving part
  */
 
-static int mark_swapfiles(swp_entry_t start)
+static int mark_swapfiles(sector_t start)
 {
 	int error;
 
-	rw_swap_page_sync(READ, swp_entry(root_swap, 0),
-			  virt_to_page((unsigned long)&swsusp_header), NULL);
+	bio_read_page(0, &swsusp_header, NULL);
 	if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
 	    !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
 		memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
 		memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
 		swsusp_header.image = start;
-		error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0),
-				virt_to_page((unsigned long)&swsusp_header),
-				NULL);
+		error = bio_write_page(0, &swsusp_header, NULL);
 	} else {
-		pr_debug("swsusp: Partition is not swap space.\n");
+		printk(KERN_ERR "swsusp: Swap header not found!\n");
 		error = -ENODEV;
 	}
 	return error;
@@ -185,12 +182,21 @@ static int mark_swapfiles(swp_entry_t start)
 
 static int swsusp_swap_check(void) /* This is called before saving image */
 {
-	int res = swap_type_of(swsusp_resume_device, 0);
+	int res;
+
+	res = swap_type_of(swsusp_resume_device, 0);
+	if (res < 0)
+		return res;
+
+	root_swap = res;
+	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE);
+	if (IS_ERR(resume_bdev))
+		return PTR_ERR(resume_bdev);
+
+	res = set_blocksize(resume_bdev, PAGE_SIZE);
+	if (res < 0)
+		blkdev_put(resume_bdev);
 
-	if (res >= 0) {
-		root_swap = res;
-		return 0;
-	}
 	return res;
 }
 
@@ -201,36 +207,26 @@ static int swsusp_swap_check(void) /* This is called before saving image */
  *	@bio_chain:	Link the next write BIO here
  */
 
-static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
+static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
 {
-	swp_entry_t entry;
-	int error = -ENOSPC;
-
-	if (offset) {
-		struct page *page = virt_to_page(buf);
-
-		if (bio_chain) {
-			/*
-			 * Whether or not we successfully allocated a copy page,
-			 * we take a ref on the page here.  It gets undone in
-			 * wait_on_bio_chain().
-			 */
-			struct page *page_copy;
-			page_copy = alloc_page(GFP_ATOMIC);
-			if (page_copy == NULL) {
-				WARN_ON_ONCE(1);
-				bio_chain = NULL;	/* Go synchronous */
-				get_page(page);
-			} else {
-				memcpy(page_address(page_copy),
-					page_address(page), PAGE_SIZE);
-				page = page_copy;
-			}
+	void *src;
+
+	if (!offset)
+		return -ENOSPC;
+
+	if (bio_chain) {
+		src = (void *)__get_free_page(GFP_ATOMIC);
+		if (src) {
+			memcpy(src, buf, PAGE_SIZE);
+		} else {
+			WARN_ON_ONCE(1);
+			bio_chain = NULL;	/* Go synchronous */
+			src = buf;
 		}
-		entry = swp_entry(root_swap, offset);
-		error = rw_swap_page_sync(WRITE, entry, page, bio_chain);
+	} else {
+		src = buf;
 	}
-	return error;
+	return bio_write_page(offset, src, bio_chain);
 }
 
 /*
@@ -248,11 +244,11 @@ static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
  *	at a time.
  */
 
-#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(long) - 1)
+#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(sector_t) - 1)
 
 struct swap_map_page {
-	unsigned long		entries[MAP_PAGE_ENTRIES];
-	unsigned long		next_swap;
+	sector_t entries[MAP_PAGE_ENTRIES];
+	sector_t next_swap;
 };
 
 /**
@@ -262,7 +258,7 @@ struct swap_map_page {
 
 struct swap_map_handle {
 	struct swap_map_page *cur;
-	unsigned long cur_swap;
+	sector_t cur_swap;
 	struct bitmap_page *bitmap;
 	unsigned int k;
 };
@@ -287,7 +283,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
 		release_swap_writer(handle);
 		return -ENOMEM;
 	}
-	handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap);
+	handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
 	if (!handle->cur_swap) {
 		release_swap_writer(handle);
 		return -ENOSPC;
@@ -300,11 +296,11 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
 				struct bio **bio_chain)
 {
 	int error = 0;
-	unsigned long offset;
+	sector_t offset;
 
 	if (!handle->cur)
 		return -EINVAL;
-	offset = alloc_swap_page(root_swap, handle->bitmap);
+	offset = alloc_swapdev_block(root_swap, handle->bitmap);
 	error = write_page(buf, offset, bio_chain);
 	if (error)
 		return error;
@@ -313,7 +309,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
 		error = wait_on_bio_chain(bio_chain);
 		if (error)
 			goto out;
-		offset = alloc_swap_page(root_swap, handle->bitmap);
+		offset = alloc_swapdev_block(root_swap, handle->bitmap);
 		if (!offset)
 			return -ENOSPC;
 		handle->cur->next_swap = offset;
@@ -413,37 +409,47 @@ int swsusp_write(void)
 	struct swsusp_info *header;
 	int error;
 
-	if ((error = swsusp_swap_check())) {
+	error = swsusp_swap_check();
+	if (error) {
 		printk(KERN_ERR "swsusp: Cannot find swap device, try "
 				"swapon -a.\n");
 		return error;
 	}
 	memset(&snapshot, 0, sizeof(struct snapshot_handle));
 	error = snapshot_read_next(&snapshot, PAGE_SIZE);
-	if (error < PAGE_SIZE)
-		return error < 0 ? error : -EFAULT;
+	if (error < PAGE_SIZE) {
+		if (error >= 0)
+			error = -EFAULT;
+
+		goto out;
+	}
 	header = (struct swsusp_info *)data_of(snapshot);
 	if (!enough_swap(header->pages)) {
 		printk(KERN_ERR "swsusp: Not enough free swap\n");
-		return -ENOSPC;
+		error = -ENOSPC;
+		goto out;
 	}
 	error = get_swap_writer(&handle);
 	if (!error) {
-		unsigned long start = handle.cur_swap;
+		sector_t start = handle.cur_swap;
+
 		error = swap_write_page(&handle, header, NULL);
 		if (!error)
 			error = save_image(&handle, &snapshot,
 					header->pages - 1);
+
 		if (!error) {
 			flush_swap_writer(&handle);
 			printk("S");
-			error = mark_swapfiles(swp_entry(root_swap, start));
+			error = mark_swapfiles(start);
 			printk("|\n");
 		}
 	}
 	if (error)
 		free_all_swap_pages(root_swap, handle.bitmap);
 	release_swap_writer(&handle);
+out:
+	swsusp_close();
 	return error;
 }
 
@@ -459,17 +465,18 @@ static void release_swap_reader(struct swap_map_handle *handle)
 	handle->cur = NULL;
 }
 
-static int get_swap_reader(struct swap_map_handle *handle,
-                                      swp_entry_t start)
+static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
 {
 	int error;
 
-	if (!swp_offset(start))
+	if (!start)
 		return -EINVAL;
+
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
 	if (!handle->cur)
 		return -ENOMEM;
-	error = bio_read_page(swp_offset(start), handle->cur, NULL);
+
+	error = bio_read_page(start, handle->cur, NULL);
 	if (error) {
 		release_swap_reader(handle);
 		return error;
@@ -481,7 +488,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 static int swap_read_page(struct swap_map_handle *handle, void *buf,
 				struct bio **bio_chain)
 {
-	unsigned long offset;
+	sector_t offset;
 	int error;
 
 	if (!handle->cur)
@@ -608,7 +615,7 @@ int swsusp_check(void)
 		if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
 			memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
 			/* Reset swap signature now */
-			error = bio_write_page(0, &swsusp_header);
+			error = bio_write_page(0, &swsusp_header, NULL);
 		} else {
 			return -EINVAL;
 		}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 0b66659dc516..4147a756a8c7 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -134,18 +134,18 @@ static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
 	return 0;
 }
 
-unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
+sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
 {
 	unsigned long offset;
 
 	offset = swp_offset(get_swap_page_of_type(swap));
 	if (offset) {
-		if (bitmap_set(bitmap, offset)) {
+		if (bitmap_set(bitmap, offset))
 			swap_free(swp_entry(swap, offset));
-			offset = 0;
-		}
+		else
+			return swapdev_block(swap, offset);
 	}
-	return offset;
+	return 0;
 }
 
 void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a327b18a5ffd..f0b7ef8bdd74 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -126,7 +126,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 {
 	int error = 0;
 	struct snapshot_data *data;
-	loff_t offset, avail;
+	loff_t avail;
+	sector_t offset;
 
 	if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
 		return -ENOTTY;
@@ -240,10 +241,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 				break;
 			}
 		}
-		offset = alloc_swap_page(data->swap, data->bitmap);
+		offset = alloc_swapdev_block(data->swap, data->bitmap);
 		if (offset) {
 			offset <<= PAGE_SHIFT;
-			error = put_user(offset, (loff_t __user *)arg);
+			error = put_user(offset, (sector_t __user *)arg);
 		} else {
 			error = -ENOSPC;
 		}
diff --git a/mm/page_io.c b/mm/page_io.c
index d4840ecbf8f9..dbffec0d78c9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -147,48 +147,3 @@ int swap_readpage(struct file *file, struct page *page)
 out:
 	return ret;
 }
-
-#ifdef CONFIG_SOFTWARE_SUSPEND
-/*
- * A scruffy utility function to read or write an arbitrary swap page
- * and wait on the I/O.  The caller must have a ref on the page.
- *
- * We use end_swap_bio_read() even for writes, because it happens to do what
- * we want.
- */
-int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
-			struct bio **bio_chain)
-{
-	struct bio *bio;
-	int ret = 0;
-	int bio_rw;
-
-	lock_page(page);
-
-	bio = get_swap_bio(GFP_KERNEL, entry.val, page, end_swap_bio_read);
-	if (bio == NULL) {
-		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	bio_rw = rw;
-	if (!bio_chain)
-		bio_rw |= (1 << BIO_RW_SYNC);
-	if (bio_chain)
-		bio_get(bio);
-	submit_bio(bio_rw, bio);
-	if (bio_chain == NULL) {
-		wait_on_page_locked(page);
-
-		if (!PageUptodate(page) || PageError(page))
-			ret = -EIO;
-	}
-	if (bio_chain) {
-		bio->bi_private = *bio_chain;
-		*bio_chain = bio;
-	}
-out:
-	return ret;
-}
-#endif
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2bfacbac0f4c..55242363de64 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -945,6 +945,23 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
 	}
 }
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/*
+ * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
+ * corresponding to given index in swap_info (swap type).
+ */
+sector_t swapdev_block(int swap_type, pgoff_t offset)
+{
+	struct swap_info_struct *sis;
+
+	if (swap_type >= nr_swapfiles)
+		return 0;
+
+	sis = swap_info + swap_type;
+	return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
+
 /*
  * Free all of a swapdev's extent information
  */
-- 
cgit v1.2.3


From 9a154d9d95b7b9845938242f5c62505b3cab5bcd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:12 -0800
Subject: [PATCH] swsusp: add resume_offset command line parameter

Add the kernel command line parameter "resume_offset=" allowing us to specify
the offset, in <PAGE_SIZE> units, from the beginning of the partition pointed
to by the "resume=" parameter at which the swap header is located.

This offset can be determined, for example, by an application using the FIBMAP
ioctl to obtain the swap header's block number for given file.

[akpm@osdl.org: we don't know what type sector_t is]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c  | 15 +++++++++++++++
 kernel/power/power.h |  1 +
 kernel/power/swap.c  | 15 ++++++++++-----
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b1fb7866b0b3..d79feeb45459 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -27,6 +27,7 @@
 static int noresume = 0;
 char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
+sector_t swsusp_resume_block;
 
 /**
  *	power_down - Shut machine down for hibernate.
@@ -423,6 +424,19 @@ static int __init resume_setup(char *str)
 	return 1;
 }
 
+static int __init resume_offset_setup(char *str)
+{
+	unsigned long long offset;
+
+	if (noresume)
+		return 1;
+
+	if (sscanf(str, "%llu", &offset) == 1)
+		swsusp_resume_block = offset;
+
+	return 1;
+}
+
 static int __init noresume_setup(char *str)
 {
 	noresume = 1;
@@ -430,4 +444,5 @@ static int __init noresume_setup(char *str)
 }
 
 __setup("noresume", noresume_setup);
+__setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 210ebba26020..adaf7d4fbdaf 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -42,6 +42,7 @@ extern const void __nosave_begin, __nosave_end;
 extern unsigned long image_size;
 extern int in_suspend;
 extern dev_t swsusp_resume_device;
+extern sector_t swsusp_resume_block;
 
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1b08f46bbb7e..aa5a9bff01f1 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -161,13 +161,14 @@ static int mark_swapfiles(sector_t start)
 {
 	int error;
 
-	bio_read_page(0, &swsusp_header, NULL);
+	bio_read_page(swsusp_resume_block, &swsusp_header, NULL);
 	if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
 	    !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
 		memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
 		memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
 		swsusp_header.image = start;
-		error = bio_write_page(0, &swsusp_header, NULL);
+		error = bio_write_page(swsusp_resume_block,
+					&swsusp_header, NULL);
 	} else {
 		printk(KERN_ERR "swsusp: Swap header not found!\n");
 		error = -ENODEV;
@@ -184,7 +185,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int res;
 
-	res = swap_type_of(swsusp_resume_device, 0);
+	res = swap_type_of(swsusp_resume_device, swsusp_resume_block);
 	if (res < 0)
 		return res;
 
@@ -610,12 +611,16 @@ int swsusp_check(void)
 	if (!IS_ERR(resume_bdev)) {
 		set_blocksize(resume_bdev, PAGE_SIZE);
 		memset(&swsusp_header, 0, sizeof(swsusp_header));
-		if ((error = bio_read_page(0, &swsusp_header, NULL)))
+		error = bio_read_page(swsusp_resume_block,
+					&swsusp_header, NULL);
+		if (error)
 			return error;
+
 		if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
 			memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
 			/* Reset swap signature now */
-			error = bio_write_page(0, &swsusp_header, NULL);
+			error = bio_write_page(swsusp_resume_block,
+						&swsusp_header, NULL);
 		} else {
 			return -EINVAL;
 		}
-- 
cgit v1.2.3


From 37b2ba12df88f0e29f2d52aaf1ab22789377d5b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:15 -0800
Subject: [PATCH] swsusp: add ioctl for swap files support

To be able to use swap files as suspend storage from the userland suspend
tools we need an additional ioctl() that will allow us to provide the kernel
with both the swap header's offset and the identification of the resume
partition.

The new ioctl() should be regarded as a replacement for the
SNAPSHOT_SET_SWAP_FILE ioctl() that from now on will be considered as
obsolete, but has to stay for backwards compatibility of the interface.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/power.h | 14 +++++++++++++-
 kernel/power/user.c  | 31 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/power.h b/kernel/power/power.h
index adaf7d4fbdaf..7dbfd9f67e1c 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -106,6 +106,16 @@ extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
 extern int snapshot_image_loaded(struct snapshot_handle *handle);
 extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 
+/*
+ * This structure is used to pass the values needed for the identification
+ * of the resume swap area from a user space to the kernel via the
+ * SNAPSHOT_SET_SWAP_AREA ioctl
+ */
+struct resume_swap_area {
+	loff_t offset;
+	u_int32_t dev;
+} __attribute__((packed));
+
 #define SNAPSHOT_IOC_MAGIC	'3'
 #define SNAPSHOT_FREEZE			_IO(SNAPSHOT_IOC_MAGIC, 1)
 #define SNAPSHOT_UNFREEZE		_IO(SNAPSHOT_IOC_MAGIC, 2)
@@ -119,7 +129,9 @@ extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 #define SNAPSHOT_SET_SWAP_FILE		_IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
 #define SNAPSHOT_S2RAM			_IO(SNAPSHOT_IOC_MAGIC, 11)
 #define SNAPSHOT_PMOPS			_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
-#define SNAPSHOT_IOC_MAXNR	12
+#define SNAPSHOT_SET_SWAP_AREA		_IOW(SNAPSHOT_IOC_MAGIC, 13, \
+							struct resume_swap_area)
+#define SNAPSHOT_IOC_MAXNR	13
 
 #define PMOPS_PREPARE	1
 #define PMOPS_ENTER	2
diff --git a/kernel/power/user.c b/kernel/power/user.c
index f0b7ef8bdd74..05c58a2c0dd4 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -343,6 +343,37 @@ OutS3:
 		}
 		break;
 
+	case SNAPSHOT_SET_SWAP_AREA:
+		if (data->bitmap) {
+			error = -EPERM;
+		} else {
+			struct resume_swap_area swap_area;
+			dev_t swdev;
+
+			error = copy_from_user(&swap_area, (void __user *)arg,
+					sizeof(struct resume_swap_area));
+			if (error) {
+				error = -EFAULT;
+				break;
+			}
+
+			/*
+			 * User space encodes device types as two-byte values,
+			 * so we need to recode them
+			 */
+			swdev = old_decode_dev(swap_area.dev);
+			if (swdev) {
+				offset = swap_area.offset;
+				data->swap = swap_type_of(swdev, offset);
+				if (data->swap < 0)
+					error = -ENODEV;
+			} else {
+				data->swap = -1;
+				error = -EINVAL;
+			}
+		}
+		break;
+
 	default:
 		error = -ENOTTY;
 
-- 
cgit v1.2.3


From 8357376d3df21b7d6f857931a57ac50da9c66e26 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:18 -0800
Subject: [PATCH] swsusp: Improve handling of highmem

Currently swsusp saves the contents of highmem pages by copying them to the
normal zone which is quite inefficient (eg.  it requires two normal pages
to be used for saving one highmem page).  This may be improved by using
highmem for saving the contents of saveable highmem pages.

Namely, during the suspend phase of the suspend-resume cycle we try to
allocate as many free highmem pages as there are saveable highmem pages.
If there are not enough highmem image pages to store the contents of all of
the saveable highmem pages, some of them will be stored in the "normal"
memory.  Next, we allocate as many free "normal" pages as needed to store
the (remaining) image data.  We use a memory bitmap to mark the allocated
free pages (ie.  highmem as well as "normal" image pages).

Now, we use another memory bitmap to mark all of the saveable pages
(highmem as well as "normal") and the contents of the saveable pages are
copied into the image pages.  Then, the second bitmap is used to save the
pfns corresponding to the saveable pages and the first one is used to save
their data.

During the resume phase the pfns of the pages that were saveable during the
suspend are loaded from the image and used to mark the "unsafe" page
frames.  Next, we try to allocate as many free highmem page frames as to
load all of the image data that had been in the highmem before the suspend
and we allocate so many free "normal" page frames that the total number of
allocated free pages (highmem and "normal") is equal to the size of the
image.  While doing this we have to make sure that there will be some extra
free "normal" and "safe" page frames for two lists of PBEs constructed
later.

Now, the image data are loaded, if possible, into their "original" page
frames.  The image data that cannot be written into their "original" page
frames are loaded into "safe" page frames and their "original" kernel
virtual addresses, as well as the addresses of the "safe" pages containing
their copies, are stored in one of two lists of PBEs.

One list of PBEs is for the copies of "normal" suspend pages (ie.  "normal"
pages that were saveable during the suspend) and it is used in the same way
as previously (ie.  by the architecture-dependent parts of swsusp).  The
other list of PBEs is for the copies of highmem suspend pages.  The pages
in this list are restored (in a reversible way) right before the
arch-dependent code is called.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h |   9 +-
 kernel/power/power.h    |   2 +-
 kernel/power/snapshot.c | 851 ++++++++++++++++++++++++++++++++++++------------
 kernel/power/swap.c     |   2 +-
 kernel/power/swsusp.c   |  53 +--
 kernel/power/user.c     |   2 +-
 mm/vmscan.c             |   3 +
 7 files changed, 680 insertions(+), 242 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index b1237f16ecde..bf99bd49f8ef 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -9,10 +9,13 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 
-/* page backup entry */
+/* struct pbe is used for creating lists of pages that should be restored
+ * atomically during the resume from disk, because the page frames they have
+ * occupied before the suspend are in use.
+ */
 struct pbe {
-	unsigned long address;		/* address of the copy */
-	unsigned long orig_address;	/* original address of page */
+	void *address;		/* address of the copy */
+	void *orig_address;	/* original address of a page */
 	struct pbe *next;
 };
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7dbfd9f67e1c..3763343bde2f 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -103,8 +103,8 @@ struct snapshot_handle {
 extern unsigned int snapshot_additional_pages(struct zone *zone);
 extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
 extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
+extern void snapshot_write_finalize(struct snapshot_handle *handle);
 extern int snapshot_image_loaded(struct snapshot_handle *handle);
-extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 
 /*
  * This structure is used to pass the values needed for the identification
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 99f9b7d177d6..fd8251d40eb8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1,15 +1,15 @@
 /*
  * linux/kernel/power/snapshot.c
  *
- * This file provide system snapshot/restore functionality.
+ * This file provides system snapshot/restore functionality for swsusp.
  *
  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
  *
- * This file is released under the GPLv2, and is based on swsusp.c.
+ * This file is released under the GPLv2.
  *
  */
 
-
 #include <linux/version.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -34,137 +34,24 @@
 
 #include "power.h"
 
-/* List of PBEs used for creating and restoring the suspend image */
+/* List of PBEs needed for restoring the pages that were allocated before
+ * the suspend and included in the suspend image, but have also been
+ * allocated by the "resume" kernel, so their contents cannot be written
+ * directly to their "original" page frames.
+ */
 struct pbe *restore_pblist;
 
-static unsigned int nr_copy_pages;
-static unsigned int nr_meta_pages;
+/* Pointer to an auxiliary buffer (1 page) */
 static void *buffer;
 
-#ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void)
-{
-	struct zone *zone;
-	unsigned long zone_pfn;
-	unsigned int n = 0;
-
-	for_each_zone (zone)
-		if (is_highmem(zone)) {
-			mark_free_pages(zone);
-			for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
-				struct page *page;
-				unsigned long pfn = zone_pfn + zone->zone_start_pfn;
-				if (!pfn_valid(pfn))
-					continue;
-				page = pfn_to_page(pfn);
-				if (PageReserved(page))
-					continue;
-				if (PageNosaveFree(page))
-					continue;
-				n++;
-			}
-		}
-	return n;
-}
-
-struct highmem_page {
-	char *data;
-	struct page *page;
-	struct highmem_page *next;
-};
-
-static struct highmem_page *highmem_copy;
-
-static int save_highmem_zone(struct zone *zone)
-{
-	unsigned long zone_pfn;
-	mark_free_pages(zone);
-	for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
-		struct page *page;
-		struct highmem_page *save;
-		void *kaddr;
-		unsigned long pfn = zone_pfn + zone->zone_start_pfn;
-
-		if (!(pfn%10000))
-			printk(".");
-		if (!pfn_valid(pfn))
-			continue;
-		page = pfn_to_page(pfn);
-		/*
-		 * This condition results from rvmalloc() sans vmalloc_32()
-		 * and architectural memory reservations. This should be
-		 * corrected eventually when the cases giving rise to this
-		 * are better understood.
-		 */
-		if (PageReserved(page))
-			continue;
-		BUG_ON(PageNosave(page));
-		if (PageNosaveFree(page))
-			continue;
-		save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
-		if (!save)
-			return -ENOMEM;
-		save->next = highmem_copy;
-		save->page = page;
-		save->data = (void *) get_zeroed_page(GFP_ATOMIC);
-		if (!save->data) {
-			kfree(save);
-			return -ENOMEM;
-		}
-		kaddr = kmap_atomic(page, KM_USER0);
-		memcpy(save->data, kaddr, PAGE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		highmem_copy = save;
-	}
-	return 0;
-}
-
-int save_highmem(void)
-{
-	struct zone *zone;
-	int res = 0;
-
-	pr_debug("swsusp: Saving Highmem");
-	drain_local_pages();
-	for_each_zone (zone) {
-		if (is_highmem(zone))
-			res = save_highmem_zone(zone);
-		if (res)
-			return res;
-	}
-	printk("\n");
-	return 0;
-}
-
-int restore_highmem(void)
-{
-	printk("swsusp: Restoring Highmem\n");
-	while (highmem_copy) {
-		struct highmem_page *save = highmem_copy;
-		void *kaddr;
-		highmem_copy = save->next;
-
-		kaddr = kmap_atomic(save->page, KM_USER0);
-		memcpy(kaddr, save->data, PAGE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		free_page((long) save->data);
-		kfree(save);
-	}
-	return 0;
-}
-#else
-static inline unsigned int count_highmem_pages(void) {return 0;}
-static inline int save_highmem(void) {return 0;}
-static inline int restore_highmem(void) {return 0;}
-#endif
-
 /**
  *	@safe_needed - on resume, for storing the PBE list and the image,
  *	we can only use memory pages that do not conflict with the pages
- *	used before suspend.
+ *	used before suspend.  The unsafe pages have PageNosaveFree set
+ *	and we count them using unsafe_pages.
  *
- *	The unsafe pages are marked with the PG_nosave_free flag
- *	and we count them using unsafe_pages
+ *	Each allocated image page is marked as PageNosave and PageNosaveFree
+ *	so that swsusp_free() can release it.
  */
 
 #define PG_ANY		0
@@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;}
 
 static unsigned int allocated_unsafe_pages;
 
-static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
 
@@ -195,20 +82,38 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 
 unsigned long get_safe_page(gfp_t gfp_mask)
 {
-	return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE);
+	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
+}
+
+static struct page *alloc_image_page(gfp_t gfp_mask) {
+	struct page *page;
+
+	page = alloc_page(gfp_mask);
+	if (page) {
+		SetPageNosave(page);
+		SetPageNosaveFree(page);
+	}
+	return page;
 }
 
 /**
  *	free_image_page - free page represented by @addr, allocated with
- *	alloc_image_page (page flags set by it must be cleared)
+ *	get_image_page (page flags set by it must be cleared)
  */
 
 static inline void free_image_page(void *addr, int clear_nosave_free)
 {
-	ClearPageNosave(virt_to_page(addr));
+	struct page *page;
+
+	BUG_ON(!virt_addr_valid(addr));
+
+	page = virt_to_page(addr);
+
+	ClearPageNosave(page);
 	if (clear_nosave_free)
-		ClearPageNosaveFree(virt_to_page(addr));
-	free_page((unsigned long)addr);
+		ClearPageNosaveFree(page);
+
+	__free_page(page);
 }
 
 /* struct linked_page is used to build chains of pages */
@@ -269,7 +174,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 		struct linked_page *lp;
 
-		lp = alloc_image_page(ca->gfp_mask, ca->safe_needed);
+		lp = get_image_page(ca->gfp_mask, ca->safe_needed);
 		if (!lp)
 			return NULL;
 
@@ -446,8 +351,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 
 	/* Compute the number of zones */
 	nr = 0;
-	for_each_zone (zone)
-		if (populated_zone(zone) && !is_highmem(zone))
+	for_each_zone(zone)
+		if (populated_zone(zone))
 			nr++;
 
 	/* Allocate the list of zones bitmap objects */
@@ -459,10 +364,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 	}
 
 	/* Initialize the zone bitmap objects */
-	for_each_zone (zone) {
+	for_each_zone(zone) {
 		unsigned long pfn;
 
-		if (!populated_zone(zone) || is_highmem(zone))
+		if (!populated_zone(zone))
 			continue;
 
 		zone_bm->start_pfn = zone->zone_start_pfn;
@@ -481,7 +386,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 		while (bb) {
 			unsigned long *ptr;
 
-			ptr = alloc_image_page(gfp_mask, safe_needed);
+			ptr = get_image_page(gfp_mask, safe_needed);
 			bb->data = ptr;
 			if (!ptr)
 				goto Free;
@@ -669,9 +574,81 @@ unsigned int snapshot_additional_pages(struct zone *zone)
 
 	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
 	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
-	return res;
+	return 2 * res;
 }
 
+#ifdef CONFIG_HIGHMEM
+/**
+ *	count_free_highmem_pages - compute the total number of free highmem
+ *	pages, system-wide.
+ */
+
+static unsigned int count_free_highmem_pages(void)
+{
+	struct zone *zone;
+	unsigned int cnt = 0;
+
+	for_each_zone(zone)
+		if (populated_zone(zone) && is_highmem(zone))
+			cnt += zone->free_pages;
+
+	return cnt;
+}
+
+/**
+ *	saveable_highmem_page - Determine whether a highmem page should be
+ *	included in the suspend image.
+ *
+ *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
+ *	and it isn't a part of a free chunk of pages.
+ */
+
+static struct page *saveable_highmem_page(unsigned long pfn)
+{
+	struct page *page;
+
+	if (!pfn_valid(pfn))
+		return NULL;
+
+	page = pfn_to_page(pfn);
+
+	BUG_ON(!PageHighMem(page));
+
+	if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+		return NULL;
+
+	return page;
+}
+
+/**
+ *	count_highmem_pages - compute the total number of saveable highmem
+ *	pages.
+ */
+
+unsigned int count_highmem_pages(void)
+{
+	struct zone *zone;
+	unsigned int n = 0;
+
+	for_each_zone(zone) {
+		unsigned long pfn, max_zone_pfn;
+
+		if (!is_highmem(zone))
+			continue;
+
+		mark_free_pages(zone);
+		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+			if (saveable_highmem_page(pfn))
+				n++;
+	}
+	return n;
+}
+#else
+static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
+static inline unsigned int count_highmem_pages(void) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
 /**
  *	pfn_is_nosave - check if given pfn is in the 'nosave' section
  */
@@ -684,12 +661,12 @@ static inline int pfn_is_nosave(unsigned long pfn)
 }
 
 /**
- *	saveable - Determine whether a page should be cloned or not.
- *	@pfn:	The page
+ *	saveable - Determine whether a non-highmem page should be included in
+ *	the suspend image.
  *
- *	We save a page if it isn't Nosave, and is not in the range of pages
- *	statically defined as 'unsaveable', and it
- *	isn't a part of a free chunk of pages.
+ *	We should save the page if it isn't Nosave, and is not in the range
+ *	of pages statically defined as 'unsaveable', and it isn't a part of
+ *	a free chunk of pages.
  */
 
 static struct page *saveable_page(unsigned long pfn)
@@ -701,76 +678,130 @@ static struct page *saveable_page(unsigned long pfn)
 
 	page = pfn_to_page(pfn);
 
-	if (PageNosave(page))
+	BUG_ON(PageHighMem(page));
+
+	if (PageNosave(page) || PageNosaveFree(page))
 		return NULL;
+
 	if (PageReserved(page) && pfn_is_nosave(pfn))
 		return NULL;
-	if (PageNosaveFree(page))
-		return NULL;
 
 	return page;
 }
 
+/**
+ *	count_data_pages - compute the total number of saveable non-highmem
+ *	pages.
+ */
+
 unsigned int count_data_pages(void)
 {
 	struct zone *zone;
 	unsigned long pfn, max_zone_pfn;
 	unsigned int n = 0;
 
-	for_each_zone (zone) {
+	for_each_zone(zone) {
 		if (is_highmem(zone))
 			continue;
+
 		mark_free_pages(zone);
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-			n += !!saveable_page(pfn);
+			if(saveable_page(pfn))
+				n++;
 	}
 	return n;
 }
 
-static inline void copy_data_page(long *dst, long *src)
+/* This is needed, because copy_page and memcpy are not usable for copying
+ * task structs.
+ */
+static inline void do_copy_page(long *dst, long *src)
 {
 	int n;
 
-	/* copy_page and memcpy are not usable for copying task structs. */
 	for (n = PAGE_SIZE / sizeof(long); n; n--)
 		*dst++ = *src++;
 }
 
+#ifdef CONFIG_HIGHMEM
+static inline struct page *
+page_is_saveable(struct zone *zone, unsigned long pfn)
+{
+	return is_highmem(zone) ?
+			saveable_highmem_page(pfn) : saveable_page(pfn);
+}
+
+static inline void
+copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+	struct page *s_page, *d_page;
+	void *src, *dst;
+
+	s_page = pfn_to_page(src_pfn);
+	d_page = pfn_to_page(dst_pfn);
+	if (PageHighMem(s_page)) {
+		src = kmap_atomic(s_page, KM_USER0);
+		dst = kmap_atomic(d_page, KM_USER1);
+		do_copy_page(dst, src);
+		kunmap_atomic(src, KM_USER0);
+		kunmap_atomic(dst, KM_USER1);
+	} else {
+		src = page_address(s_page);
+		if (PageHighMem(d_page)) {
+			/* Page pointed to by src may contain some kernel
+			 * data modified by kmap_atomic()
+			 */
+			do_copy_page(buffer, src);
+			dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
+			memcpy(dst, buffer, PAGE_SIZE);
+			kunmap_atomic(dst, KM_USER0);
+		} else {
+			dst = page_address(d_page);
+			do_copy_page(dst, src);
+		}
+	}
+}
+#else
+#define page_is_saveable(zone, pfn)	saveable_page(pfn)
+
+static inline void
+copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+	do_copy_page(page_address(pfn_to_page(dst_pfn)),
+			page_address(pfn_to_page(src_pfn)));
+}
+#endif /* CONFIG_HIGHMEM */
+
 static void
 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
 {
 	struct zone *zone;
 	unsigned long pfn;
 
-	for_each_zone (zone) {
+	for_each_zone(zone) {
 		unsigned long max_zone_pfn;
 
-		if (is_highmem(zone))
-			continue;
-
 		mark_free_pages(zone);
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-			if (saveable_page(pfn))
+			if (page_is_saveable(zone, pfn))
 				memory_bm_set_bit(orig_bm, pfn);
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
 	do {
 		pfn = memory_bm_next_pfn(orig_bm);
-		if (likely(pfn != BM_END_OF_MAP)) {
-			struct page *page;
-			void *src;
-
-			page = pfn_to_page(pfn);
-			src = page_address(page);
-			page = pfn_to_page(memory_bm_next_pfn(copy_bm));
-			copy_data_page(page_address(page), src);
-		}
+		if (likely(pfn != BM_END_OF_MAP))
+			copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
 	} while (pfn != BM_END_OF_MAP);
 }
 
+/* Total number of image pages */
+static unsigned int nr_copy_pages;
+/* Number of pages needed for saving the original pfns of the image pages */
+static unsigned int nr_meta_pages;
+
 /**
  *	swsusp_free - free pages allocated for the suspend.
  *
@@ -792,7 +823,7 @@ void swsusp_free(void)
 				if (PageNosave(page) && PageNosaveFree(page)) {
 					ClearPageNosave(page);
 					ClearPageNosaveFree(page);
-					free_page((long) page_address(page));
+					__free_page(page);
 				}
 			}
 	}
@@ -802,34 +833,108 @@ void swsusp_free(void)
 	buffer = NULL;
 }
 
+#ifdef CONFIG_HIGHMEM
+/**
+  *	count_pages_for_highmem - compute the number of non-highmem pages
+  *	that will be necessary for creating copies of highmem pages.
+  */
+
+static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
+{
+	unsigned int free_highmem = count_free_highmem_pages();
+
+	if (free_highmem >= nr_highmem)
+		nr_highmem = 0;
+	else
+		nr_highmem -= free_highmem;
+
+	return nr_highmem;
+}
+#else
+static unsigned int
+count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
+#endif /* CONFIG_HIGHMEM */
 
 /**
- *	enough_free_mem - Make sure we enough free memory to snapshot.
- *
- *	Returns TRUE or FALSE after checking the number of available
- *	free pages.
+ *	enough_free_mem - Make sure we have enough free memory for the
+ *	snapshot image.
  */
 
-static int enough_free_mem(unsigned int nr_pages)
+static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
 {
 	struct zone *zone;
 	unsigned int free = 0, meta = 0;
 
-	for_each_zone (zone)
-		if (!is_highmem(zone)) {
+	for_each_zone(zone) {
+		meta += snapshot_additional_pages(zone);
+		if (!is_highmem(zone))
 			free += zone->free_pages;
-			meta += snapshot_additional_pages(zone);
-		}
+	}
 
-	pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n",
+	nr_pages += count_pages_for_highmem(nr_highmem);
+	pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n",
 		nr_pages, PAGES_FOR_IO, meta, free);
 
 	return free > nr_pages + PAGES_FOR_IO + meta;
 }
 
+#ifdef CONFIG_HIGHMEM
+/**
+ *	get_highmem_buffer - if there are some highmem pages in the suspend
+ *	image, we may need the buffer to copy them and/or load their data.
+ */
+
+static inline int get_highmem_buffer(int safe_needed)
+{
+	buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
+	return buffer ? 0 : -ENOMEM;
+}
+
+/**
+ *	alloc_highmem_image_pages - allocate some highmem pages for the image.
+ *	Try to allocate as many pages as needed, but if the number of free
+ *	highmem pages is lesser than that, allocate them all.
+ */
+
+static inline unsigned int
+alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
+{
+	unsigned int to_alloc = count_free_highmem_pages();
+
+	if (to_alloc > nr_highmem)
+		to_alloc = nr_highmem;
+
+	nr_highmem -= to_alloc;
+	while (to_alloc-- > 0) {
+		struct page *page;
+
+		page = alloc_image_page(__GFP_HIGHMEM);
+		memory_bm_set_bit(bm, page_to_pfn(page));
+	}
+	return nr_highmem;
+}
+#else
+static inline int get_highmem_buffer(int safe_needed) { return 0; }
+
+static inline unsigned int
+alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ *	swsusp_alloc - allocate memory for the suspend image
+ *
+ *	We first try to allocate as many highmem pages as there are
+ *	saveable highmem pages in the system.  If that fails, we allocate
+ *	non-highmem pages for the copies of the remaining highmem ones.
+ *
+ *	In this approach it is likely that the copies of highmem pages will
+ *	also be located in the high memory, because of the way in which
+ *	copy_data_pages() works.
+ */
+
 static int
 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
-		unsigned int nr_pages)
+		unsigned int nr_pages, unsigned int nr_highmem)
 {
 	int error;
 
@@ -841,13 +946,19 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
 	if (error)
 		goto Free;
 
+	if (nr_highmem > 0) {
+		error = get_highmem_buffer(PG_ANY);
+		if (error)
+			goto Free;
+
+		nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
+	}
 	while (nr_pages-- > 0) {
-		struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+
 		if (!page)
 			goto Free;
 
-		SetPageNosave(page);
-		SetPageNosaveFree(page);
 		memory_bm_set_bit(copy_bm, page_to_pfn(page));
 	}
 	return 0;
@@ -857,30 +968,39 @@ Free:
 	return -ENOMEM;
 }
 
-/* Memory bitmap used for marking saveable pages */
+/* Memory bitmap used for marking saveable pages (during suspend) or the
+ * suspend image pages (during resume)
+ */
 static struct memory_bitmap orig_bm;
-/* Memory bitmap used for marking allocated pages that will contain the copies
- * of saveable pages
+/* Memory bitmap used on suspend for marking allocated pages that will contain
+ * the copies of saveable pages.  During resume it is initially used for
+ * marking the suspend image pages, but then its set bits are duplicated in
+ * @orig_bm and it is released.  Next, on systems with high memory, it may be
+ * used for marking "safe" highmem pages, but it has to be reinitialized for
+ * this purpose.
  */
 static struct memory_bitmap copy_bm;
 
 asmlinkage int swsusp_save(void)
 {
-	unsigned int nr_pages;
+	unsigned int nr_pages, nr_highmem;
 
-	pr_debug("swsusp: critical section: \n");
+	printk("swsusp: critical section: \n");
 
 	drain_local_pages();
 	nr_pages = count_data_pages();
-	printk("swsusp: Need to copy %u pages\n", nr_pages);
+	nr_highmem = count_highmem_pages();
+	printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem);
 
-	if (!enough_free_mem(nr_pages)) {
+	if (!enough_free_mem(nr_pages, nr_highmem)) {
 		printk(KERN_ERR "swsusp: Not enough free memory\n");
 		return -ENOMEM;
 	}
 
-	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages))
+	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
+		printk(KERN_ERR "swsusp: Memory allocation failed\n");
 		return -ENOMEM;
+	}
 
 	/* During allocating of suspend pagedir, new cold pages may appear.
 	 * Kill them.
@@ -894,10 +1014,12 @@ asmlinkage int swsusp_save(void)
 	 * touch swap space! Except we must write out our image of course.
 	 */
 
+	nr_pages += nr_highmem;
 	nr_copy_pages = nr_pages;
-	nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
 	printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
+
 	return 0;
 }
 
@@ -960,7 +1082,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
 
 	if (!buffer) {
 		/* This makes the buffer be freed by swsusp_free() */
-		buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
+		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
 		if (!buffer)
 			return -ENOMEM;
 	}
@@ -975,9 +1097,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
 			memset(buffer, 0, PAGE_SIZE);
 			pack_pfns(buffer, &orig_bm);
 		} else {
-			unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+			struct page *page;
 
-			handle->buffer = page_address(pfn_to_page(pfn));
+			page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+			if (PageHighMem(page)) {
+				/* Highmem pages are copied to the buffer,
+				 * because we can't return with a kmapped
+				 * highmem page (we may not be called again).
+				 */
+				void *kaddr;
+
+				kaddr = kmap_atomic(page, KM_USER0);
+				memcpy(buffer, kaddr, PAGE_SIZE);
+				kunmap_atomic(kaddr, KM_USER0);
+				handle->buffer = buffer;
+			} else {
+				handle->buffer = page_address(page);
+			}
 		}
 		handle->prev = handle->cur;
 	}
@@ -1005,7 +1141,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
 	unsigned long pfn, max_zone_pfn;
 
 	/* Clear page flags */
-	for_each_zone (zone) {
+	for_each_zone(zone) {
 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (pfn_valid(pfn))
@@ -1101,6 +1237,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 	}
 }
 
+/* List of "safe" pages that may be used to store data loaded from the suspend
+ * image
+ */
+static struct linked_page *safe_pages_list;
+
+#ifdef CONFIG_HIGHMEM
+/* struct highmem_pbe is used for creating the list of highmem pages that
+ * should be restored atomically during the resume from disk, because the page
+ * frames they have occupied before the suspend are in use.
+ */
+struct highmem_pbe {
+	struct page *copy_page;	/* data is here now */
+	struct page *orig_page;	/* data was here before the suspend */
+	struct highmem_pbe *next;
+};
+
+/* List of highmem PBEs needed for restoring the highmem pages that were
+ * allocated before the suspend and included in the suspend image, but have
+ * also been allocated by the "resume" kernel, so their contents cannot be
+ * written directly to their "original" page frames.
+ */
+static struct highmem_pbe *highmem_pblist;
+
+/**
+ *	count_highmem_image_pages - compute the number of highmem pages in the
+ *	suspend image.  The bits in the memory bitmap @bm that correspond to the
+ *	image pages are assumed to be set.
+ */
+
+static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
+{
+	unsigned long pfn;
+	unsigned int cnt = 0;
+
+	memory_bm_position_reset(bm);
+	pfn = memory_bm_next_pfn(bm);
+	while (pfn != BM_END_OF_MAP) {
+		if (PageHighMem(pfn_to_page(pfn)))
+			cnt++;
+
+		pfn = memory_bm_next_pfn(bm);
+	}
+	return cnt;
+}
+
+/**
+ *	prepare_highmem_image - try to allocate as many highmem pages as
+ *	there are highmem image pages (@nr_highmem_p points to the variable
+ *	containing the number of highmem image pages).  The pages that are
+ *	"safe" (ie. will not be overwritten when the suspend image is
+ *	restored) have the corresponding bits set in @bm (it must be
+ *	unitialized).
+ *
+ *	NOTE: This function should not be called if there are no highmem
+ *	image pages.
+ */
+
+static unsigned int safe_highmem_pages;
+
+static struct memory_bitmap *safe_highmem_bm;
+
+static int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+	unsigned int to_alloc;
+
+	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
+		return -ENOMEM;
+
+	if (get_highmem_buffer(PG_SAFE))
+		return -ENOMEM;
+
+	to_alloc = count_free_highmem_pages();
+	if (to_alloc > *nr_highmem_p)
+		to_alloc = *nr_highmem_p;
+	else
+		*nr_highmem_p = to_alloc;
+
+	safe_highmem_pages = 0;
+	while (to_alloc-- > 0) {
+		struct page *page;
+
+		page = alloc_page(__GFP_HIGHMEM);
+		if (!PageNosaveFree(page)) {
+			/* The page is "safe", set its bit the bitmap */
+			memory_bm_set_bit(bm, page_to_pfn(page));
+			safe_highmem_pages++;
+		}
+		/* Mark the page as allocated */
+		SetPageNosave(page);
+		SetPageNosaveFree(page);
+	}
+	memory_bm_position_reset(bm);
+	safe_highmem_bm = bm;
+	return 0;
+}
+
+/**
+ *	get_highmem_page_buffer - for given highmem image page find the buffer
+ *	that suspend_write_next() should set for its caller to write to.
+ *
+ *	If the page is to be saved to its "original" page frame or a copy of
+ *	the page is to be made in the highmem, @buffer is returned.  Otherwise,
+ *	the copy of the page is to be made in normal memory, so the address of
+ *	the copy is returned.
+ *
+ *	If @buffer is returned, the caller of suspend_write_next() will write
+ *	the page's contents to @buffer, so they will have to be copied to the
+ *	right location on the next call to suspend_write_next() and it is done
+ *	with the help of copy_last_highmem_page().  For this purpose, if
+ *	@buffer is returned, @last_highmem page is set to the page to which
+ *	the data will have to be copied from @buffer.
+ */
+
+static struct page *last_highmem_page;
+
+static void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+	struct highmem_pbe *pbe;
+	void *kaddr;
+
+	if (PageNosave(page) && PageNosaveFree(page)) {
+		/* We have allocated the "original" page frame and we can
+		 * use it directly to store the loaded page.
+		 */
+		last_highmem_page = page;
+		return buffer;
+	}
+	/* The "original" page frame has not been allocated and we have to
+	 * use a "safe" page frame to store the loaded page.
+	 */
+	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
+	if (!pbe) {
+		swsusp_free();
+		return NULL;
+	}
+	pbe->orig_page = page;
+	if (safe_highmem_pages > 0) {
+		struct page *tmp;
+
+		/* Copy of the page will be stored in high memory */
+		kaddr = buffer;
+		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
+		safe_highmem_pages--;
+		last_highmem_page = tmp;
+		pbe->copy_page = tmp;
+	} else {
+		/* Copy of the page will be stored in normal memory */
+		kaddr = safe_pages_list;
+		safe_pages_list = safe_pages_list->next;
+		pbe->copy_page = virt_to_page(kaddr);
+	}
+	pbe->next = highmem_pblist;
+	highmem_pblist = pbe;
+	return kaddr;
+}
+
+/**
+ *	copy_last_highmem_page - copy the contents of a highmem image from
+ *	@buffer, where the caller of snapshot_write_next() has place them,
+ *	to the right location represented by @last_highmem_page .
+ */
+
+static void copy_last_highmem_page(void)
+{
+	if (last_highmem_page) {
+		void *dst;
+
+		dst = kmap_atomic(last_highmem_page, KM_USER0);
+		memcpy(dst, buffer, PAGE_SIZE);
+		kunmap_atomic(dst, KM_USER0);
+		last_highmem_page = NULL;
+	}
+}
+
+static inline int last_highmem_page_copied(void)
+{
+	return !last_highmem_page;
+}
+
+static inline void free_highmem_data(void)
+{
+	if (safe_highmem_bm)
+		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
+
+	if (buffer)
+		free_image_page(buffer, PG_UNSAFE_CLEAR);
+}
+#else
+static inline int get_safe_write_buffer(void) { return 0; }
+
+static unsigned int
+count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
+
+static inline int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+	return 0;
+}
+
+static inline void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+	return NULL;
+}
+
+static inline void copy_last_highmem_page(void) {}
+static inline int last_highmem_page_copied(void) { return 1; }
+static inline void free_highmem_data(void) {}
+#endif /* CONFIG_HIGHMEM */
+
 /**
  *	prepare_image - use the memory bitmap @bm to mark the pages that will
  *	be overwritten in the process of restoring the system memory state
@@ -1110,20 +1458,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
  *	The idea is to allocate a new memory bitmap first and then allocate
  *	as many pages as needed for the image data, but not to assign these
  *	pages to specific tasks initially.  Instead, we just mark them as
- *	allocated and create a list of "safe" pages that will be used later.
+ *	allocated and create a lists of "safe" pages that will be used
+ *	later.  On systems with high memory a list of "safe" highmem pages is
+ *	also created.
  */
 
 #define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
 
-static struct linked_page *safe_pages_list;
-
 static int
 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 {
-	unsigned int nr_pages;
+	unsigned int nr_pages, nr_highmem;
 	struct linked_page *sp_list, *lp;
 	int error;
 
+	/* If there is no highmem, the buffer will not be necessary */
+	free_image_page(buffer, PG_UNSAFE_CLEAR);
+	buffer = NULL;
+
+	nr_highmem = count_highmem_image_pages(bm);
 	error = mark_unsafe_pages(bm);
 	if (error)
 		goto Free;
@@ -1134,6 +1487,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+	if (nr_highmem > 0) {
+		error = prepare_highmem_image(bm, &nr_highmem);
+		if (error)
+			goto Free;
+	}
 	/* Reserve some safe pages for potential later use.
 	 *
 	 * NOTE: This way we make sure there will be enough safe pages for the
@@ -1142,10 +1500,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 */
 	sp_list = NULL;
 	/* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
-	nr_pages = nr_copy_pages - allocated_unsafe_pages;
+	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
-		lp = alloc_image_page(GFP_ATOMIC, PG_SAFE);
+		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
 		if (!lp) {
 			error = -ENOMEM;
 			goto Free;
@@ -1156,7 +1514,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	}
 	/* Preallocate memory for the image */
 	safe_pages_list = NULL;
-	nr_pages = nr_copy_pages - allocated_unsafe_pages;
+	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -1196,6 +1554,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 	struct pbe *pbe;
 	struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
 
+	if (PageHighMem(page))
+		return get_highmem_page_buffer(page, ca);
+
 	if (PageNosave(page) && PageNosaveFree(page))
 		/* We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
@@ -1210,12 +1571,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 		swsusp_free();
 		return NULL;
 	}
-	pbe->orig_address = (unsigned long)page_address(page);
-	pbe->address = (unsigned long)safe_pages_list;
+	pbe->orig_address = page_address(page);
+	pbe->address = safe_pages_list;
 	safe_pages_list = safe_pages_list->next;
 	pbe->next = restore_pblist;
 	restore_pblist = pbe;
-	return (void *)pbe->address;
+	return pbe->address;
 }
 
 /**
@@ -1249,14 +1610,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
 		return 0;
 
-	if (!buffer) {
-		/* This makes the buffer be freed by swsusp_free() */
-		buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
+	if (handle->offset == 0) {
+		if (!buffer)
+			/* This makes the buffer be freed by swsusp_free() */
+			buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+
 		if (!buffer)
 			return -ENOMEM;
-	}
-	if (!handle->offset)
+
 		handle->buffer = buffer;
+	}
 	handle->sync_read = 1;
 	if (handle->prev < handle->cur) {
 		if (handle->prev == 0) {
@@ -1284,8 +1647,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 					return -ENOMEM;
 			}
 		} else {
+			copy_last_highmem_page();
 			handle->buffer = get_buffer(&orig_bm, &ca);
-			handle->sync_read = 0;
+			if (handle->buffer != buffer)
+				handle->sync_read = 0;
 		}
 		handle->prev = handle->cur;
 	}
@@ -1301,15 +1666,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 	return count;
 }
 
+/**
+ *	snapshot_write_finalize - must be called after the last call to
+ *	snapshot_write_next() in case the last page in the image happens
+ *	to be a highmem page and its contents should be stored in the
+ *	highmem.  Additionally, it releases the memory that will not be
+ *	used any more.
+ */
+
+void snapshot_write_finalize(struct snapshot_handle *handle)
+{
+	copy_last_highmem_page();
+	/* Free only if we have loaded the image entirely */
+	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
+		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+		free_highmem_data();
+	}
+}
+
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
-	return !(!nr_copy_pages ||
+	return !(!nr_copy_pages || !last_highmem_page_copied() ||
 			handle->cur <= nr_meta_pages + nr_copy_pages);
 }
 
-void snapshot_free_unused_memory(struct snapshot_handle *handle)
+#ifdef CONFIG_HIGHMEM
+/* Assumes that @buf is ready and points to a "safe" page */
+static inline void
+swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
 {
-	/* Free only if we have loaded the image entirely */
-	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
-		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+	void *kaddr1, *kaddr2;
+
+	kaddr1 = kmap_atomic(p1, KM_USER0);
+	kaddr2 = kmap_atomic(p2, KM_USER1);
+	memcpy(buf, kaddr1, PAGE_SIZE);
+	memcpy(kaddr1, kaddr2, PAGE_SIZE);
+	memcpy(kaddr2, buf, PAGE_SIZE);
+	kunmap_atomic(kaddr1, KM_USER0);
+	kunmap_atomic(kaddr2, KM_USER1);
+}
+
+/**
+ *	restore_highmem - for each highmem page that was allocated before
+ *	the suspend and included in the suspend image, and also has been
+ *	allocated by the "resume" kernel swap its current (ie. "before
+ *	resume") contents with the previous (ie. "before suspend") one.
+ *
+ *	If the resume eventually fails, we can call this function once
+ *	again and restore the "before resume" highmem state.
+ */
+
+int restore_highmem(void)
+{
+	struct highmem_pbe *pbe = highmem_pblist;
+	void *buf;
+
+	if (!pbe)
+		return 0;
+
+	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
+	if (!buf)
+		return -ENOMEM;
+
+	while (pbe) {
+		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
+		pbe = pbe->next;
+	}
+	free_image_page(buf, PG_UNSAFE_CLEAR);
+	return 0;
 }
+#endif /* CONFIG_HIGHMEM */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index aa5a9bff01f1..cbd187e90410 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -558,7 +558,7 @@ static int load_image(struct swap_map_handle *handle,
 		error = err2;
 	if (!error) {
 		printk("\b\b\b\bdone\n");
-		snapshot_free_unused_memory(snapshot);
+		snapshot_write_finalize(snapshot);
 		if (!snapshot_image_loaded(snapshot))
 			error = -ENODATA;
 	}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4147a756a8c7..68de5c1dbd78 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -64,10 +64,8 @@ int in_suspend __nosavedata = 0;
 
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void);
-int save_highmem(void);
 int restore_highmem(void);
 #else
-static inline int save_highmem(void) { return 0; }
 static inline int restore_highmem(void) { return 0; }
 static inline unsigned int count_highmem_pages(void) { return 0; }
 #endif
@@ -184,7 +182,7 @@ static inline unsigned long __shrink_memory(long tmp)
 
 int swsusp_shrink_memory(void)
 {
-	long size, tmp;
+	long tmp;
 	struct zone *zone;
 	unsigned long pages = 0;
 	unsigned int i = 0;
@@ -192,15 +190,27 @@ int swsusp_shrink_memory(void)
 
 	printk("Shrinking memory...  ");
 	do {
-		size = 2 * count_highmem_pages();
-		size += size / 50 + count_data_pages() + PAGES_FOR_IO;
+		long size, highmem_size;
+
+		highmem_size = count_highmem_pages();
+		size = count_data_pages() + PAGES_FOR_IO;
 		tmp = size;
+		size += highmem_size;
 		for_each_zone (zone)
-			if (!is_highmem(zone) && populated_zone(zone)) {
-				tmp -= zone->free_pages;
-				tmp += zone->lowmem_reserve[ZONE_NORMAL];
-				tmp += snapshot_additional_pages(zone);
+			if (populated_zone(zone)) {
+				if (is_highmem(zone)) {
+					highmem_size -= zone->free_pages;
+				} else {
+					tmp -= zone->free_pages;
+					tmp += zone->lowmem_reserve[ZONE_NORMAL];
+					tmp += snapshot_additional_pages(zone);
+				}
 			}
+
+		if (highmem_size < 0)
+			highmem_size = 0;
+
+		tmp += highmem_size;
 		if (tmp > 0) {
 			tmp = __shrink_memory(tmp);
 			if (!tmp)
@@ -223,6 +233,7 @@ int swsusp_suspend(void)
 
 	if ((error = arch_prepare_suspend()))
 		return error;
+
 	local_irq_disable();
 	/* At this point, device_suspend() has been called, but *not*
 	 * device_power_down(). We *must* device_power_down() now.
@@ -235,18 +246,11 @@ int swsusp_suspend(void)
 		goto Enable_irqs;
 	}
 
-	if ((error = save_highmem())) {
-		printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
-		goto Restore_highmem;
-	}
-
 	save_processor_state();
 	if ((error = swsusp_arch_suspend()))
 		printk(KERN_ERR "Error %d suspending\n", error);
 	/* Restore control flow magically appears here */
 	restore_processor_state();
-Restore_highmem:
-	restore_highmem();
 	/* NOTE:  device_power_up() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
@@ -268,18 +272,23 @@ int swsusp_resume(void)
 		printk(KERN_ERR "Some devices failed to power down, very bad\n");
 	/* We'll ignore saved state, but this gets preempt count (etc) right */
 	save_processor_state();
-	error = swsusp_arch_resume();
-	/* Code below is only ever reached in case of failure. Otherwise
-	 * execution continues at place where swsusp_arch_suspend was called
-         */
-	BUG_ON(!error);
+	error = restore_highmem();
+	if (!error) {
+		error = swsusp_arch_resume();
+		/* The code below is only ever reached in case of a failure.
+		 * Otherwise execution continues at place where
+		 * swsusp_arch_suspend() was called
+        	 */
+		BUG_ON(!error);
+		/* This call to restore_highmem() undos the previous one */
+		restore_highmem();
+	}
 	/* The only reason why swsusp_arch_resume() can fail is memory being
 	 * very tight, so we have to free it as soon as we can to avoid
 	 * subsequent failures
 	 */
 	swsusp_free();
 	restore_processor_state();
-	restore_highmem();
 	touch_softlockup_watchdog();
 	device_power_up();
 	local_irq_enable();
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 05c58a2c0dd4..a63b25c63b49 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -194,12 +194,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		break;
 
 	case SNAPSHOT_ATOMIC_RESTORE:
+		snapshot_write_finalize(&data->handle);
 		if (data->mode != O_WRONLY || !data->frozen ||
 		    !snapshot_image_loaded(&data->handle)) {
 			error = -EPERM;
 			break;
 		}
-		snapshot_free_unused_memory(&data->handle);
 		down(&pm_sem);
 		pm_prepare_console();
 		suspend_console();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e97baa3b2aa..2a6a79f68138 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1260,6 +1260,9 @@ out:
 	}
 	if (!all_zones_ok) {
 		cond_resched();
+
+		try_to_freeze();
+
 		goto loop_again;
 	}
 
-- 
cgit v1.2.3


From 859491218770315ba95ee3fa09961fc71c506cae Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:19 -0800
Subject: [PATCH] swsusp: use __GFP_WAIT

swsusp uses GFP_ATOMIC, but it can afford to use __GFP_WAIT, which will
permit it to reclaim clean pagecache instead of emitting scary
page-allocation-failure messages.

Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index cbd187e90410..52e70ca832a8 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -63,7 +63,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
 {
 	struct bio *bio;
 
-	bio = bio_alloc(GFP_ATOMIC, 1);
+	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
 	if (!bio)
 		return -ENOMEM;
 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
@@ -216,7 +216,7 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
 		return -ENOSPC;
 
 	if (bio_chain) {
-		src = (void *)__get_free_page(GFP_ATOMIC);
+		src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
 		if (src) {
 			memcpy(src, buf, PAGE_SIZE);
 		} else {
@@ -473,7 +473,7 @@ static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
 	if (!start)
 		return -EINVAL;
 
-	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+	handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
 	if (!handle->cur)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 8a05aac2631aa0e6494d9dc990f8c68ed8b8fde7 Mon Sep 17 00:00:00 2001
From: Stefan Seyfried <seife@suse.de>
Date: Wed, 6 Dec 2006 20:34:21 -0800
Subject: [PATCH] swsusp: fix platform mode

At some point after 2.6.13, in-kernel software suspend got "incomplete" for
the so-called "platform" mode.  pm_ops->prepare() is never called.  A
visible sign of this is the "moon" light on thinkpads not flashing during
suspend.  Fix by readding the pm_ops->prepare call during suspend.

Signed-off-by: Stefan Seyfried <seife@suse.de>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d79feeb45459..f5079231383e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -29,6 +29,22 @@ char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 
+/**
+ *	platform_prepare - prepare the machine for hibernation using the
+ *	platform driver if so configured and return an error code if it fails
+ */
+
+static inline int platform_prepare(void)
+{
+	int error = 0;
+
+	if (pm_disk_mode == PM_DISK_PLATFORM) {
+		if (pm_ops && pm_ops->prepare)
+			error = pm_ops->prepare(PM_SUSPEND_DISK);
+	}
+	return error;
+}
+
 /**
  *	power_down - Shut machine down for hibernate.
  *	@mode:		Suspend-to-disk mode
@@ -91,9 +107,15 @@ static int prepare_processes(void)
 		goto thaw;
 	}
 
+	error = platform_prepare();
+	if (error)
+		goto thaw;
+
 	/* Free memory before shutting down devices. */
 	if (!(error = swsusp_shrink_memory()))
 		return 0;
+
+	platform_finish();
 thaw:
 	thaw_processes();
 enable_cpus:
-- 
cgit v1.2.3


From 7dfb71030f7636a0d65200158113c37764552f93 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:23 -0800
Subject: [PATCH] Add include/linux/freezer.h and move definitions from sched.h

Move process freezing functions from include/linux/sched.h to freezer.h, so
that modifications to the freezer or the kernel configuration don't require
recompiling just about everything.

[akpm@osdl.org: fix ueagle driver]
Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm/kernel/signal.c                  |  1 +
 arch/avr32/kernel/signal.c                |  2 +-
 arch/frv/kernel/signal.c                  |  2 +-
 arch/h8300/kernel/signal.c                |  2 +-
 arch/i386/kernel/io_apic.c                |  1 +
 arch/m32r/kernel/signal.c                 |  2 +-
 arch/powerpc/kernel/signal_32.c           |  2 +-
 arch/sh/kernel/signal.c                   |  1 +
 arch/sh64/kernel/signal.c                 |  2 +-
 drivers/block/pktcdvd.c                   |  2 +-
 drivers/char/hvc_console.c                |  1 +
 drivers/edac/edac_mc.c                    |  1 +
 drivers/ieee1394/nodemgr.c                |  1 +
 drivers/input/gameport/gameport.c         |  1 +
 drivers/input/serio/serio.c               |  1 +
 drivers/macintosh/therm_adt746x.c         |  1 +
 drivers/macintosh/via-pmu.c               |  2 +-
 drivers/macintosh/windfarm_core.c         |  1 +
 drivers/md/md.c                           |  2 +-
 drivers/media/dvb/dvb-core/dvb_frontend.c |  2 +-
 drivers/media/video/msp3400-driver.c      |  2 +-
 drivers/media/video/tvaudio.c             |  1 +
 drivers/media/video/video-buf-dvb.c       |  2 +-
 drivers/media/video/vivi.c                |  1 +
 drivers/mfd/ucb1x00-ts.c                  |  2 +-
 drivers/net/irda/stir4200.c               |  1 +
 drivers/net/wireless/airo.c               |  1 +
 drivers/pcmcia/cs.c                       |  1 +
 drivers/pnp/pnpbios/core.c                |  1 +
 drivers/usb/atm/ueagle-atm.c              |  2 +
 drivers/usb/core/hub.c                    |  1 +
 drivers/usb/gadget/file_storage.c         |  2 +-
 drivers/usb/storage/usb.c                 |  2 +-
 drivers/w1/w1.c                           |  1 +
 fs/afs/kafsasyncd.c                       |  1 +
 fs/afs/kafstimod.c                        |  1 +
 fs/cifs/cifsfs.c                          |  1 +
 fs/cifs/connect.c                         |  1 +
 fs/jbd/journal.c                          |  2 +-
 fs/jbd2/journal.c                         |  2 +-
 fs/jffs/intrep.c                          |  1 +
 fs/jffs2/background.c                     |  1 +
 fs/jfs/jfs_logmgr.c                       |  2 +-
 fs/jfs/jfs_txnmgr.c                       |  2 +-
 fs/lockd/clntproc.c                       |  1 +
 fs/xfs/linux-2.6/xfs_buf.c                |  1 +
 fs/xfs/linux-2.6/xfs_super.c              |  1 +
 include/linux/freezer.h                   | 84 +++++++++++++++++++++++++++++++
 include/linux/sched.h                     | 81 -----------------------------
 init/do_mounts_initrd.c                   |  1 +
 kernel/audit.c                            |  1 +
 kernel/power/disk.c                       |  1 +
 kernel/power/main.c                       |  1 +
 kernel/power/process.c                    |  1 +
 kernel/power/user.c                       |  1 +
 kernel/rtmutex-tester.c                   |  1 +
 kernel/sched.c                            |  2 +-
 kernel/signal.c                           |  1 +
 mm/pdflush.c                              |  1 +
 mm/vmscan.c                               |  1 +
 net/rxrpc/krxiod.c                        |  1 +
 net/rxrpc/krxsecd.c                       |  1 +
 net/rxrpc/krxtimod.c                      |  1 +
 net/sunrpc/svcsock.c                      |  1 +
 64 files changed, 147 insertions(+), 101 deletions(-)
 create mode 100644 include/linux/freezer.h

(limited to 'kernel')

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 48cf7fffddf2..f38a60a03b8c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -11,6 +11,7 @@
 #include <linux/signal.h>
 #include <linux/ptrace.h>
 #include <linux/personality.h>
+#include <linux/freezer.h>
 
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 33096651c24f..0ec14854a200 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/unistd.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index b8a5882b8625..85baeae9666a 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -21,7 +21,7 @@
 #include <linux/ptrace.h>
 #include <linux/unistd.h>
 #include <linux/personality.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index 7787f70a05bb..02955604d760 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -38,7 +38,7 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 
 #include <asm/setup.h>
 #include <asm/uaccess.h>
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 3b7a63e0ed1a..44c5a3206b2a 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -34,6 +34,7 @@
 #include <linux/pci.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
+#include <linux/freezer.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index b60cea4aebaa..092ea86bb079 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -21,7 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 320353f0926f..e4ebe1a6228e 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -36,7 +36,7 @@
 #include <linux/stddef.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #endif
 
 #include <asm/uaccess.h>
diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c
index 50d7c4993bef..bb1c480a59c7 100644
--- a/arch/sh/kernel/signal.c
+++ b/arch/sh/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/elf.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
+#include <linux/freezer.h>
 
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c
index 9e2ffc45c0e0..1666d3efb52e 100644
--- a/arch/sh64/kernel/signal.c
+++ b/arch/sh64/kernel/signal.c
@@ -22,7 +22,7 @@
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/personality.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index f2904f67af47..e45eaa264119 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -54,7 +54,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/mutex.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_ioctl.h>
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 9902ffad3b12..cc2cd46bedc6 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -38,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/delay.h>
+#include <linux/freezer.h>
 
 #include <asm/uaccess.h>
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 75e9e38330ff..1b4fc9221803 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -28,6 +28,7 @@
 #include <linux/sysdev.h>
 #include <linux/ctype.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/page.h>
 #include <asm/edac.h>
diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c
index 8e7b83f84485..e829c9336b3c 100644
--- a/drivers/ieee1394/nodemgr.c
+++ b/drivers/ieee1394/nodemgr.c
@@ -15,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/moduleparam.h>
+#include <linux/freezer.h>
 #include <asm/atomic.h>
 
 #include "csr.h"
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index a0af97efe6ac..79dfb4b25c97 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -23,6 +23,7 @@
 #include <linux/kthread.h>
 #include <linux/sched.h>	/* HZ */
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 /*#include <asm/io.h>*/
 
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 211943f85cb6..5f1d4032fd57 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION("Serio abstraction core");
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 13b953ae8ebc..3d3bf1643e73 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -24,6 +24,7 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/moduleparam.h>
+#include <linux/freezer.h>
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index e63ea1c1f3c1..c8558d4ed506 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -42,7 +42,7 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/syscalls.h>
 #include <linux/cpu.h>
 #include <asm/prom.h>
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index ab3faa702d58..e947af982f93 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -34,6 +34,7 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 #include <asm/prom.h>
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8cbf9c9df1c3..6c4345bde07e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -39,10 +39,10 @@
 #include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
-#include <linux/suspend.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
 #include <linux/ctype.h>
+#include <linux/freezer.h>
 
 #include <linux/init.h>
 
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index a2ab2eebfc68..e85972222ab4 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -34,7 +34,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/list.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/jiffies.h>
 #include <asm/processor.h>
 
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index cf43df3fe708..e1b56dc13c3f 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -56,7 +56,7 @@
 #include <media/tvaudio.h>
 #include <media/msp3400.h>
 #include <linux/kthread.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include "msp3400-driver.h"
 
 /* ---------------------------------------------------------------------- */
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index fcaef4bf8289..d506dfaa45a9 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/smp_lock.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <media/tvaudio.h>
 #include <media/v4l2-common.h>
diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c
index f53edf1923b7..fcc5467e7636 100644
--- a/drivers/media/video/video-buf-dvb.c
+++ b/drivers/media/video/video-buf-dvb.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/kthread.h>
 #include <linux/file.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 
 #include <media/video-buf.h>
 #include <media/video-buf-dvb.h>
diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c
index 3c8dc72dc8e9..9986de5cb3d6 100644
--- a/drivers/media/video/vivi.c
+++ b/drivers/media/video/vivi.c
@@ -36,6 +36,7 @@
 #include <media/v4l2-common.h>
 #include <linux/kthread.h>
 #include <linux/highmem.h>
+#include <linux/freezer.h>
 
 /* Wake up at about 30 fps */
 #define WAKE_NUMERATOR 30
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 82938ad6ddbd..ce1a48108210 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -28,7 +28,7 @@
 #include <linux/string.h>
 #include <linux/input.h>
 #include <linux/device.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
 
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 3b4c47875935..c14a74634fd5 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -50,6 +50,7 @@
 #include <linux/usb.h>
 #include <linux/crc32.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <net/irda/irda.h>
 #include <net/irda/irlap.h>
 #include <net/irda/irda_device.h>
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index efcdaf1c5f73..44a22701da97 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -49,6 +49,7 @@
 #include <asm/uaccess.h>
 #include <net/ieee80211.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include "airo.h"
 
diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
index f9cd831a3f31..606a46740338 100644
--- a/drivers/pcmcia/cs.c
+++ b/drivers/pcmcia/cs.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 81a6c83d89a6..81186f479a3f 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -61,6 +61,7 @@
 #include <linux/dmi.h>
 #include <linux/delay.h>
 #include <linux/acpi.h>
+#include <linux/freezer.h>
 
 #include <asm/page.h>
 #include <asm/desc.h>
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index f2d196fa1e8b..dae4ef1e8fe5 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -64,6 +64,8 @@
 #include <linux/kthread.h>
 #include <linux/version.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
+
 #include <asm/unaligned.h>
 
 #include "usbatm.h"
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 77c05be5241a..2651c2e2a89f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -22,6 +22,7 @@
 #include <linux/usbdevice_fs.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 8b975d15538d..c98316ce8384 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -250,7 +250,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/utsname.h>
 
 #include <linux/usb_ch9.h>
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index b401084b3d22..70644506651f 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -49,7 +49,7 @@
 
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index de3e9791f80d..63c07243993c 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <asm/atomic.h>
 
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index f09a794f248e..615df2407cb2 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "server.h"
 #include "volume.h"
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 65bc05ab8182..694344e4d3c7 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "volume.h"
 #include "kafstimod.h"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e6b5866e5001..71bc87a37fc1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71f77914ce93..2caca06b4bae 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/completion.h>
 #include <linux/pagevec.h>
+#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include "cifspdu.h"
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a8774bed20b6..10fff9443938 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 50356019ae30..44fc32bfd7f1 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 4a543e114970..478a74e2e9d5 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -66,6 +66,7 @@
 #include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/ctype.h>
+#include <linux/freezer.h>
 
 #include "intrep.h"
 #include "jffs_fm.h"
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index ff2a872e80e7..6eb3daebd563 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -16,6 +16,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/freezer.h>
 #include "nodelist.h"
 
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b89c9aba0466..5065baa530b6 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -67,7 +67,7 @@
 #include <linux/kthread.h>
 #include <linux/buffer_head.h>		/* for sync_blockdev() */
 #include <linux/bio.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include "jfs_incore.h"
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 81f6f04af192..d558e51b0df8 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3d84f600b633..50643b6a5556 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -13,6 +13,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/utsname.h>
 #include <linux/smp_lock.h>
+#include <linux/freezer.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index eef4a0ba11e9..b971237c5a9f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -32,6 +32,7 @@
 #include <linux/kthread.h>
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
+#include <linux/freezer.h>
 
 STATIC kmem_zone_t *xfs_buf_zone;
 STATIC kmem_shaker_t xfs_buf_shake;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index de05abbbe7fd..b93265b7c79c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -56,6 +56,7 @@
 #include <linux/mempool.h>
 #include <linux/writeback.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 STATIC struct quotactl_ops xfs_quotactl_operations;
 STATIC struct super_operations xfs_super_operations;
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
new file mode 100644
index 000000000000..266373f74445
--- /dev/null
+++ b/include/linux/freezer.h
@@ -0,0 +1,84 @@
+/* Freezer declarations */
+
+#ifdef CONFIG_PM
+/*
+ * Check if a process has been frozen
+ */
+static inline int frozen(struct task_struct *p)
+{
+	return p->flags & PF_FROZEN;
+}
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return p->flags & PF_FREEZE;
+}
+
+/*
+ * Request that a process be frozen
+ * FIXME: SMP problem. We may not modify other process' flags!
+ */
+static inline void freeze(struct task_struct *p)
+{
+	p->flags |= PF_FREEZE;
+}
+
+/*
+ * Sometimes we may need to cancel the previous 'freeze' request
+ */
+static inline void do_not_freeze(struct task_struct *p)
+{
+	p->flags &= ~PF_FREEZE;
+}
+
+/*
+ * Wake up a frozen process
+ */
+static inline int thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		wake_up_process(p);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * freezing is complete, mark process as frozen
+ */
+static inline void frozen_process(struct task_struct *p)
+{
+	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+}
+
+extern void refrigerator(void);
+extern int freeze_processes(void);
+extern void thaw_processes(void);
+
+static inline int try_to_freeze(void)
+{
+	if (freezing(current)) {
+		refrigerator();
+		return 1;
+	} else
+		return 0;
+}
+#else
+static inline int frozen(struct task_struct *p) { return 0; }
+static inline int freezing(struct task_struct *p) { return 0; }
+static inline void freeze(struct task_struct *p) { BUG(); }
+static inline int thaw_process(struct task_struct *p) { return 1; }
+static inline void frozen_process(struct task_struct *p) { BUG(); }
+
+static inline void refrigerator(void) {}
+static inline int freeze_processes(void) { BUG(); return 0; }
+static inline void thaw_processes(void) {}
+
+static inline int try_to_freeze(void) { return 0; }
+
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index acfd2e15c5f2..837a012f573c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1618,87 +1618,6 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
 
 extern void normalize_rt_tasks(void);
 
-#ifdef CONFIG_PM
-/*
- * Check if a process has been frozen
- */
-static inline int frozen(struct task_struct *p)
-{
-	return p->flags & PF_FROZEN;
-}
-
-/*
- * Check if there is a request to freeze a process
- */
-static inline int freezing(struct task_struct *p)
-{
-	return p->flags & PF_FREEZE;
-}
-
-/*
- * Request that a process be frozen
- * FIXME: SMP problem. We may not modify other process' flags!
- */
-static inline void freeze(struct task_struct *p)
-{
-	p->flags |= PF_FREEZE;
-}
-
-/*
- * Sometimes we may need to cancel the previous 'freeze' request
- */
-static inline void do_not_freeze(struct task_struct *p)
-{
-	p->flags &= ~PF_FREEZE;
-}
-
-/*
- * Wake up a frozen process
- */
-static inline int thaw_process(struct task_struct *p)
-{
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		wake_up_process(p);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * freezing is complete, mark process as frozen
- */
-static inline void frozen_process(struct task_struct *p)
-{
-	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
-}
-
-extern void refrigerator(void);
-extern int freeze_processes(void);
-extern void thaw_processes(void);
-
-static inline int try_to_freeze(void)
-{
-	if (freezing(current)) {
-		refrigerator();
-		return 1;
-	} else
-		return 0;
-}
-#else
-static inline int frozen(struct task_struct *p) { return 0; }
-static inline int freezing(struct task_struct *p) { return 0; }
-static inline void freeze(struct task_struct *p) { BUG(); }
-static inline int thaw_process(struct task_struct *p) { return 1; }
-static inline void frozen_process(struct task_struct *p) { BUG(); }
-
-static inline void refrigerator(void) {}
-static inline int freeze_processes(void) { BUG(); return 0; }
-static inline void thaw_processes(void) {}
-
-static inline int try_to_freeze(void) { return 0; }
-
-#endif /* CONFIG_PM */
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 919a80cb322e..2cfd7cb36e79 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -6,6 +6,7 @@
 #include <linux/romfs_fs.h>
 #include <linux/initrd.h>
 #include <linux/sched.h>
+#include <linux/freezer.h>
 
 #include "do_mounts.h"
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 98106f6078b0..d9b690ac684b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -57,6 +57,7 @@
 #include <linux/netlink.h>
 #include <linux/selinux.h>
 #include <linux/inotify.h>
+#include <linux/freezer.h>
 
 #include "audit.h"
 
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f5079231383e..53b3b57c0223 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -20,6 +20,7 @@
 #include <linux/pm.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
+#include <linux/freezer.h>
 
 #include "power.h"
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 873228c71dab..6096c71b182b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -18,6 +18,7 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/resume-trace.h>
+#include <linux/freezer.h>
 
 #include "power.h"
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 72e72d2c61e6..29be608e8349 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -13,6 +13,7 @@
 #include <linux/suspend.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/freezer.h>
 
 /* 
  * Timeout for stopping processes
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a63b25c63b49..26c66941c001 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
+#include <linux/freezer.h>
 
 #include <asm/uaccess.h>
 
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 6dcea9dd8c94..015fc633c96c 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/sysdev.h>
 #include <linux/timer.h>
+#include <linux/freezer.h>
 
 #include "rtmutex.h"
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3399701c680e..12fdbef1d9bf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -34,7 +34,7 @@
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 8e19d2785486..bc972d7e6319 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/capability.h>
+#include <linux/freezer.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
diff --git a/mm/pdflush.c b/mm/pdflush.c
index b02102feeb4b..8ce0900dc95c 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -21,6 +21,7 @@
 #include <linux/writeback.h>	// Prototypes pdflush_operation()
 #include <linux/kthread.h>
 #include <linux/cpuset.h>
+#include <linux/freezer.h>
 
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2a6a79f68138..f6616e81fac7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -36,6 +36,7 @@
 #include <linux/rwsem.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
index dada34a77b21..49effd92144e 100644
--- a/net/rxrpc/krxiod.c
+++ b/net/rxrpc/krxiod.c
@@ -13,6 +13,7 @@
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/freezer.h>
 #include <rxrpc/krxiod.h>
 #include <rxrpc/transport.h>
 #include <rxrpc/peer.h>
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
index cea4eb5e2497..3ab0f77409f4 100644
--- a/net/rxrpc/krxsecd.c
+++ b/net/rxrpc/krxsecd.c
@@ -27,6 +27,7 @@
 #include <rxrpc/call.h>
 #include <linux/udp.h>
 #include <linux/ip.h>
+#include <linux/freezer.h>
 #include <net/sock.h>
 #include "internal.h"
 
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
index 3e7466900bd4..9a9b6132dba4 100644
--- a/net/rxrpc/krxtimod.c
+++ b/net/rxrpc/krxtimod.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include <rxrpc/rxrpc.h>
 #include <rxrpc/krxtimod.h>
 #include <asm/errno.h>
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 64ca1f61dd94..1c68956824e3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -32,6 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/file.h>
+#include <linux/freezer.h>
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/ip.h>
-- 
cgit v1.2.3


From 32d50f57dab94d8c46566a903bbb633ee72fdcc2 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:25 -0800
Subject: [PATCH] swsusp: quieten Freezer if !CONFIG_PM_DEBUG

The freezer currently prints an '=' for every process that is frozen.  This
is pretty pointless, as the equals sign says nothing about which process is
frozen, and makes logs look messier (especially if there were a large
number of processes running).  All we really need to know is that we
started trying to freeze processes and what processes (if any) failed to
freeze, or that we succeeded.

Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 29be608e8349..b0edfc6f2798 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -40,7 +40,6 @@ void refrigerator(void)
 	long save;
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
-	printk("=");
 
 	frozen_process(current);
 	spin_lock_irq(&current->sighand->siglock);
-- 
cgit v1.2.3


From 14b5b7cfaa110b1d25b8f80b01a8c97cf2db30bc Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:26 -0800
Subject: [PATCH] swsusp: clean up whitespace in freezer output

Minor whitespace and formatting modifications for the freezer.

Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index b0edfc6f2798..fedabad5a180 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -86,7 +86,7 @@ int freeze_processes(void)
 	unsigned long start_time;
 	struct task_struct *g, *p;
 
-	printk( "Stopping tasks: " );
+	printk("Stopping tasks... ");
 	start_time = jiffies;
 	user_frozen = 0;
 	do {
@@ -134,21 +134,21 @@ int freeze_processes(void)
 	 * but it cleans up leftover PF_FREEZE requests.
 	 */
 	if (todo) {
-		printk( "\n" );
-		printk(KERN_ERR " stopping tasks timed out "
+		printk("\n");
+		printk(KERN_ERR "Stopping tasks timed out "
 			"after %d seconds (%d tasks remaining):\n",
 			TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (freezeable(p) && !frozen(p))
-				printk(KERN_ERR "  %s\n", p->comm);
+				printk(KERN_ERR " %s\n", p->comm);
 			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
 
-	printk( "|\n" );
+	printk("done.\n");
 	BUG_ON(in_atomic());
 	return 0;
 }
@@ -157,18 +157,18 @@ void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
-	printk( "Restarting tasks..." );
+	printk("Restarting tasks... ");
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
 		if (!freezeable(p))
 			continue;
 		if (!thaw_process(p))
-			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+			printk(KERN_INFO "Strange, %s not stopped\n", p->comm);
 	} while_each_thread(g, p);
 
 	read_unlock(&tasklist_lock);
 	schedule();
-	printk( " done\n" );
+	printk("done.\n");
 }
 
 EXPORT_SYMBOL(refrigerator);
-- 
cgit v1.2.3


From ff39593ad0ff7a79a3717edac6634407aa8200c2 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:28 -0800
Subject: [PATCH] swsusp: thaw userspace and kernel space separately

Modify process thawing so that we can thaw kernel space without thawing
userspace, and thaw kernelspace first.  This will be useful in later
patches, where I intend to get swsusp thawing kernel threads only before
seeking to free memory.

Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/freezer.h |  9 ++++++++-
 kernel/power/process.c  | 25 ++++++++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 266373f74445..294ebea859c9 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -1,5 +1,8 @@
 /* Freezer declarations */
 
+#define FREEZER_KERNEL_THREADS 0
+#define FREEZER_ALL_THREADS 1
+
 #ifdef CONFIG_PM
 /*
  * Check if a process has been frozen
@@ -57,7 +60,8 @@ static inline void frozen_process(struct task_struct *p)
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
-extern void thaw_processes(void);
+#define thaw_processes() do { thaw_some_processes(FREEZER_ALL_THREADS); } while(0)
+#define thaw_kernel_threads() do { thaw_some_processes(FREEZER_KERNEL_THREADS); } while(0)
 
 static inline int try_to_freeze(void)
 {
@@ -67,6 +71,9 @@ static inline int try_to_freeze(void)
 	} else
 		return 0;
 }
+
+extern void thaw_some_processes(int all);
+
 #else
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fedabad5a180..cba8a5890eda 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -153,18 +153,29 @@ int freeze_processes(void)
 	return 0;
 }
 
-void thaw_processes(void)
+void thaw_some_processes(int all)
 {
 	struct task_struct *g, *p;
+	int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */
 
 	printk("Restarting tasks... ");
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
-		if (!freezeable(p))
-			continue;
-		if (!thaw_process(p))
-			printk(KERN_INFO "Strange, %s not stopped\n", p->comm);
-	} while_each_thread(g, p);
+	do {
+		do_each_thread(g, p) {
+			/*
+			 * is_user = 0 if kernel thread or borrowed mm,
+			 * 1 otherwise.
+			 */
+			int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM));
+			if (!freezeable(p) || (is_user != pass))
+				continue;
+			if (!thaw_process(p))
+				printk(KERN_INFO
+					"Strange, %s not stopped\n", p->comm);
+		} while_each_thread(g, p);
+
+		pass++;
+	} while (pass < 2 && all);
 
 	read_unlock(&tasklist_lock);
 	schedule();
-- 
cgit v1.2.3


From 2d4a34c9365c6e3f94a5b26ce296e1fce9b66c8b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:29 -0800
Subject: [PATCH] swsusp: Support i386 systems with PAE or without PSE

Make swsusp support i386 systems with PAE or without PSE.

This is done by creating temporary page tables located in resume-safe page
frames before the suspend image is restored in the same way as x86_64 does
it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <ak@suse.de>
Cc: Dave Jones <davej@redhat.com>
Cc: Nigel Cunningham <ncunningham@linuxmail.org>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/power/Makefile   |   2 +-
 arch/i386/power/suspend.c  | 158 +++++++++++++++++++++++++++++++++++++++++++++
 arch/i386/power/swsusp.S   |   9 ++-
 include/asm-i386/suspend.h |  13 +---
 kernel/power/Kconfig       |   2 +-
 5 files changed, 168 insertions(+), 16 deletions(-)
 create mode 100644 arch/i386/power/suspend.c

(limited to 'kernel')

diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile
index 8cfa4e8a719d..2de7bbf03cd7 100644
--- a/arch/i386/power/Makefile
+++ b/arch/i386/power/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_PM)		+= cpu.o
-obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o
+obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o suspend.o
diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c
new file mode 100644
index 000000000000..db5e98d2eb73
--- /dev/null
+++ b/arch/i386/power/suspend.c
@@ -0,0 +1,158 @@
+/*
+ * Suspend support specific for i386 - temporary page tables
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ */
+
+#include <linux/suspend.h>
+#include <linux/bootmem.h>
+
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/* Defined in arch/i386/power/swsusp.S */
+extern int restore_image(void);
+
+/* Pointer to the temporary resume page tables */
+pgd_t *resume_pg_dir;
+
+/* The following three functions are based on the analogous code in
+ * arch/i386/mm/init.c
+ */
+
+/*
+ * Create a middle page table on a resume-safe page and put a pointer to it in
+ * the given global directory entry.  This only returns the gd entry
+ * in non-PAE compilation mode, since the middle layer is folded.
+ */
+static pmd_t *resume_one_md_table_init(pgd_t *pgd)
+{
+	pud_t *pud;
+	pmd_t *pmd_table;
+
+#ifdef CONFIG_X86_PAE
+	pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC);
+	if (!pmd_table)
+		return NULL;
+
+	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+	pud = pud_offset(pgd, 0);
+
+	BUG_ON(pmd_table != pmd_offset(pud, 0));
+#else
+	pud = pud_offset(pgd, 0);
+	pmd_table = pmd_offset(pud, 0);
+#endif
+
+	return pmd_table;
+}
+
+/*
+ * Create a page table on a resume-safe page and place a pointer to it in
+ * a middle page directory entry.
+ */
+static pte_t *resume_one_page_table_init(pmd_t *pmd)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC);
+		if (!page_table)
+			return NULL;
+
+		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+
+		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
+
+		return page_table;
+	}
+
+	return pte_offset_kernel(pmd, 0);
+}
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET.  The page tables are allocated out of resume-safe pages.
+ */
+static int resume_physical_mapping_init(pgd_t *pgd_base)
+{
+	unsigned long pfn;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int pgd_idx, pmd_idx;
+
+	pgd_idx = pgd_index(PAGE_OFFSET);
+	pgd = pgd_base + pgd_idx;
+	pfn = 0;
+
+	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+		pmd = resume_one_md_table_init(pgd);
+		if (!pmd)
+			return -ENOMEM;
+
+		if (pfn >= max_low_pfn)
+			continue;
+
+		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
+			if (pfn >= max_low_pfn)
+				break;
+
+			/* Map with big pages if possible, otherwise create
+			 * normal page tables.
+			 * NOTE: We can mark everything as executable here
+			 */
+			if (cpu_has_pse) {
+				set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+				pfn += PTRS_PER_PTE;
+			} else {
+				pte_t *max_pte;
+
+				pte = resume_one_page_table_init(pmd);
+				if (!pte)
+					return -ENOMEM;
+
+				max_pte = pte + PTRS_PER_PTE;
+				for (; pte < max_pte; pte++, pfn++) {
+					if (pfn >= max_low_pfn)
+						break;
+
+					set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
+{
+#ifdef CONFIG_X86_PAE
+	int i;
+
+	/* Init entries of the first-level page table to the zero page */
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		set_pgd(pg_dir + i,
+			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+#endif
+}
+
+int swsusp_arch_resume(void)
+{
+	int error;
+
+	resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	if (!resume_pg_dir)
+		return -ENOMEM;
+
+	resume_init_first_level_page_table(resume_pg_dir);
+	error = resume_physical_mapping_init(resume_pg_dir);
+	if (error)
+		return error;
+
+	/* We have got enough memory and from now on we cannot recover */
+	restore_image();
+	return 0;
+}
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
index 8a2b50a0aaad..53662e05b393 100644
--- a/arch/i386/power/swsusp.S
+++ b/arch/i386/power/swsusp.S
@@ -28,8 +28,9 @@ ENTRY(swsusp_arch_suspend)
 	call swsusp_save
 	ret
 
-ENTRY(swsusp_arch_resume)
-	movl	$swsusp_pg_dir-__PAGE_OFFSET, %ecx
+ENTRY(restore_image)
+	movl	resume_pg_dir, %ecx
+	subl	$__PAGE_OFFSET, %ecx
 	movl	%ecx, %cr3
 
 	movl	restore_pblist, %edx
@@ -51,6 +52,10 @@ copy_loop:
 	.p2align 4,,7
 
 done:
+	/* go back to the original page tables */
+	movl	$swapper_pg_dir, %ecx
+	subl	$__PAGE_OFFSET, %ecx
+	movl	%ecx, %cr3
 	/* Flush TLB, including "global" things (vmalloc) */
 	movl	mmu_cr4_features, %eax
 	movl	%eax, %edx
diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h
index 08be1e5009d4..c1da5caafaf7 100644
--- a/include/asm-i386/suspend.h
+++ b/include/asm-i386/suspend.h
@@ -6,18 +6,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 
-static inline int
-arch_prepare_suspend(void)
-{
-	/* If you want to make non-PSE machine work, turn off paging
-           in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so
-           it could work...  */
-	if (!cpu_has_pse) {
-		printk(KERN_ERR "PSE is required for swsusp.\n");
-		return -EPERM;
-	}
-	return 0;
-}
+static inline int arch_prepare_suspend(void) { return 0; }
 
 /* image of the saved processor state */
 struct saved_context {
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 825068ca3479..710ed084e7c5 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
+	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need ACPI or APM.
-- 
cgit v1.2.3


From 112cecb2cc0e7341db92281ba04b26c41bb8146d Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Wed, 6 Dec 2006 20:34:31 -0800
Subject: [PATCH] suspend: don't change cpus_allowed for task initiating the
 suspend

Don't modify the cpus_allowed of the task initiating the suspend.
_cpu_down() already makes sure that the task doing the suspend doesn't run
on dying cpu.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpu.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 272254f20d97..9124669f4586 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -270,11 +270,7 @@ int disable_nonboot_cpus(void)
 			goto out;
 		}
 	}
-	error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
-	if (error) {
-		printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
-		goto out;
-	}
+
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
-- 
cgit v1.2.3


From 0d3a9abe8ae055e1052295698bcd0722c92eff47 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:32 -0800
Subject: [PATCH] swsusp: Measure memory shrinking time

Make swsusp measure and print the time needed to shrink memory during the
suspend.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/power.h  |  4 ++++
 kernel/power/swap.c   | 24 ++----------------------
 kernel/power/swsusp.c | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/power.h b/kernel/power/power.h
index 3763343bde2f..3afa5dbe6bc5 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -171,3 +171,7 @@ extern int swsusp_read(void);
 extern int swsusp_write(void);
 extern void swsusp_close(void);
 extern int suspend_enter(suspend_state_t state);
+
+struct timeval;
+extern void swsusp_show_speed(struct timeval *, struct timeval *,
+				unsigned int, char *);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 52e70ca832a8..dedf8797b723 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -133,26 +133,6 @@ static int wait_on_bio_chain(struct bio **bio_chain)
 	return ret;
 }
 
-static void show_speed(struct timeval *start, struct timeval *stop,
-			unsigned nr_pages, char *msg)
-{
-	s64 elapsed_centisecs64;
-	int centisecs;
-	int k;
-	int kps;
-
-	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
-	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
-	centisecs = elapsed_centisecs64;
-	if (centisecs == 0)
-		centisecs = 1;	/* avoid div-by-zero */
-	k = nr_pages * (PAGE_SIZE / 1024);
-	kps = (k * 100) / centisecs;
-	printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
-			centisecs / 100, centisecs % 100,
-			kps / 1000, (kps % 1000) / 10);
-}
-
 /*
  * Saving part
  */
@@ -375,7 +355,7 @@ static int save_image(struct swap_map_handle *handle,
 		error = err2;
 	if (!error)
 		printk("\b\b\b\bdone\n");
-	show_speed(&start, &stop, nr_to_write, "Wrote");
+	swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
 	return error;
 }
 
@@ -562,7 +542,7 @@ static int load_image(struct swap_map_handle *handle,
 		if (!snapshot_image_loaded(snapshot))
 			error = -ENODATA;
 	}
-	show_speed(&start, &stop, nr_to_read, "Read");
+	swsusp_show_speed(&start, &stop, nr_to_read, "Read");
 	return error;
 }
 
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 68de5c1dbd78..aa31432bbd97 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -49,6 +49,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/highmem.h>
+#include <linux/time.h>
 
 #include "power.h"
 
@@ -163,6 +164,34 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
 	}
 }
 
+/**
+ *	swsusp_show_speed - print the time elapsed between two events represented by
+ *	@start and @stop
+ *
+ *	@nr_pages -	number of pages processed between @start and @stop
+ *	@msg -		introductory message to print
+ */
+
+void swsusp_show_speed(struct timeval *start, struct timeval *stop,
+			unsigned nr_pages, char *msg)
+{
+	s64 elapsed_centisecs64;
+	int centisecs;
+	int k;
+	int kps;
+
+	elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+	do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
+	centisecs = elapsed_centisecs64;
+	if (centisecs == 0)
+		centisecs = 1;	/* avoid div-by-zero */
+	k = nr_pages * (PAGE_SIZE / 1024);
+	kps = (k * 100) / centisecs;
+	printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
+			centisecs / 100, centisecs % 100,
+			kps / 1000, (kps % 1000) / 10);
+}
+
 /**
  *	swsusp_shrink_memory -  Try to free as much memory as needed
  *
@@ -187,8 +216,10 @@ int swsusp_shrink_memory(void)
 	unsigned long pages = 0;
 	unsigned int i = 0;
 	char *p = "-\\|/";
+	struct timeval start, stop;
 
 	printk("Shrinking memory...  ");
+	do_gettimeofday(&start);
 	do {
 		long size, highmem_size;
 
@@ -222,7 +253,9 @@ int swsusp_shrink_memory(void)
 		}
 		printk("\b%c", p[i++%4]);
 	} while (tmp > 0);
+	do_gettimeofday(&stop);
 	printk("\bdone (%lu pages freed)\n", pages);
+	swsusp_show_speed(&start, &stop, pages, "Freed");
 
 	return 0;
 }
-- 
cgit v1.2.3


From 3eb1b3a40722cbb46631db373af66d13d1e7ac81 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:34 -0800
Subject: [PATCH] suspend to disk fails if gdb is suspended with a traced child

Fix http://bugzilla.kernel.org/show_bug.cgi?id=7534

Fix the freezing of processes so that it won't fail if there is a traced
process the parent of which has been stopped.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: maurice barnum <pixi+kbug@burble.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index cba8a5890eda..1badb9a89ade 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -97,7 +97,9 @@ int freeze_processes(void)
 				continue;
 			if (frozen(p))
 				continue;
-			if (p->state == TASK_TRACED && frozen(p->parent)) {
+			if (p->state == TASK_TRACED &&
+			    (frozen(p->parent) ||
+			     p->parent->state == TASK_STOPPED)) {
 				cancel_freezing(p);
 				continue;
 			}
-- 
cgit v1.2.3


From a6d70980602e6f1869ebcdcbfaf55a0a5941583e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 6 Dec 2006 20:34:35 -0800
Subject: [PATCH] convert pm_sem to a mutex

The power management semaphore is only used as mutex, so convert it.

[akpm@osdl.org: fix rotten bug]
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c  | 16 ++++++++--------
 kernel/power/main.c  | 10 +++++-----
 kernel/power/power.h |  4 +++-
 kernel/power/user.c  | 24 ++++++++++++------------
 4 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 53b3b57c0223..08d9e7aac0f8 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -213,10 +213,10 @@ static int software_resume(void)
 {
 	int error;
 
-	down(&pm_sem);
+	mutex_lock(&pm_mutex);
 	if (!swsusp_resume_device) {
 		if (!strlen(resume_file)) {
-			up(&pm_sem);
+			mutex_unlock(&pm_mutex);
 			return -ENOENT;
 		}
 		swsusp_resume_device = name_to_dev_t(resume_file);
@@ -231,7 +231,7 @@ static int software_resume(void)
 		 * FIXME: If noresume is specified, we need to find the partition
 		 * and reset it back to normal swap space.
 		 */
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		return 0;
 	}
 
@@ -275,7 +275,7 @@ static int software_resume(void)
 	unprepare_processes();
  Done:
 	/* For success case, the suspend path will release the lock */
-	up(&pm_sem);
+	mutex_unlock(&pm_mutex);
 	pr_debug("PM: Resume from disk failed.\n");
 	return 0;
 }
@@ -336,7 +336,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
-	down(&pm_sem);
+	mutex_lock(&pm_mutex);
 	for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
 		if (!strncmp(buf, pm_disk_modes[i], len)) {
 			mode = i;
@@ -360,7 +360,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
 
 	pr_debug("PM: suspend-to-disk mode set to '%s'\n",
 		 pm_disk_modes[mode]);
-	up(&pm_sem);
+	mutex_unlock(&pm_mutex);
 	return error ? error : n;
 }
 
@@ -385,9 +385,9 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
 	if (maj != MAJOR(res) || min != MINOR(res))
 		goto out;
 
-	down(&pm_sem);
+	mutex_lock(&pm_mutex);
 	swsusp_resume_device = res;
-	up(&pm_sem);
+	mutex_unlock(&pm_mutex);
 	printk("Attempting manual resume\n");
 	noresume = 0;
 	software_resume();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6096c71b182b..751157b7897e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -25,7 +25,7 @@
 /*This is just an arbitrary number */
 #define FREE_PAGE_NUMBER (100)
 
-DECLARE_MUTEX(pm_sem);
+DEFINE_MUTEX(pm_mutex);
 
 struct pm_ops *pm_ops;
 suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
@@ -37,9 +37,9 @@ suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
 
 void pm_set_ops(struct pm_ops * ops)
 {
-	down(&pm_sem);
+	mutex_lock(&pm_mutex);
 	pm_ops = ops;
-	up(&pm_sem);
+	mutex_unlock(&pm_mutex);
 }
 
 
@@ -183,7 +183,7 @@ static int enter_state(suspend_state_t state)
 
 	if (!valid_state(state))
 		return -ENODEV;
-	if (down_trylock(&pm_sem))
+	if (!mutex_trylock(&pm_mutex))
 		return -EBUSY;
 
 	if (state == PM_SUSPEND_DISK) {
@@ -201,7 +201,7 @@ static int enter_state(suspend_state_t state)
 	pr_debug("PM: Finishing wakeup.\n");
 	suspend_finish(state);
  Unlock:
-	up(&pm_sem);
+	mutex_unlock(&pm_mutex);
 	return error;
 }
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 3afa5dbe6bc5..eb461b816bf4 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -22,7 +22,9 @@ static inline int pm_suspend_disk(void)
 	return -EPERM;
 }
 #endif
-extern struct semaphore pm_sem;
+
+extern struct mutex pm_mutex;
+
 #define power_attr(_name) \
 static struct subsys_attribute _name##_attr = {	\
 	.attr	= {				\
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 26c66941c001..905dc269c3fc 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -79,10 +79,10 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 	free_all_swap_pages(data->swap, data->bitmap);
 	free_bitmap(data->bitmap);
 	if (data->frozen) {
-		down(&pm_sem);
+		mutex_lock(&pm_mutex);
 		thaw_processes();
 		enable_nonboot_cpus();
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 	}
 	atomic_inc(&device_available);
 	return 0;
@@ -144,7 +144,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 	case SNAPSHOT_FREEZE:
 		if (data->frozen)
 			break;
-		down(&pm_sem);
+		mutex_lock(&pm_mutex);
 		error = disable_nonboot_cpus();
 		if (!error) {
 			error = freeze_processes();
@@ -154,7 +154,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 				error = -EBUSY;
 			}
 		}
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		if (!error)
 			data->frozen = 1;
 		break;
@@ -162,10 +162,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 	case SNAPSHOT_UNFREEZE:
 		if (!data->frozen)
 			break;
-		down(&pm_sem);
+		mutex_lock(&pm_mutex);
 		thaw_processes();
 		enable_nonboot_cpus();
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		data->frozen = 0;
 		break;
 
@@ -174,7 +174,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		down(&pm_sem);
+		mutex_lock(&pm_mutex);
 		/* Free memory before shutting down devices. */
 		error = swsusp_shrink_memory();
 		if (!error) {
@@ -187,7 +187,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			}
 			resume_console();
 		}
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		if (!error)
 			error = put_user(in_suspend, (unsigned int __user *)arg);
 		if (!error)
@@ -201,7 +201,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		down(&pm_sem);
+		mutex_lock(&pm_mutex);
 		pm_prepare_console();
 		suspend_console();
 		error = device_suspend(PMSG_PRETHAW);
@@ -211,7 +211,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		}
 		resume_console();
 		pm_restore_console();
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		break;
 
 	case SNAPSHOT_FREE:
@@ -286,7 +286,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			break;
 		}
 
-		if (down_trylock(&pm_sem)) {
+		if (!mutex_trylock(&pm_mutex)) {
 			error = -EBUSY;
 			break;
 		}
@@ -314,7 +314,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			pm_ops->finish(PM_SUSPEND_MEM);
 
 OutS3:
-		up(&pm_sem);
+		mutex_unlock(&pm_mutex);
 		break;
 
 	case SNAPSHOT_PMOPS:
-- 
cgit v1.2.3


From a9b6f562f14dc28fb4b2415f0f275cede0abe9b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:37 -0800
Subject: [PATCH] swsusp: Untangle thaw_processes

Move the loop from thaw_processes() to a separate function and call it
independently for kernel threads and user space processes so that the order
of thawing tasks is clearly visible.

Drop thaw_kernel_threads() which is never used.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/freezer.h |  6 +-----
 kernel/power/process.c  | 49 +++++++++++++++++++++++++++----------------------
 2 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 294ebea859c9..6e05e3e7ce39 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -1,8 +1,5 @@
 /* Freezer declarations */
 
-#define FREEZER_KERNEL_THREADS 0
-#define FREEZER_ALL_THREADS 1
-
 #ifdef CONFIG_PM
 /*
  * Check if a process has been frozen
@@ -60,8 +57,7 @@ static inline void frozen_process(struct task_struct *p)
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
-#define thaw_processes() do { thaw_some_processes(FREEZER_ALL_THREADS); } while(0)
-#define thaw_kernel_threads() do { thaw_some_processes(FREEZER_KERNEL_THREADS); } while(0)
+extern void thaw_processes(void);
 
 static inline int try_to_freeze(void)
 {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 1badb9a89ade..fd0ebb942f50 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -20,6 +20,8 @@
  */
 #define TIMEOUT	(20 * HZ)
 
+#define FREEZER_KERNEL_THREADS 0
+#define FREEZER_USER_SPACE 1
 
 static inline int freezeable(struct task_struct * p)
 {
@@ -79,6 +81,11 @@ static void cancel_freezing(struct task_struct *p)
 	}
 }
 
+static inline int is_user_space(struct task_struct *p)
+{
+	return p->mm && !(p->flags & PF_BORROWED_MM);
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
@@ -103,10 +110,9 @@ int freeze_processes(void)
 				cancel_freezing(p);
 				continue;
 			}
-			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
-				/* The task is a user-space one.
-				 * Freeze it unless there's a vfork completion
-				 * pending
+			if (is_user_space(p)) {
+				/* Freeze the task unless there is a vfork
+				 * completion pending
 				 */
 				if (!p->vfork_done)
 					freeze_process(p);
@@ -155,31 +161,30 @@ int freeze_processes(void)
 	return 0;
 }
 
-void thaw_some_processes(int all)
+static void thaw_tasks(int thaw_user_space)
 {
 	struct task_struct *g, *p;
-	int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */
 
-	printk("Restarting tasks... ");
 	read_lock(&tasklist_lock);
-	do {
-		do_each_thread(g, p) {
-			/*
-			 * is_user = 0 if kernel thread or borrowed mm,
-			 * 1 otherwise.
-			 */
-			int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM));
-			if (!freezeable(p) || (is_user != pass))
-				continue;
-			if (!thaw_process(p))
-				printk(KERN_INFO
-					"Strange, %s not stopped\n", p->comm);
-		} while_each_thread(g, p);
+	do_each_thread(g, p) {
+		if (!freezeable(p))
+			continue;
 
-		pass++;
-	} while (pass < 2 && all);
+		if (is_user_space(p) == !thaw_user_space)
+			continue;
 
+		if (!thaw_process(p))
+			printk(KERN_WARNING " Strange, %s not stopped\n",
+				p->comm );
+	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
+}
+
+void thaw_processes(void)
+{
+	printk("Restarting tasks ... ");
+	thaw_tasks(FREEZER_KERNEL_THREADS);
+	thaw_tasks(FREEZER_USER_SPACE);
 	schedule();
 	printk("done.\n");
 }
-- 
cgit v1.2.3


From 11b2ce2ba90f801e2a5ebba4e6b7da72d87f2b13 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:40 -0800
Subject: [PATCH] swsusp: Untangle freeze_processes

Move the loop from freeze_processes() to a separate function and call it
independently for user space processes and kernel threads so that the order
of freezing tasks is clearly visible.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 84 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 30 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index fd0ebb942f50..99eeb119b06d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -86,24 +86,23 @@ static inline int is_user_space(struct task_struct *p)
 	return p->mm && !(p->flags & PF_BORROWED_MM);
 }
 
-/* 0 = success, else # of processes that we failed to stop */
-int freeze_processes(void)
+static unsigned int try_to_freeze_tasks(int freeze_user_space)
 {
-	int todo, nr_user, user_frozen;
-	unsigned long start_time;
 	struct task_struct *g, *p;
+	unsigned long end_time;
+	unsigned int todo;
 
-	printk("Stopping tasks... ");
-	start_time = jiffies;
-	user_frozen = 0;
+	end_time = jiffies + TIMEOUT;
 	do {
-		nr_user = todo = 0;
+		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!freezeable(p))
 				continue;
+
 			if (frozen(p))
 				continue;
+
 			if (p->state == TASK_TRACED &&
 			    (frozen(p->parent) ||
 			     p->parent->state == TASK_STOPPED)) {
@@ -111,51 +110,76 @@ int freeze_processes(void)
 				continue;
 			}
 			if (is_user_space(p)) {
+				if (!freeze_user_space)
+					continue;
+
 				/* Freeze the task unless there is a vfork
 				 * completion pending
 				 */
 				if (!p->vfork_done)
 					freeze_process(p);
-				nr_user++;
 			} else {
-				/* Freeze only if the user space is frozen */
-				if (user_frozen)
-					freeze_process(p);
-				todo++;
+				if (freeze_user_space)
+					continue;
+
+				freeze_process(p);
 			}
+			todo++;
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
-		todo += nr_user;
-		if (!user_frozen && !nr_user) {
-			sys_sync();
-			start_time = jiffies;
-		}
-		user_frozen = !nr_user;
 		yield();			/* Yield is okay here */
-		if (todo && time_after(jiffies, start_time + TIMEOUT))
+		if (todo && time_after(jiffies, end_time))
 			break;
-	} while(todo);
+	} while (todo);
 
-	/* This does not unfreeze processes that are already frozen
-	 * (we have slightly ugly calling convention in that respect,
-	 * and caller must call thaw_processes() if something fails),
-	 * but it cleans up leftover PF_FREEZE requests.
-	 */
 	if (todo) {
+		/* This does not unfreeze processes that are already frozen
+		 * (we have slightly ugly calling convention in that respect,
+		 * and caller must call thaw_processes() if something fails),
+		 * but it cleans up leftover PF_FREEZE requests.
+		 */
 		printk("\n");
-		printk(KERN_ERR "Stopping tasks timed out "
-			"after %d seconds (%d tasks remaining):\n",
-			TIMEOUT / HZ, todo);
+		printk(KERN_ERR "Stopping %s timed out after %d seconds "
+				"(%d tasks refusing to freeze):\n",
+				freeze_user_space ? "user space processes" :
+					"kernel threads",
+				TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
+			if (is_user_space(p) == !freeze_user_space)
+				continue;
+
 			if (freezeable(p) && !frozen(p))
 				printk(KERN_ERR " %s\n", p->comm);
+
 			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
-		return todo;
 	}
 
+	return todo;
+}
+
+/**
+ *	freeze_processes - tell processes to enter the refrigerator
+ *
+ *	Returns 0 on success, or the number of processes that didn't freeze,
+ *	although they were told to.
+ */
+int freeze_processes(void)
+{
+	unsigned int nr_unfrozen;
+
+	printk("Stopping tasks ... ");
+	nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE);
+	if (nr_unfrozen)
+		return nr_unfrozen;
+
+	sys_sync();
+	nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
+	if (nr_unfrozen)
+		return nr_unfrozen;
+
 	printk("done.\n");
 	BUG_ON(in_atomic());
 	return 0;
-- 
cgit v1.2.3


From 5b6d15de2d4c8149902a680a6cd1d3b26cd2e828 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:43 -0800
Subject: [PATCH] swsusp: Fix coding style in suspend.c

Fix coding style in suspend.c.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/snapshot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index fd8251d40eb8..712f1524d846 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -85,7 +85,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
 	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
 }
 
-static struct page *alloc_image_page(gfp_t gfp_mask) {
+static struct page *alloc_image_page(gfp_t gfp_mask)
+{
 	struct page *page;
 
 	page = alloc_page(gfp_mask);
-- 
cgit v1.2.3


From 59a493350e7aefff7e262efa39e017517b31b8e8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:44 -0800
Subject: [PATCH] swsusp: Fix labels

Move all labels in the swsusp code to the second column, so that they won't
fool diff -p.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c     | 6 +++---
 kernel/power/snapshot.c | 8 ++++----
 kernel/power/swap.c     | 4 ++--
 kernel/power/swsusp.c   | 2 +-
 kernel/power/user.c     | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 08d9e7aac0f8..126dda61f45d 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -117,9 +117,9 @@ static int prepare_processes(void)
 		return 0;
 
 	platform_finish();
-thaw:
+ thaw:
 	thaw_processes();
-enable_cpus:
+ enable_cpus:
 	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
@@ -392,7 +392,7 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
 	noresume = 0;
 	software_resume();
 	ret = n;
-out:
+ out:
 	return ret;
 }
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 712f1524d846..c024606221c4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -411,7 +411,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 	memory_bm_position_reset(bm);
 	return 0;
 
-Free:
+ Free:
 	bm->p_list = ca.chain;
 	memory_bm_free(bm, PG_UNSAFE_CLEAR);
 	return -ENOMEM;
@@ -557,7 +557,7 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 	memory_bm_position_reset(bm);
 	return BM_END_OF_MAP;
 
-Return_pfn:
+ Return_pfn:
 	bm->cur.chunk = chunk;
 	bm->cur.bit = bit;
 	return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
@@ -964,7 +964,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
 	}
 	return 0;
 
-Free:
+ Free:
 	swsusp_free();
 	return -ENOMEM;
 }
@@ -1540,7 +1540,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	}
 	return 0;
 
-Free:
+ Free:
 	swsusp_free();
 	return error;
 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index dedf8797b723..f133d4a6d817 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -301,7 +301,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
 		handle->cur_swap = offset;
 		handle->k = 0;
 	}
-out:
+ out:
 	return error;
 }
 
@@ -429,7 +429,7 @@ int swsusp_write(void)
 	if (error)
 		free_all_swap_pages(root_swap, handle.bitmap);
 	release_swap_writer(&handle);
-out:
+ out:
 	swsusp_close();
 	return error;
 }
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index aa31432bbd97..31aa0390c777 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -288,7 +288,7 @@ int swsusp_suspend(void)
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 	device_power_up();
-Enable_irqs:
+ Enable_irqs:
 	local_irq_enable();
 	return error;
 }
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 905dc269c3fc..069732e5695c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -313,7 +313,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		if (pm_ops->finish)
 			pm_ops->finish(PM_SUSPEND_MEM);
 
-OutS3:
+ OutS3:
 		mutex_unlock(&pm_mutex);
 		break;
 
-- 
cgit v1.2.3


From 2d87595ea628ea58415ba4638c553a8c2fbd90e2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:47 -0800
Subject: [PATCH] PM: Fix swsusp debug mode testproc

The 'testproc' swsusp debug mode thaws tasks twice in a row, which is _very_
confusing.  Fix that.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 126dda61f45d..6180379d5b84 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -151,7 +151,7 @@ int pm_suspend_disk(void)
 		return error;
 
 	if (pm_disk_mode == PM_DISK_TESTPROC)
-		goto Thaw;
+		return 0;
 
 	suspend_console();
 	error = device_suspend(PMSG_FREEZE);
-- 
cgit v1.2.3


From 5045cfc103566878228ca36d05a0ae0076673e5a Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 6 Dec 2006 20:34:48 -0800
Subject: [PATCH] swsusp: kill write-only variable

Cleanup write-only variable, suggested by D Binderman.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/disk.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 6180379d5b84..0b00f56c2ad0 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -58,12 +58,10 @@ static inline int platform_prepare(void)
 
 static void power_down(suspend_disk_method_t mode)
 {
-	int error = 0;
-
 	switch(mode) {
 	case PM_DISK_PLATFORM:
 		kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
-		error = pm_ops->enter(PM_SUSPEND_DISK);
+		pm_ops->enter(PM_SUSPEND_DISK);
 		break;
 	case PM_DISK_SHUTDOWN:
 		kernel_power_off();
-- 
cgit v1.2.3


From 341a595850dac1b0503df34260257d71b4fdf72c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:49 -0800
Subject: [PATCH] Support for freezeable workqueues

Make it possible to create a workqueue the worker thread of which will be
frozen during suspend, along with other kernel threads.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: David Chinner <dgc@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/workqueue.h |  8 +++++---
 kernel/workqueue.c        | 20 ++++++++++++++------
 2 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4a3ea83c6d16..f0cb1df7b475 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -147,9 +147,11 @@ struct execute_work {
 
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
-						    int singlethread);
-#define create_workqueue(name) __create_workqueue((name), 0)
-#define create_singlethread_workqueue(name) __create_workqueue((name), 1)
+						    int singlethread,
+						    int freezeable);
+#define create_workqueue(name) __create_workqueue((name), 0, 0)
+#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1)
+#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8d1e7cb8a51a..2945b094d871 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -29,6 +29,7 @@
 #include <linux/kthread.h>
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
+#include <linux/freezer.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -55,6 +56,8 @@ struct cpu_workqueue_struct {
 	struct task_struct *thread;
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
+
+	int freezeable;		/* Freeze the thread during suspend */
 } ____cacheline_aligned;
 
 /*
@@ -265,7 +268,8 @@ static int worker_thread(void *__cwq)
 	struct k_sigaction sa;
 	sigset_t blocked;
 
-	current->flags |= PF_NOFREEZE;
+	if (!cwq->freezeable)
+		current->flags |= PF_NOFREEZE;
 
 	set_user_nice(current, -5);
 
@@ -288,6 +292,9 @@ static int worker_thread(void *__cwq)
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
+		if (cwq->freezeable)
+			try_to_freeze();
+
 		add_wait_queue(&cwq->more_work, &wait);
 		if (list_empty(&cwq->worklist))
 			schedule();
@@ -364,7 +371,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
-						   int cpu)
+						   int cpu, int freezeable)
 {
 	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 	struct task_struct *p;
@@ -374,6 +381,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
 	cwq->thread = NULL;
 	cwq->insert_sequence = 0;
 	cwq->remove_sequence = 0;
+	cwq->freezeable = freezeable;
 	INIT_LIST_HEAD(&cwq->worklist);
 	init_waitqueue_head(&cwq->more_work);
 	init_waitqueue_head(&cwq->work_done);
@@ -389,7 +397,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
 }
 
 struct workqueue_struct *__create_workqueue(const char *name,
-					    int singlethread)
+					    int singlethread, int freezeable)
 {
 	int cpu, destroy = 0;
 	struct workqueue_struct *wq;
@@ -409,7 +417,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	mutex_lock(&workqueue_mutex);
 	if (singlethread) {
 		INIT_LIST_HEAD(&wq->list);
-		p = create_workqueue_thread(wq, singlethread_cpu);
+		p = create_workqueue_thread(wq, singlethread_cpu, freezeable);
 		if (!p)
 			destroy = 1;
 		else
@@ -417,7 +425,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	} else {
 		list_add(&wq->list, &workqueues);
 		for_each_online_cpu(cpu) {
-			p = create_workqueue_thread(wq, cpu);
+			p = create_workqueue_thread(wq, cpu, freezeable);
 			if (p) {
 				kthread_bind(p, cpu);
 				wake_up_process(p);
@@ -667,7 +675,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		mutex_lock(&workqueue_mutex);
 		/* Create a new workqueue thread for it. */
 		list_for_each_entry(wq, &workqueues, list) {
-			if (!create_workqueue_thread(wq, hotcpu)) {
+			if (!create_workqueue_thread(wq, hotcpu, 0)) {
 				printk("workqueue for %i failed\n", hotcpu);
 				return NOTIFY_BAD;
 			}
-- 
cgit v1.2.3


From ed07536ed6731775219c1df7fa26a7588753e693 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Dec 2006 20:35:24 -0800
Subject: [PATCH] lockdep: annotate nfs/nfsd in-kernel sockets

Stick NFS sockets in their own class to avoid some lockdep warnings.  NFS
sockets are never exposed to user-space, and will hence not trigger certain
code paths that would otherwise pose deadlock scenarios.

[akpm@osdl.org: cleanups]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Dickson <SteveD@redhat.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Acked-by: Neil Brown <neilb@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
[ Fixed patch corruption by quilt, pointed out by Peter Zijlstra ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/net/sock.h    | 19 +++++++++++++++++++
 kernel/lockdep.c      |  1 +
 net/core/sock.c       | 23 +++++------------------
 net/sunrpc/svcsock.c  | 31 +++++++++++++++++++++++++++++++
 net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/include/net/sock.h b/include/net/sock.h
index 730899ce5162..03684e702d13 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -746,6 +746,25 @@ static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
  */
 #define sock_owned_by_user(sk)	((sk)->sk_lock.owner)
 
+/*
+ * Macro so as to not evaluate some arguments when
+ * lockdep is not enabled.
+ *
+ * Mark both the sk_lock and the sk_lock.slock as a
+ * per-address-family lock class.
+ */
+#define sock_lock_init_class_and_name(sk, sname, skey, name, key) 	\
+do {									\
+	sk->sk_lock.owner = NULL;					\
+	init_waitqueue_head(&sk->sk_lock.wq);				\
+	spin_lock_init(&(sk)->sk_lock.slock);				\
+	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
+			sizeof((sk)->sk_lock));				\
+	lockdep_set_class_and_name(&(sk)->sk_lock.slock,		\
+		       	(skey), (sname));				\
+	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
+} while (0)
+
 extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass));
 
 static inline void lock_sock(struct sock *sk)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c9fefdb1a7db..e33f6207f5b3 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2645,6 +2645,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 	}
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 
 static void print_held_locks_bug(struct task_struct *curr)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 4a432da441e9..0ed5b4f0bc40 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -810,24 +810,11 @@ lenout:
  */
 static void inline sock_lock_init(struct sock *sk)
 {
-	spin_lock_init(&sk->sk_lock.slock);
-	sk->sk_lock.owner = NULL;
-	init_waitqueue_head(&sk->sk_lock.wq);
-	/*
-	 * Make sure we are not reinitializing a held lock:
-	 */
-	debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
-
-	/*
-	 * Mark both the sk_lock and the sk_lock.slock as a
-	 * per-address-family lock class:
-	 */
-	lockdep_set_class_and_name(&sk->sk_lock.slock,
-				   af_family_slock_keys + sk->sk_family,
-				   af_family_slock_key_strings[sk->sk_family]);
-	lockdep_init_map(&sk->sk_lock.dep_map,
-			 af_family_key_strings[sk->sk_family],
-			 af_family_keys + sk->sk_family, 0);
+	sock_lock_init_class_and_name(sk,
+			af_family_slock_key_strings[sk->sk_family],
+			af_family_slock_keys + sk->sk_family,
+			af_family_key_strings[sk->sk_family],
+			af_family_keys + sk->sk_family);
 }
 
 /**
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 1c68956824e3..99f54fb6d669 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -85,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req);
  */
 static int svc_conn_age_period = 6*60;
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key svc_key[2];
+static struct lock_class_key svc_slock_key[2];
+
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	BUG_ON(sk->sk_lock.owner != NULL);
+	switch (sk->sk_family) {
+	case AF_INET:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
+		    &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]);
+		break;
+
+	case AF_INET6:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
+		    &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]);
+		break;
+
+	default:
+		BUG();
+	}
+}
+#else
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+}
+#endif
+
 /*
  * Queue up an idle server thread.  Must have pool->sp_lock held.
  * Note: this is really a stack rather than a queue, so that we only
@@ -1557,6 +1586,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
 	if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
 		return error;
 
+	svc_reclassify_socket(sock);
+
 	if (type == SOCK_STREAM)
 		sock->sk->sk_reuse = 1; /* allow address reuse */
 	error = kernel_bind(sock, (struct sockaddr *) sin,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cfe3c15be948..2fc4a3123261 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1058,6 +1058,35 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 	return err;
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key xs_key[2];
+static struct lock_class_key xs_slock_key[2];
+
+static inline void xs_reclassify_socket(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	BUG_ON(sk->sk_lock.owner != NULL);
+	switch (sk->sk_family) {
+	case AF_INET:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
+			&xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
+		break;
+
+	case AF_INET6:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
+			&xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
+		break;
+
+	default:
+		BUG();
+	}
+}
+#else
+static inline void xs_reclassify_socket(struct socket *sock)
+{
+}
+#endif
+
 /**
  * xs_udp_connect_worker - set up a UDP socket
  * @work: RPC transport to connect
@@ -1081,6 +1110,7 @@ static void xs_udp_connect_worker(struct work_struct *work)
 		dprintk("RPC:      can't create UDP transport socket (%d).\n", -err);
 		goto out;
 	}
+	xs_reclassify_socket(sock);
 
 	if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
 		sock_release(sock);
@@ -1165,6 +1195,7 @@ static void xs_tcp_connect_worker(struct work_struct *work)
 			dprintk("RPC:      can't create TCP transport socket (%d).\n", -err);
 			goto out;
 		}
+		xs_reclassify_socket(sock);
 
 		if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
 			sock_release(sock);
-- 
cgit v1.2.3


From 07354a00901d103085e4376b7df0aad264c1836a Mon Sep 17 00:00:00 2001
From: "Adam B. Jerome" <abj@novell.com>
Date: Wed, 6 Dec 2006 20:35:30 -0800
Subject: [PATCH] /proc/kallsyms reports lower-case types for some non-exported
 symbols

This patch addresses incorrect symbol type information reported through
/proc/kallsyms.  A lowercase character should designate the symbol as local
(or non-exported).  An uppercase character should designate the symbol as
global (or external).

Without this patch, some non-exported symbols are incorrectly assigned an
upper-case designation in /proc/kallsyms.  This patch corrects this
condition by converting non-exported symbols types to lower case when
appropriate and eliminates the superfluous upcase_if_global function

Signed-off-by: Adam B. Jerome <abj@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kallsyms.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index eeac3e313b2b..54befe36ee0b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -20,6 +20,7 @@
 #include <linux/proc_fs.h>
 #include <linux/sched.h>	/* for cond_resched */
 #include <linux/mm.h>
+#include <linux/ctype.h>
 
 #include <asm/sections.h>
 
@@ -301,13 +302,6 @@ struct kallsym_iter
 	char name[KSYM_NAME_LEN+1];
 };
 
-/* Only label it "global" if it is exported. */
-static void upcase_if_global(struct kallsym_iter *iter)
-{
-	if (is_exported(iter->name, iter->owner))
-		iter->type += 'A' - 'a';
-}
-
 static int get_ksymbol_mod(struct kallsym_iter *iter)
 {
 	iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
@@ -316,7 +310,10 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
 	if (iter->owner == NULL)
 		return 0;
 
-	upcase_if_global(iter);
+	/* Label it "global" if it is exported, "local" if not exported. */
+	iter->type = is_exported(iter->name, iter->owner)
+		? toupper(iter->type) : tolower(iter->type);
+
 	return 1;
 }
 
-- 
cgit v1.2.3


From e59e2ae2c29700117a54e85c106017c24837119f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:35:59 -0800
Subject: [PATCH] SysRq-X: show blocked tasks

Add SysRq-X support: show blocked (TASK_UNINTERRUPTIBLE) tasks only.

Useful for debugging IO stalls.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/sysrq.c  | 14 +++++++++++++-
 include/linux/sched.h | 11 ++++++++++-
 kernel/sched.c        | 11 ++++++++---
 3 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index c64f5bcff947..05810c8d20bc 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -182,6 +182,18 @@ static struct sysrq_key_op sysrq_showstate_op = {
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
 
+static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty)
+{
+	show_state_filter(TASK_UNINTERRUPTIBLE);
+}
+static struct sysrq_key_op sysrq_showstate_blocked_op = {
+	.handler	= sysrq_handle_showstate_blocked,
+	.help_msg	= "showBlockedTasks",
+	.action_msg	= "Show Blocked State",
+	.enable_mask	= SYSRQ_ENABLE_DUMP,
+};
+
+
 static void sysrq_handle_showmem(int key, struct tty_struct *tty)
 {
 	show_mem();
@@ -304,7 +316,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
 	/* May be assigned at init time by SMP VOYAGER */
 	NULL,				/* v */
 	NULL,				/* w */
-	NULL,				/* x */
+	&sysrq_showstate_blocked_op,	/* x */
 	NULL,				/* y */
 	NULL				/* z */
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 837a012f573c..0a90cefb0b0d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -194,7 +194,16 @@ extern void init_idle(struct task_struct *idle, int cpu);
 
 extern cpumask_t nohz_cpu_mask;
 
-extern void show_state(void);
+/*
+ * Only dump TASK_* tasks. (-1 for all tasks)
+ */
+extern void show_state_filter(unsigned long state_filter);
+
+static inline void show_state(void)
+{
+	show_state_filter(-1);
+}
+
 extern void show_regs(struct pt_regs *);
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index 12fdbef1d9bf..1848e280504d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4804,7 +4804,7 @@ static void show_task(struct task_struct *p)
 		show_stack(p, NULL);
 }
 
-void show_state(void)
+void show_state_filter(unsigned long state_filter)
 {
 	struct task_struct *g, *p;
 
@@ -4824,11 +4824,16 @@ void show_state(void)
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
-		show_task(p);
+		if (p->state & state_filter)
+			show_task(p);
 	} while_each_thread(g, p);
 
 	read_unlock(&tasklist_lock);
-	debug_show_all_locks();
+	/*
+	 * Only show locks if all tasks are dumped:
+	 */
+	if (state_filter == -1)
+		debug_show_all_locks();
 }
 
 /**
-- 
cgit v1.2.3


From 4cf303487d5dddaace2daca8437c555f3f0bc1aa Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 6 Dec 2006 20:36:06 -0800
Subject: [PATCH] Export pm_suspend for the shared APM emulation

The new shared APM emulation just like its ARM and MIPS predecessors uses
pm_suspend() which was only exported on SH.  Move export to close to it's
definition where it really should be anyway.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sh/kernel/sh_ksyms.c | 4 ----
 kernel/power/main.c       | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/arch/sh/kernel/sh_ksyms.c b/arch/sh/kernel/sh_ksyms.c
index c706f3bfd897..ceee79143401 100644
--- a/arch/sh/kernel/sh_ksyms.c
+++ b/arch/sh/kernel/sh_ksyms.c
@@ -99,10 +99,6 @@ EXPORT_SYMBOL(__down_trylock);
 EXPORT_SYMBOL(synchronize_irq);
 #endif
 
-#ifdef CONFIG_PM
-EXPORT_SYMBOL(pm_suspend);
-#endif
-
 EXPORT_SYMBOL(csum_partial);
 #ifdef CONFIG_IPV6
 EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 751157b7897e..500eb87f643d 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
@@ -230,7 +231,7 @@ int pm_suspend(suspend_state_t state)
 	return -EINVAL;
 }
 
-
+EXPORT_SYMBOL(pm_suspend);
 
 decl_subsys(power,NULL,NULL);
 
-- 
cgit v1.2.3


From 696040670a12f66b17a839011f96d9ca376f688b Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 6 Dec 2006 20:36:15 -0800
Subject: [PATCH] cpuset: minor code refinements

A couple of minor code simplifications to the kernel/cpuset.c code.  No
functional change.  Just a little less code and a little more readable.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpuset.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6313c38c930e..bd1e89c4c96a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -729,9 +729,11 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 	}
 
 	/* Remaining checks don't apply to root cpuset */
-	if ((par = cur->parent) == NULL)
+	if (cur == &top_cpuset)
 		return 0;
 
+	par = cur->parent;
+
 	/* We must be a subset of our parent cpuset */
 	if (!is_cpuset_subset(trial, par))
 		return -EACCES;
@@ -1060,10 +1062,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	cpu_exclusive_changed =
 		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
 	mutex_lock(&callback_mutex);
-	if (turning_on)
-		set_bit(bit, &cs->flags);
-	else
-		clear_bit(bit, &cs->flags);
+	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
 	if (cpu_exclusive_changed)
-- 
cgit v1.2.3


From 910b1b2e6d7d10e1c3bffdd12a90ec82f535f9a5 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@o2.pl>
Date: Thu, 7 Dec 2006 10:45:25 +0100
Subject: [PATCH] lockdep: internal locking fixes

Here are mainly some lockdep returns with 0 with unlocking fixes.

Signed-off-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e33f6207f5b3..f76a24f2ac20 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -237,8 +237,10 @@ e	   locks. */
 	trace->max_entries = trace->nr_entries;
 
 	nr_stack_trace_entries += trace->nr_entries;
-	if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
+	if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) {
+		__raw_spin_unlock(&hash_lock);
 		return 0;
+	}
 
 	if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
 		__raw_spin_unlock(&hash_lock);
@@ -474,7 +476,8 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 		return 0;
 
 	entry->class = this;
-	save_trace(&entry->trace);
+	if (!save_trace(&entry->trace))
+		return 0;
 
 	/*
 	 * Since we never remove from the dependency list, the list can
@@ -562,8 +565,12 @@ static noinline int print_circular_bug_tail(void)
 	if (debug_locks_silent)
 		return 0;
 
+	/* hash_lock unlocked by the header */
+	__raw_spin_lock(&hash_lock);
 	this.class = check_source->class;
-	save_trace(&this.trace);
+	if (!save_trace(&this.trace))
+		return 0;
+	__raw_spin_unlock(&hash_lock);
 	print_circular_bug_entry(&this, 0);
 
 	printk("\nother info that might help us debug this:\n\n");
@@ -966,14 +973,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 			       &prev->class->locks_after, next->acquire_ip);
 	if (!ret)
 		return 0;
-	/*
-	 * Return value of 2 signals 'dependency already added',
-	 * in that case we dont have to add the backlink either.
-	 */
-	if (ret == 2)
-		return 2;
+
 	ret = add_lock_to_list(next->class, prev->class,
 			       &next->class->locks_before, next->acquire_ip);
+	if (!ret)
+		return 0;
 
 	/*
 	 * Debugging printouts:
@@ -1025,7 +1029,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		 * added:
 		 */
 		if (hlock->read != 2) {
-			check_prev_add(curr, hlock, next);
+			if (!check_prev_add(curr, hlock, next))
+				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
 			 * as non-trylock entries have added their
-- 
cgit v1.2.3


From b23984d0a12a4821b2e9712c71550f321eb88bb5 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@o2.pl>
Date: Wed, 6 Dec 2006 20:36:23 -0800
Subject: [PATCH] lockdep: misc fixes in lockdep.c

 - numeric string size replaced with constant in print_lock_name and
   print_lockdep_cache,

 - return on null pointer in print_lock_dependencies,

 - one more lockdep return with 0 with unlocking fix in mark_lock.

Signed-off-by: Jarek Poplawski <jarkao2@o2.pl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f76a24f2ac20..300d61bb314b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -359,7 +359,7 @@ get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4
 
 static void print_lock_name(struct lock_class *class)
 {
-	char str[128], c1, c2, c3, c4;
+	char str[KSYM_NAME_LEN + 1], c1, c2, c3, c4;
 	const char *name;
 
 	get_usage_chars(class, &c1, &c2, &c3, &c4);
@@ -381,7 +381,7 @@ static void print_lock_name(struct lock_class *class)
 static void print_lockdep_cache(struct lockdep_map *lock)
 {
 	const char *name;
-	char str[128];
+	char str[KSYM_NAME_LEN + 1];
 
 	name = lock->name;
 	if (!name)
@@ -451,7 +451,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
 	print_lock_class_header(class, depth);
 
 	list_for_each_entry(entry, &class->locks_after, entry) {
-		DEBUG_LOCKS_WARN_ON(!entry->class);
+		if (DEBUG_LOCKS_WARN_ON(!entry->class))
+			return;
+
 		print_lock_dependencies(entry->class, depth + 1);
 
 		printk("%*s ... acquired at:\n",depth,"");
@@ -1733,6 +1735,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 		debug_atomic_dec(&nr_unused_locks);
 		break;
 	default:
+		__raw_spin_unlock(&hash_lock);
 		debug_locks_off();
 		WARN_ON(1);
 		return 0;
-- 
cgit v1.2.3


From fec1d0115240593b39898289e6e1413ea6e44a84 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 6 Dec 2006 20:36:34 -0800
Subject: [PATCH] Disable CLONE_CHILD_CLEARTID for abnormal exit

The CLONE_CHILD_CLEARTID flag is used by NPTL to have its threads
communicate via memory/futex when they exit, so pthread_join can
synchronize using a simple futex wait.  The word of user memory where NPTL
stores a thread's own TID is what it passes; this gets reset to zero at
thread exit.

It is not desireable to touch this user memory when threads are dying due
to a fatal signal.  A core dump is more usefully representative of the
dying program state if the threads live at the time of the crash have their
NPTL data structures unperturbed.  The userland expectation of
CLONE_CHILD_CLEARTID has only ever been that it works for a thread making
an _exit system call.

This problem was identified by Ernie Petrides <petrides@redhat.com>.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Ernie Petrides <petrides@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 2cf74edd3295..f37980df1d58 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -448,7 +448,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		tsk->vfork_done = NULL;
 		complete(vfork_done);
 	}
-	if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+
+	/*
+	 * If we're exiting normally, clear a user-space tid field if
+	 * requested.  We leave this alone when dying by signal, to leave
+	 * the value intact in a core dump, and to save the unnecessary
+	 * trouble otherwise.  Userland only wants this done for a sys_exit.
+	 */
+	if (tsk->clear_child_tid
+	    && !(tsk->flags & PF_SIGNALED)
+	    && atomic_read(&mm->mm_users) > 1) {
 		u32 __user * tidptr = tsk->clear_child_tid;
 		tsk->clear_child_tid = NULL;
 
-- 
cgit v1.2.3


From 064b022c7adb2d853378078a9dc141f8288d1c73 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 6 Dec 2006 20:36:37 -0800
Subject: [PATCH] profile: fix uaccess handling

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/profile.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/profile.c b/kernel/profile.c
index f940b462eec9..15b012df4ff1 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,8 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	read = 0;
 
 	while (p < sizeof(unsigned int) && count > 0) {
-		put_user(*((char *)(&sample_step)+p),buf);
+		if (put_user(*((char *)(&sample_step)+p),buf))
+			return -EFAULT;
 		buf++; p++; count--; read++;
 	}
 	pnt = (char *)prof_buffer + p - sizeof(atomic_t);
-- 
cgit v1.2.3


From 128fb95650b3273a8dc9ba5514b6fe7db8ea30bf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:50 -0800
Subject: [PATCH] taskstats_exit_alloc: optimize/simplify

If there are no listeners, every task does unneeded kmem_cache alloc/free on
exit. We don't need listeners->sem for 'if (!list_empty())' check. Yes, we may
have a false positive, but this doesn't differ from the case when the listener
is unregistered after we drop the semaphore. So we don't need to do allocation
beforehand.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/taskstats.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index f5f92014ae98..d9d7c3576238 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -416,7 +416,6 @@ err:
 void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
 {
 	struct listener_list *listeners;
-	struct taskstats *tmp;
 	/*
 	 * This is the cpu on which the task is exiting currently and will
 	 * be the one for which the exit event is sent, even if the cpu
@@ -424,19 +423,11 @@ void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
 	 */
 	*mycpu = raw_smp_processor_id();
 
-	*ptidstats = NULL;
-	tmp = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
-	if (!tmp)
-		return;
-
 	listeners = &per_cpu(listener_array, *mycpu);
-	down_read(&listeners->sem);
-	if (!list_empty(&listeners->list)) {
-		*ptidstats = tmp;
-		tmp = NULL;
-	}
-	up_read(&listeners->sem);
-	kfree(tmp);
+
+	*ptidstats = NULL;
+	if (!list_empty(&listeners->list))
+		*ptidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
 }
 
 /* Send pid data out on exit */
-- 
cgit v1.2.3


From 115085ea0794c0f339be8f9d25505c7f9861d824 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:51 -0800
Subject: [PATCH] taskstats: cleanup do_exit() path

do_exit:
	taskstats_exit_alloc()
	...
	taskstats_exit_send()
	taskstats_exit_free()

I think this is not good, let it be a single function exported to the core
kernel, taskstats_exit(), which does alloc + send + free itself.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/taskstats_kern.h | 17 ++---------------
 kernel/exit.c                  |  8 ++------
 kernel/taskstats.c             | 41 +++++++++++++++--------------------------
 3 files changed, 19 insertions(+), 47 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index ce8a912e5426..f1261a532496 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -15,12 +15,6 @@
 extern struct kmem_cache *taskstats_cache;
 extern struct mutex taskstats_exit_mutex;
 
-static inline void taskstats_exit_free(struct taskstats *tidstats)
-{
-	if (tidstats)
-		kmem_cache_free(taskstats_cache, tidstats);
-}
-
 static inline void taskstats_tgid_init(struct signal_struct *sig)
 {
 	sig->stats = NULL;
@@ -54,17 +48,10 @@ static inline void taskstats_tgid_free(struct signal_struct *sig)
 		kmem_cache_free(taskstats_cache, sig->stats);
 }
 
-extern void taskstats_exit_alloc(struct taskstats **, unsigned int *);
-extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int);
+extern void taskstats_exit(struct task_struct *, int group_dead);
 extern void taskstats_init_early(void);
 #else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
-{}
-static inline void taskstats_exit_free(struct taskstats *ptidstats)
-{}
-static inline void taskstats_exit_send(struct task_struct *tsk,
-				       struct taskstats *tidstats,
-				       int group_dead, unsigned int cpu)
+static inline void taskstats_exit(struct task_struct *tsk, int group_dead)
 {}
 static inline void taskstats_tgid_init(struct signal_struct *sig)
 {}
diff --git a/kernel/exit.c b/kernel/exit.c
index 06de6c4e8ca3..4e3f919edc48 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -850,9 +850,7 @@ static void exit_notify(struct task_struct *tsk)
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
-	struct taskstats *tidstats;
 	int group_dead;
-	unsigned int mycpu;
 
 	profile_task_exit(tsk);
 
@@ -890,8 +888,6 @@ fastcall NORET_TYPE void do_exit(long code)
 				current->comm, current->pid,
 				preempt_count());
 
-	taskstats_exit_alloc(&tidstats, &mycpu);
-
 	acct_update_integrals(tsk);
 	if (tsk->mm) {
 		update_hiwater_rss(tsk->mm);
@@ -911,8 +907,8 @@ fastcall NORET_TYPE void do_exit(long code)
 #endif
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
-	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
-	taskstats_exit_free(tidstats);
+
+	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d9d7c3576238..2654886fe058 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -119,10 +119,10 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
 /*
  * Send taskstats data in @skb to listeners registered for @cpu's exit data
  */
-static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
+static void send_cpu_listeners(struct sk_buff *skb,
+					struct listener_list *listeners)
 {
 	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
-	struct listener_list *listeners;
 	struct listener *s, *tmp;
 	struct sk_buff *skb_next, *skb_cur = skb;
 	void *reply = genlmsg_data(genlhdr);
@@ -135,7 +135,6 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
 	}
 
 	rc = 0;
-	listeners = &per_cpu(listener_array, cpu);
 	down_read(&listeners->sem);
 	list_for_each_entry(s, &listeners->list, list) {
 		skb_next = NULL;
@@ -413,28 +412,12 @@ err:
 	return rc;
 }
 
-void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
-{
-	struct listener_list *listeners;
-	/*
-	 * This is the cpu on which the task is exiting currently and will
-	 * be the one for which the exit event is sent, even if the cpu
-	 * on which this function is running changes later.
-	 */
-	*mycpu = raw_smp_processor_id();
-
-	listeners = &per_cpu(listener_array, *mycpu);
-
-	*ptidstats = NULL;
-	if (!list_empty(&listeners->list))
-		*ptidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
-}
-
 /* Send pid data out on exit */
-void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
-			int group_dead, unsigned int mycpu)
+void taskstats_exit(struct task_struct *tsk, int group_dead)
 {
 	int rc;
+	struct listener_list *listeners;
+	struct taskstats *tidstats;
 	struct sk_buff *rep_skb;
 	void *reply;
 	size_t size;
@@ -458,12 +441,17 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
 		fill_tgid_exit(tsk);
 	}
 
+	listeners = &__raw_get_cpu_var(listener_array);
+	if (list_empty(&listeners->list))
+		return;
+
+	tidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
 	if (!tidstats)
 		return;
 
 	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
 	if (rc < 0)
-		goto ret;
+		goto free_stats;
 
 	rc = fill_pid(tsk->pid, tsk, tidstats);
 	if (rc < 0)
@@ -492,15 +480,16 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
 	nla_nest_end(rep_skb, na);
 
 send:
-	send_cpu_listeners(rep_skb, mycpu);
+	send_cpu_listeners(rep_skb, listeners);
+free_stats:
+	kmem_cache_free(taskstats_cache, tidstats);
 	return;
 
 nla_put_failure:
 	genlmsg_cancel(rep_skb, reply);
 err_skb:
 	nlmsg_free(rep_skb);
-ret:
-	return;
+	goto free_stats;
 }
 
 static struct genl_ops taskstats_ops = {
-- 
cgit v1.2.3


From 34ec12349c8a9505adc59d72f92b4595bc2483ff Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:52 -0800
Subject: [PATCH] taskstats: cleanup ->signal->stats allocation

Allocate ->signal->stats on demand in taskstats_exit(), this allows us to
remove taskstats_tgid_alloc() (the last non-trivial inline) from taskstat's
public interface.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/taskstats_kern.h | 24 ------------------------
 kernel/fork.c                  |  1 -
 kernel/taskstats.c             | 26 +++++++++++++++++++++++++-
 3 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index f1261a532496..7e9680f4afdd 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -20,28 +20,6 @@ static inline void taskstats_tgid_init(struct signal_struct *sig)
 	sig->stats = NULL;
 }
 
-static inline void taskstats_tgid_alloc(struct task_struct *tsk)
-{
-	struct signal_struct *sig = tsk->signal;
-	struct taskstats *stats;
-
-	if (sig->stats != NULL)
-		return;
-
-	/* No problem if kmem_cache_zalloc() fails */
-	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
-
-	spin_lock_irq(&tsk->sighand->siglock);
-	if (!sig->stats) {
-		sig->stats = stats;
-		stats = NULL;
-	}
-	spin_unlock_irq(&tsk->sighand->siglock);
-
-	if (stats)
-		kmem_cache_free(taskstats_cache, stats);
-}
-
 static inline void taskstats_tgid_free(struct signal_struct *sig)
 {
 	if (sig->stats)
@@ -55,8 +33,6 @@ static inline void taskstats_exit(struct task_struct *tsk, int group_dead)
 {}
 static inline void taskstats_tgid_init(struct signal_struct *sig)
 {}
-static inline void taskstats_tgid_alloc(struct task_struct *tsk)
-{}
 static inline void taskstats_tgid_free(struct signal_struct *sig)
 {}
 static inline void taskstats_init_early(void)
diff --git a/kernel/fork.c b/kernel/fork.c
index f37980df1d58..658838148647 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -847,7 +847,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	if (clone_flags & CLONE_THREAD) {
 		atomic_inc(&current->signal->count);
 		atomic_inc(&current->signal->live);
-		taskstats_tgid_alloc(current);
 		return 0;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 2654886fe058..7d793d6b1e90 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -412,6 +412,30 @@ err:
 	return rc;
 }
 
+static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct taskstats *stats;
+
+	if (sig->stats || thread_group_empty(tsk))
+		goto ret;
+
+	/* No problem if kmem_cache_zalloc() fails */
+	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (!sig->stats) {
+		sig->stats = stats;
+		stats = NULL;
+	}
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	if (stats)
+		kmem_cache_free(taskstats_cache, stats);
+ret:
+	return sig->stats;
+}
+
 /* Send pid data out on exit */
 void taskstats_exit(struct task_struct *tsk, int group_dead)
 {
@@ -433,7 +457,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	size = nla_total_size(sizeof(u32)) +
 		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
 
-	is_thread_group = (tsk->signal->stats != NULL);
+	is_thread_group = !!taskstats_tgid_alloc(tsk);
 	if (is_thread_group) {
 		/* PID + STATS + TGID + STATS */
 		size = 2 * size;
-- 
cgit v1.2.3


From 68062b86fc0f480b806d270a8278709a5a41bb67 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:53 -0800
Subject: [PATCH] taskstats: factor out reply assembling

Introduce mk_reply() helper which does all nla_put()s on reply.

Saves 453 bytes and a preparation for the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Jay Lan <jlan@sgi.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/taskstats.c | 55 +++++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

(limited to 'kernel')

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d793d6b1e90..d2a41339f37b 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -343,6 +343,25 @@ static int parse(struct nlattr *na, cpumask_t *mask)
 	return ret;
 }
 
+static int mk_reply(struct sk_buff *skb, int type, u32 pid, struct taskstats *stats)
+{
+	struct nlattr *na;
+	int aggr;
+
+	aggr = TASKSTATS_TYPE_AGGR_TGID;
+	if (type == TASKSTATS_TYPE_PID)
+		aggr = TASKSTATS_TYPE_AGGR_PID;
+
+	na = nla_nest_start(skb, aggr);
+	NLA_PUT_U32(skb, type, pid);
+	NLA_PUT_TYPE(skb, struct taskstats, TASKSTATS_TYPE_STATS, *stats);
+	nla_nest_end(skb, na);
+
+	return 0;
+nla_put_failure:
+	return -1;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
@@ -350,7 +369,6 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct taskstats stats;
 	void *reply;
 	size_t size;
-	struct nlattr *na;
 	cpumask_t mask;
 
 	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
@@ -382,27 +400,21 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 		if (rc < 0)
 			goto err;
 
-		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
-		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid);
-		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
-				stats);
+		if (mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid, &stats))
+			goto nla_put_failure;
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
 		rc = fill_tgid(tgid, NULL, &stats);
 		if (rc < 0)
 			goto err;
 
-		na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
-		NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid);
-		NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
-				stats);
+		if (mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid, &stats))
+			goto nla_put_failure;
 	} else {
 		rc = -EINVAL;
 		goto err;
 	}
 
-	nla_nest_end(rep_skb, na);
-
 	return send_reply(rep_skb, info->snd_pid);
 
 nla_put_failure:
@@ -446,7 +458,6 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	void *reply;
 	size_t size;
 	int is_thread_group;
-	struct nlattr *na;
 
 	if (!family_registered)
 		return;
@@ -481,27 +492,17 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	if (rc < 0)
 		goto err_skb;
 
-	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
-	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
-	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
-			*tidstats);
-	nla_nest_end(rep_skb, na);
-
-	if (!is_thread_group)
-		goto send;
+	if (mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid, tidstats))
+		goto nla_put_failure;
 
 	/*
 	 * Doesn't matter if tsk is the leader or the last group member leaving
 	 */
-	if (!group_dead)
+	if (!is_thread_group || !group_dead)
 		goto send;
 
-	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
-	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
-	/* No locking needed for tsk->signal->stats since group is dead */
-	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
-			*tsk->signal->stats);
-	nla_nest_end(rep_skb, na);
+	if (mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid, tsk->signal->stats))
+		goto nla_put_failure;
 
 send:
 	send_cpu_listeners(rep_skb, listeners);
-- 
cgit v1.2.3


From 51de4d90852ba4cfa5743594ec4a7f158b52dc43 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:54 -0800
Subject: [PATCH] taskstats: use nla_reserve() for reply assembling

Currently taskstats_user_cmd()/taskstats_exit() do:

	1) allocate stats
	2) fill stats
	3) make a temporary copy on stack (236 bytes)
	4) copy that copy to skb
	5) free stats

With the help of nla_reserve() we can operate on skb->data directly,
thus avoiding all these steps except 2).

So, before this patch:

	// copy *stats to skb->data
	int mk_reply(skb, ..., struct taskstats *stats);

	fill_pid(stats);
	mk_reply(skb, ..., stats);

After:
	// return a pointer to skb->data
	struct taskstats *mk_reply(skb, ...);

	stat = mk_reply(skb, ...);
	fill_pid(stats);

Shrinks taskatsks.o by 162 bytes.

A stupid benchmark (send one million TASKSTATS_CMD_ATTR_PID) shows the

		real user sys
	before:
		4.02 0.06 3.96
		4.02 0.04 3.98
		4.02 0.04 3.97
	after:
		3.86 0.08 3.78
		3.88 0.10 3.77
		3.89 0.09 3.80

but this looks suspiciously good.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Jay Lan <jlan@sgi.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/taskstats.c | 86 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 44 insertions(+), 42 deletions(-)

(limited to 'kernel')

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d2a41339f37b..b0aad99c4f8d 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -185,6 +185,7 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
 	} else
 		get_task_struct(tsk);
 
+	memset(stats, 0, sizeof(*stats));
 	/*
 	 * Each accounting subsystem adds calls to its functions to
 	 * fill in relevant parts of struct taskstsats as follows
@@ -227,6 +228,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
 
 	if (first->signal->stats)
 		memcpy(stats, first->signal->stats, sizeof(*stats));
+	else
+		memset(stats, 0, sizeof(*stats));
 
 	tsk = first;
 	do {
@@ -343,9 +346,9 @@ static int parse(struct nlattr *na, cpumask_t *mask)
 	return ret;
 }
 
-static int mk_reply(struct sk_buff *skb, int type, u32 pid, struct taskstats *stats)
+static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
-	struct nlattr *na;
+	struct nlattr *na, *ret;
 	int aggr;
 
 	aggr = TASKSTATS_TYPE_AGGR_TGID;
@@ -353,20 +356,23 @@ static int mk_reply(struct sk_buff *skb, int type, u32 pid, struct taskstats *st
 		aggr = TASKSTATS_TYPE_AGGR_PID;
 
 	na = nla_nest_start(skb, aggr);
-	NLA_PUT_U32(skb, type, pid);
-	NLA_PUT_TYPE(skb, struct taskstats, TASKSTATS_TYPE_STATS, *stats);
+	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
+		goto err;
+	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
+	if (!ret)
+		goto err;
 	nla_nest_end(skb, na);
 
-	return 0;
-nla_put_failure:
-	return -1;
+	return nla_data(ret);
+err:
+	return NULL;
 }
 
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
 	struct sk_buff *rep_skb;
-	struct taskstats stats;
+	struct taskstats *stats;
 	void *reply;
 	size_t size;
 	cpumask_t mask;
@@ -389,36 +395,36 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	size = nla_total_size(sizeof(u32)) +
 		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
 
-	memset(&stats, 0, sizeof(stats));
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
 	if (rc < 0)
 		return rc;
 
+	rc = -EINVAL;
 	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
 		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
-		rc = fill_pid(pid, NULL, &stats);
-		if (rc < 0)
-			goto err;
+		stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
+		if (!stats)
+			goto nla_err;
 
-		if (mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid, &stats))
-			goto nla_put_failure;
+		rc = fill_pid(pid, NULL, stats);
+		if (rc < 0)
+			goto nla_err;
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
-		rc = fill_tgid(tgid, NULL, &stats);
-		if (rc < 0)
-			goto err;
+		stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
+		if (!stats)
+			goto nla_err;
 
-		if (mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid, &stats))
-			goto nla_put_failure;
-	} else {
-		rc = -EINVAL;
+		rc = fill_tgid(tgid, NULL, stats);
+		if (rc < 0)
+			goto nla_err;
+	} else
 		goto err;
-	}
 
 	return send_reply(rep_skb, info->snd_pid);
 
-nla_put_failure:
-	rc = genlmsg_cancel(rep_skb, reply);
+nla_err:
+	genlmsg_cancel(rep_skb, reply);
 err:
 	nlmsg_free(rep_skb);
 	return rc;
@@ -453,7 +459,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 {
 	int rc;
 	struct listener_list *listeners;
-	struct taskstats *tidstats;
+	struct taskstats *stats;
 	struct sk_buff *rep_skb;
 	void *reply;
 	size_t size;
@@ -480,20 +486,17 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	if (list_empty(&listeners->list))
 		return;
 
-	tidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
-	if (!tidstats)
-		return;
-
 	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
 	if (rc < 0)
-		goto free_stats;
+		return;
 
-	rc = fill_pid(tsk->pid, tsk, tidstats);
-	if (rc < 0)
-		goto err_skb;
+	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid);
+	if (!stats)
+		goto nla_err;
 
-	if (mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid, tidstats))
-		goto nla_put_failure;
+	rc = fill_pid(tsk->pid, tsk, stats);
+	if (rc < 0)
+		goto nla_err;
 
 	/*
 	 * Doesn't matter if tsk is the leader or the last group member leaving
@@ -501,20 +504,19 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	if (!is_thread_group || !group_dead)
 		goto send;
 
-	if (mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid, tsk->signal->stats))
-		goto nla_put_failure;
+	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid);
+	if (!stats)
+		goto nla_err;
+
+	memcpy(stats, tsk->signal->stats, sizeof(*stats));
 
 send:
 	send_cpu_listeners(rep_skb, listeners);
-free_stats:
-	kmem_cache_free(taskstats_cache, tidstats);
 	return;
 
-nla_put_failure:
+nla_err:
 	genlmsg_cancel(rep_skb, reply);
-err_skb:
 	nlmsg_free(rep_skb);
-	goto free_stats;
 }
 
 static struct genl_ops taskstats_ops = {
-- 
cgit v1.2.3


From 37167485302c8876cb0303af113696e88c2945aa Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 6 Dec 2006 20:36:55 -0800
Subject: [PATCH] taskstats: cleanup reply assembling

Thomas Graf wrote:
>
> nla_nest_start() may return NULL, either rely on prepare_reply() to be
> correct and BUG() on failure or do proper error handling for all
> functions.

nla_put() in taskstat.c can fail only if the 'size' argument of alloc_skb()
was not right. This is a kernel bug, we should not hide it. So add 'BUG()'
on error path and check for 'na == NULL'.

> genlmsg_cancel() is only required in error paths for dumping
> procedures.

So we can remove 'genlmsg_cancel()' calls and 'void *reply' (saves 227 bytes).

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Jay Lan <jlan@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/taskstats.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index b0aad99c4f8d..4c3476fa058d 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -69,7 +69,7 @@ enum actions {
 };
 
 static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
-			void **replyp, size_t size)
+				size_t size)
 {
 	struct sk_buff *skb;
 	void *reply;
@@ -94,7 +94,6 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 	}
 
 	*skbp = skb;
-	*replyp = reply;
 	return 0;
 }
 
@@ -351,11 +350,13 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 	struct nlattr *na, *ret;
 	int aggr;
 
-	aggr = TASKSTATS_TYPE_AGGR_TGID;
-	if (type == TASKSTATS_TYPE_PID)
-		aggr = TASKSTATS_TYPE_AGGR_PID;
+	aggr = (type == TASKSTATS_TYPE_PID)
+			? TASKSTATS_TYPE_AGGR_PID
+			: TASKSTATS_TYPE_AGGR_TGID;
 
 	na = nla_nest_start(skb, aggr);
+	if (!na)
+		goto err;
 	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
 		goto err;
 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
@@ -373,7 +374,6 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	int rc = 0;
 	struct sk_buff *rep_skb;
 	struct taskstats *stats;
-	void *reply;
 	size_t size;
 	cpumask_t mask;
 
@@ -395,7 +395,7 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 	size = nla_total_size(sizeof(u32)) +
 		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
 
-	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
 		return rc;
 
@@ -404,27 +404,24 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
 		stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
 		if (!stats)
-			goto nla_err;
+			goto err;
 
 		rc = fill_pid(pid, NULL, stats);
 		if (rc < 0)
-			goto nla_err;
+			goto err;
 	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
 		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
 		stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
 		if (!stats)
-			goto nla_err;
+			goto err;
 
 		rc = fill_tgid(tgid, NULL, stats);
 		if (rc < 0)
-			goto nla_err;
+			goto err;
 	} else
 		goto err;
 
 	return send_reply(rep_skb, info->snd_pid);
-
-nla_err:
-	genlmsg_cancel(rep_skb, reply);
 err:
 	nlmsg_free(rep_skb);
 	return rc;
@@ -461,7 +458,6 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	struct listener_list *listeners;
 	struct taskstats *stats;
 	struct sk_buff *rep_skb;
-	void *reply;
 	size_t size;
 	int is_thread_group;
 
@@ -486,17 +482,17 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	if (list_empty(&listeners->list))
 		return;
 
-	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
+	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
 		return;
 
 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid);
 	if (!stats)
-		goto nla_err;
+		goto err;
 
 	rc = fill_pid(tsk->pid, tsk, stats);
 	if (rc < 0)
-		goto nla_err;
+		goto err;
 
 	/*
 	 * Doesn't matter if tsk is the leader or the last group member leaving
@@ -506,16 +502,14 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 
 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid);
 	if (!stats)
-		goto nla_err;
+		goto err;
 
 	memcpy(stats, tsk->signal->stats, sizeof(*stats));
 
 send:
 	send_cpu_listeners(rep_skb, listeners);
 	return;
-
-nla_err:
-	genlmsg_cancel(rep_skb, reply);
+err:
 	nlmsg_free(rep_skb);
 }
 
-- 
cgit v1.2.3


From a4c410f00f7ca4bd448b0d63f6f882fd244dc991 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Dec 2006 20:37:21 -0800
Subject: [PATCH] lockdep: print current locks on in_atomic warnings

Add debug_show_held_locks(current) to __might_sleep() and schedule(); this
makes finding the offending lock leak easier.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 1848e280504d..343e1794233e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3333,6 +3333,7 @@ asmlinkage void __sched schedule(void)
 		printk(KERN_ERR "BUG: scheduling while atomic: "
 			"%s/0x%08x/%d\n",
 			current->comm, preempt_count(), current->pid);
+		debug_show_held_locks(current);
 		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
@@ -6872,6 +6873,7 @@ void __might_sleep(char *file, int line)
 				" context at %s:%d\n", file, line);
 		printk("in_atomic():%d, irqs_disabled():%d\n",
 			in_atomic(), irqs_disabled());
+		debug_show_held_locks(current);
 		dump_stack();
 	}
 #endif
-- 
cgit v1.2.3


From 6cfd76a26d9fe2ba54b9d496a48c1d9285e5c5ed Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Dec 2006 20:37:22 -0800
Subject: [PATCH] lockdep: name some old style locks

Name some of the remaning 'old_style_spin_init' locks

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/traps.c       | 2 +-
 include/asm-i386/rwsem.h       | 4 ++--
 include/linux/init_task.h      | 2 +-
 include/linux/mutex.h          | 2 +-
 include/linux/rtmutex.h        | 2 +-
 include/linux/rwsem-spinlock.h | 3 ++-
 include/linux/sunrpc/sched.h   | 4 ++--
 kernel/acct.c                  | 3 ++-
 kernel/irq/handle.c            | 2 +-
 net/sunrpc/svcauth.c           | 3 ++-
 security/keys/process_keys.c   | 2 +-
 11 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index fe9c5e8e7e6f..3124f1b04d67 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -452,7 +452,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
-		.lock =			SPIN_LOCK_UNLOCKED,
+		.lock =			__SPIN_LOCK_UNLOCKED(die.lock),
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index bc598d6388e3..041906f3c6df 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -75,8 +75,8 @@ struct rw_semaphore {
 
 
 #define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
-	__RWSEM_DEP_MAP_INIT(name) }
+{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
+  LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 33c5daacc743..733790d4f7db 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -73,7 +73,7 @@
 extern struct nsproxy init_nsproxy;
 #define INIT_NSPROXY(nsproxy) {						\
 	.count		= ATOMIC_INIT(1),				\
-	.nslock		= SPIN_LOCK_UNLOCKED,				\
+	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
 	.uts_ns		= &init_uts_ns,					\
 	.namespace	= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 27c48daa3183..b2b91c477563 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -94,7 +94,7 @@ do {							\
 
 #define __MUTEX_INITIALIZER(lockname) \
 		{ .count = ATOMIC_INIT(1) \
-		, .wait_lock = SPIN_LOCK_UNLOCKED \
+		, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
 		, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
 		__DEBUG_MUTEX_INITIALIZER(lockname) \
 		__DEP_MAP_MUTEX_INITIALIZER(lockname) }
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 5d41dee82f80..b0090e9f7884 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -63,7 +63,7 @@ struct hrtimer_sleeper;
 #endif
 
 #define __RT_MUTEX_INITIALIZER(mutexname) \
-	{ .wait_lock = SPIN_LOCK_UNLOCKED \
+	{ .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 	, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \
 	, .owner = NULL \
 	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index ae1fcadd598e..813cee13da0d 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -44,7 +44,8 @@ struct rw_semaphore {
 #endif
 
 #define __RWSEM_INITIALIZER(name) \
-{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
+{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
+  __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f399c138f79d..0746c3b16f3a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -222,7 +222,7 @@ struct rpc_wait_queue {
 
 #ifndef RPC_DEBUG
 # define RPC_WAITQ_INIT(var,qname) { \
-		.lock = SPIN_LOCK_UNLOCKED, \
+		.lock = __SPIN_LOCK_UNLOCKED(var.lock), \
 		.tasks = { \
 			[0] = LIST_HEAD_INIT(var.tasks[0]), \
 			[1] = LIST_HEAD_INIT(var.tasks[1]), \
@@ -231,7 +231,7 @@ struct rpc_wait_queue {
 	}
 #else
 # define RPC_WAITQ_INIT(var,qname) { \
-		.lock = SPIN_LOCK_UNLOCKED, \
+		.lock = __SPIN_LOCK_UNLOCKED(var.lock), \
 		.tasks = { \
 			[0] = LIST_HEAD_INIT(var.tasks[0]), \
 			[1] = LIST_HEAD_INIT(var.tasks[1]), \
diff --git a/kernel/acct.c b/kernel/acct.c
index 0aad5ca36a81..dc12db8600e7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -89,7 +89,8 @@ struct acct_glbs {
 	struct timer_list	timer;
 };
 
-static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
+static struct acct_glbs acct_globals __cacheline_aligned =
+	{__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
 
 /*
  * Called whenever the timer says to check the free space.
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a681912bc89a..aff1f0fabb0d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -54,7 +54,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = {
 		.chip = &no_irq_chip,
 		.handle_irq = handle_bad_irq,
 		.depth = 1,
-		.lock = SPIN_LOCK_UNLOCKED,
+		.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
 #ifdef CONFIG_SMP
 		.affinity = CPU_MASK_ALL
 #endif
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index ee9bb1522d5e..c7bb5f7f21a5 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -119,7 +119,8 @@ EXPORT_SYMBOL(svc_auth_unregister);
 #define	DN_HASHMASK	(DN_HASHMAX-1)
 
 static struct hlist_head	auth_domain_table[DN_HASHMAX];
-static spinlock_t	auth_domain_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t	auth_domain_lock =
+	__SPIN_LOCK_UNLOCKED(auth_domain_lock);
 
 void auth_domain_put(struct auth_domain *dom)
 {
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 32150cf7c37f..b6f86808475a 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(key_session_mutex);
 struct key_user root_key_user = {
 	.usage		= ATOMIC_INIT(3),
 	.consq		= LIST_HEAD_INIT(root_key_user.consq),
-	.lock		= SPIN_LOCK_UNLOCKED,
+	.lock		= __SPIN_LOCK_UNLOCKED(root_key_user.lock),
 	.nkeys		= ATOMIC_INIT(2),
 	.nikeys		= ATOMIC_INIT(2),
 	.uid		= 0,
-- 
cgit v1.2.3


From ece8a684c75df215320b4155944979e3f78c5c93 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:37:24 -0800
Subject: [PATCH] sleep profiling

Implement prof=sleep profiling.  TASK_UNINTERRUPTIBLE sleeps will be taken
as a profile hit, and every millisecond spent sleeping causes a profile-hit
for the call site that initiated the sleep.

Sample readprofile output on i386:

   306 ps2_sendbyte                               1.3973
   432 call_usermodehelper_keys                   1.9548
   484 ps2_command                                0.6453
   790 __driver_attach                            4.7879
  1593 msleep                                    44.2500
  3976 sync_buffer                               64.1290
  4076 do_lookup                                 12.4648
  8587 sync_page                                122.6714
 20820 total                                      0.0067

(NOTE: architectures need to check whether get_wchan() can be called from
deep within the wakeup path.)

akpm: we need to mark more functions __sched.  lock_sock(), msleep(), others..

akpm: the contention in do_lookup() is a surprise.  Presumably doing disk
reads for directory contents while holding i_mutex.

[akpm@osdl.org: various fixes]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  1 +
 include/linux/profile.h             | 24 ++++++++++++++++++++++-
 kernel/profile.c                    | 39 ++++++++++++++++++++++++++++---------
 kernel/sched.c                      | 11 +++++++++++
 4 files changed, 65 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8fe6b834ef27..2a40d9f6ffad 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1294,6 +1294,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			Param: "schedule" - profile schedule points.
 			Param: <number> - step/bucket size as a power of 2 for
 				statistical time based profiling.
+			Param: "sleep" - profile D-state sleeping (millisecs)
 
 	processor.max_cstate=	[HW,ACPI]
 			Limit processor to maximum C-state
diff --git a/include/linux/profile.h b/include/linux/profile.h
index acce53fd38b6..5670b340c4ef 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -6,10 +6,15 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/cpumask.h>
+#include <linux/cache.h>
+
 #include <asm/errno.h>
 
+extern int prof_on __read_mostly;
+
 #define CPU_PROFILING	1
 #define SCHED_PROFILING	2
+#define SLEEP_PROFILING	3
 
 struct proc_dir_entry;
 struct pt_regs;
@@ -18,7 +23,24 @@ struct notifier_block;
 /* init basic kernel profiler */
 void __init profile_init(void);
 void profile_tick(int);
-void profile_hit(int, void *);
+
+/*
+ * Add multiple profiler hits to a given address:
+ */
+void profile_hits(int, void *ip, unsigned int nr_hits);
+
+/*
+ * Single profiler hit:
+ */
+static inline void profile_hit(int type, void *ip)
+{
+	/*
+	 * Speedup for the common (no profiling enabled) case:
+	 */
+	if (unlikely(prof_on == type))
+		profile_hits(type, ip, 1);
+}
+
 #ifdef CONFIG_PROC_FS
 void create_prof_cpu_mask(struct proc_dir_entry *);
 #else
diff --git a/kernel/profile.c b/kernel/profile.c
index 15b012df4ff1..04fd84e8cdbe 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -40,7 +40,7 @@ int (*timer_hook)(struct pt_regs *) __read_mostly;
 
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
-static int prof_on __read_mostly;
+int prof_on __read_mostly;
 static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
@@ -51,9 +51,19 @@ static DEFINE_MUTEX(profile_flip_mutex);
 static int __init profile_setup(char * str)
 {
 	static char __initdata schedstr[] = "schedule";
+	static char __initdata sleepstr[] = "sleep";
 	int par;
 
-	if (!strncmp(str, schedstr, strlen(schedstr))) {
+	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
+		prof_on = SLEEP_PROFILING;
+		if (str[strlen(sleepstr)] == ',')
+			str += strlen(sleepstr) + 1;
+		if (get_option(&str, &par))
+			prof_shift = par;
+		printk(KERN_INFO
+			"kernel sleep profiling enabled (shift: %ld)\n",
+			prof_shift);
+	} else if (!strncmp(str, sleepstr, strlen(sleepstr))) {
 		prof_on = SCHED_PROFILING;
 		if (str[strlen(schedstr)] == ',')
 			str += strlen(schedstr) + 1;
@@ -204,7 +214,8 @@ EXPORT_SYMBOL_GPL(profile_event_unregister);
  * positions to which hits are accounted during short intervals (e.g.
  * several seconds) is usually very small. Exclusion from buffer
  * flipping is provided by interrupt disablement (note that for
- * SCHED_PROFILING profile_hit() may be called from process context).
+ * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from
+ * process context).
  * The hash function is meant to be lightweight as opposed to strong,
  * and was vaguely inspired by ppc64 firmware-supported inverted
  * pagetable hash functions, but uses a full hashtable full of finite
@@ -257,7 +268,7 @@ static void profile_discard_flip_buffers(void)
 	mutex_unlock(&profile_flip_mutex);
 }
 
-void profile_hit(int type, void *__pc)
+void profile_hits(int type, void *__pc, unsigned int nr_hits)
 {
 	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
 	int i, j, cpu;
@@ -274,21 +285,31 @@ void profile_hit(int type, void *__pc)
 		put_cpu();
 		return;
 	}
+	/*
+	 * We buffer the global profiler buffer into a per-CPU
+	 * queue and thus reduce the number of global (and possibly
+	 * NUMA-alien) accesses. The write-queue is self-coalescing:
+	 */
 	local_irq_save(flags);
 	do {
 		for (j = 0; j < PROFILE_GRPSZ; ++j) {
 			if (hits[i + j].pc == pc) {
-				hits[i + j].hits++;
+				hits[i + j].hits += nr_hits;
 				goto out;
 			} else if (!hits[i + j].hits) {
 				hits[i + j].pc = pc;
-				hits[i + j].hits = 1;
+				hits[i + j].hits = nr_hits;
 				goto out;
 			}
 		}
 		i = (i + secondary) & (NR_PROFILE_HIT - 1);
 	} while (i != primary);
-	atomic_inc(&prof_buffer[pc]);
+
+	/*
+	 * Add the current hit(s) and flush the write-queue out
+	 * to the global buffer:
+	 */
+	atomic_add(nr_hits, &prof_buffer[pc]);
 	for (i = 0; i < NR_PROFILE_HIT; ++i) {
 		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
 		hits[i].pc = hits[i].hits = 0;
@@ -356,14 +377,14 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
 #define profile_flip_buffers()		do { } while (0)
 #define profile_discard_flip_buffers()	do { } while (0)
 
-void profile_hit(int type, void *__pc)
+void profile_hits(int type, void *__pc, unsigned int nr_hits)
 {
 	unsigned long pc;
 
 	if (prof_on != type || !prof_buffer)
 		return;
 	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
-	atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
+	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
 }
 #endif /* !CONFIG_SMP */
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 343e1794233e..75a005ed4eda 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -948,6 +948,17 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 	}
 #endif
 
+	/*
+	 * Sleep time is in units of nanosecs, so shift by 20 to get a
+	 * milliseconds-range estimation of the amount of time that the task
+	 * spent sleeping:
+	 */
+	if (unlikely(prof_on == SLEEP_PROFILING)) {
+		if (p->state == TASK_UNINTERRUPTIBLE)
+			profile_hits(SLEEP_PROFILING, (void *)get_wchan(p),
+				     (now - p->timestamp) >> 20);
+	}
+
 	if (!rt_task(p))
 		p->prio = recalc_task_prio(p, now);
 
-- 
cgit v1.2.3


From d5abe669172f20a4129a711de0f250a4e07db298 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Dec 2006 20:37:26 -0800
Subject: [PATCH] debug: workqueue locking sanity

Workqueue functions should not leak locks, assert so, printing the
last function ran.

Use macros in lockdep.h to avoid include dependency pains.

[akpm@osdl.org: build fix]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/lockdep.h |  5 +++++
 kernel/workqueue.c      | 13 +++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'kernel')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 819f08f1310d..da19aeb0dbe8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -243,6 +243,8 @@ extern void lock_release(struct lockdep_map *lock, int nested,
 
 # define INIT_LOCKDEP				.lockdep_recursion = 0,
 
+#define lockdep_depth(tsk)	((tsk)->lockdep_depth)
+
 #else /* !LOCKDEP */
 
 static inline void lockdep_off(void)
@@ -277,6 +279,9 @@ static inline int lockdep_internal(void)
  * The class key takes no space if lockdep is disabled:
  */
 struct lock_class_key { };
+
+#define lockdep_depth(tsk)	(0)
+
 #endif /* !LOCKDEP */
 
 #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2945b094d871..5484d6e045c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -30,6 +30,8 @@
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
 #include <linux/freezer.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -253,6 +255,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 			work_release(work);
 		f(work);
 
+		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
+			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
+					"%s/0x%08x/%d\n",
+					current->comm, preempt_count(),
+				       	current->pid);
+			printk(KERN_ERR "    last function: ");
+			print_symbol("%s\n", (unsigned long)f);
+			debug_show_held_locks(current);
+			dump_stack();
+		}
+
 		spin_lock_irqsave(&cwq->lock, flags);
 		cwq->remove_sequence++;
 		wake_up(&cwq->work_done);
-- 
cgit v1.2.3


From 40fcfc87222e2e8af6379ec366f0cb2a411570cd Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 6 Dec 2006 20:37:27 -0800
Subject: [PATCH] HZ: 300Hz support

Fix two things.  Firstly the unit is "Hz" not "HZ".  Secondly it is useful
to have 300Hz support when doing multimedia work.  250 is fine for us in
Europe but the US frame rate is 30fps (29.99 blah for pedants).  300 gives
us a tick divisible by both 25 and 30, and for interlace work 50 and 60.
It's also giving similar performance to 250Hz.

I'd argue we should remove 250 and add 300, but that might be excess
disruption for now.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/Kconfig.hz | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 248e1c396f8b..4af15802ccd4 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -7,7 +7,7 @@ choice
 	default HZ_250
 	help
 	 Allows the configuration of the timer frequency. It is customary
-	 to have the timer interrupt run at 1000 HZ but 100 HZ may be more
+	 to have the timer interrupt run at 1000 Hz but 100 Hz may be more
 	 beneficial for servers and NUMA systems that do not need to have
 	 a fast response for user interaction and that may experience bus
 	 contention and cacheline bounces as a result of timer interrupts.
@@ -19,21 +19,30 @@ choice
 	config HZ_100
 		bool "100 HZ"
 	help
-	  100 HZ is a typical choice for servers, SMP and NUMA systems
+	  100 Hz is a typical choice for servers, SMP and NUMA systems
 	  with lots of processors that may show reduced performance if
 	  too many timer interrupts are occurring.
 
 	config HZ_250
 		bool "250 HZ"
 	help
-	 250 HZ is a good compromise choice allowing server performance
+	 250 Hz is a good compromise choice allowing server performance
 	 while also showing good interactive responsiveness even
-	 on SMP and NUMA systems.
+	 on SMP and NUMA systems. If you are going to be using NTSC video
+	 or multimedia, selected 300Hz instead.
+
+	config HZ_300
+		bool "300 HZ"
+	help
+	 300 Hz is a good compromise choice allowing server performance
+	 while also showing good interactive responsiveness even
+	 on SMP and NUMA systems and exactly dividing by both PAL and
+	 NTSC frame rates for video and multimedia work.
 
 	config HZ_1000
 		bool "1000 HZ"
 	help
-	 1000 HZ is the preferred choice for desktop systems and other
+	 1000 Hz is the preferred choice for desktop systems and other
 	 systems requiring fast interactive responses to events.
 
 endchoice
@@ -42,5 +51,6 @@ config HZ
 	int
 	default 100 if HZ_100
 	default 250 if HZ_250
+	default 300 if HZ_300
 	default 1000 if HZ_1000
 
-- 
cgit v1.2.3


From c36264dfb2d6fa6383082de0a1bba8e12b477da1 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 6 Dec 2006 20:37:42 -0800
Subject: [PATCH] remove the syslog interface when printk is disabled

Attempts to read() from the non-existent dmesg buffer will return zero and
userspace tends to get stuck in a busyloop.

So just remove /dev/kmsg altogether if CONFIG_PRINTK=n.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/Makefile    | 3 ++-
 fs/proc/proc_misc.c | 2 ++
 kernel/printk.c     | 7 +------
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 7431d7ba2d09..f6c776272572 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,8 +8,9 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		kmsg.o proc_tty.o proc_misc.o
+		proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
 proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
+proc-$(CONFIG_PRINTK)	+= kmsg.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 93c43b676e59..51815cece6f3 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -696,9 +696,11 @@ void __init proc_misc_init(void)
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	/* And now for trickier ones */
+#ifdef CONFIG_PRINTK
 	entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
 	if (entry)
 		entry->proc_fops = &proc_kmsg_operations;
+#endif
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
 #ifdef CONFIG_BLOCK
diff --git a/kernel/printk.c b/kernel/printk.c
index 66426552fbfe..ba59c2a30ed0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -631,12 +631,7 @@ EXPORT_SYMBOL(vprintk);
 
 asmlinkage long sys_syslog(int type, char __user *buf, int len)
 {
-	return 0;
-}
-
-int do_syslog(int type, char __user *buf, int len)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 static void call_console_drivers(unsigned long start, unsigned long end)
-- 
cgit v1.2.3


From b4c6c34a530b4d1c626f4ac0a884e0a9b849378c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 Dec 2006 20:38:11 -0800
Subject: [PATCH] kprobes: enable booster on the preemptible kernel

When we are unregistering a kprobe-booster, we can't release its
instruction buffer immediately on the preemptive kernel, because some
processes might be preempted on the buffer.  The freeze_processes() and
thaw_processes() functions can clean most of processes up from the buffer.
There are still some non-frozen threads who have the PF_NOFREEZE flag.  If
those threads are sleeping (not preempted) at the known place outside the
buffer, we can ensure safety of freeing.

However, the processing of this check routine takes a long time.  So, this
patch introduces the garbage collection mechanism of insn_slot.  It also
introduces the "dirty" flag to free_insn_slot because of efficiency.

The "clean" instruction slots (dirty flag is cleared) are released
immediately.  But the "dirty" slots which are used by boosted kprobes, are
marked as garbages.  collect_garbage_slots() will be invoked to release
"dirty" slots if there are more than INSNS_PER_PAGE garbage slots or if
there are no unused slots.

Cc: "Keshavamurthy, Anil S" <anil.s.keshavamurthy@intel.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "bibo,mao" <bibo.mao@intel.com>
Cc: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Cc: Yumiko Sugita <yumiko.sugita.yf@hitachi.com>
Cc: Satoshi Oshima <soshima@redhat.com>
Cc: Hideo Aoki <haoki@redhat.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/kprobes.c    |   4 +-
 arch/ia64/kernel/kprobes.c    |   2 +-
 arch/powerpc/kernel/kprobes.c |   2 +-
 arch/s390/kernel/kprobes.c    |   2 +-
 arch/x86_64/kernel/kprobes.c  |   2 +-
 include/linux/kprobes.h       |   2 +-
 kernel/kprobes.c              | 117 ++++++++++++++++++++++++++++++++++--------
 7 files changed, 103 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index fc79e1e859c4..af1d53344993 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -184,7 +184,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
 	mutex_unlock(&kprobe_mutex);
 }
 
@@ -333,7 +333,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		return 1;
 
 ss_probe:
-#ifndef CONFIG_PREEMPT
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
 	if (p->ainsn.boostable == 1 && !p->post_handler){
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 51217d63285e..4d592ee9300b 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -481,7 +481,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 /*
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 7b8d12b9026c..4657563f8813 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -85,7 +85,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 67914fe7f317..576368c4f605 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -200,7 +200,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index ac241567e682..209c8c0bec71 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index ac4c0559f751..769be39b9681 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -165,7 +165,7 @@ extern void arch_disarm_kprobe(struct kprobe *p);
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
-extern void free_insn_slot(kprobe_opcode_t *slot);
+extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 610c837ad9e0..17ec4afb0994 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -38,6 +38,7 @@
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
+#include <linux/freezer.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
@@ -83,9 +84,36 @@ struct kprobe_insn_page {
 	kprobe_opcode_t *insns;		/* Page of instruction slots */
 	char slot_used[INSNS_PER_PAGE];
 	int nused;
+	int ngarbage;
 };
 
 static struct hlist_head kprobe_insn_pages;
+static int kprobe_garbage_slots;
+static int collect_garbage_slots(void);
+
+static int __kprobes check_safety(void)
+{
+	int ret = 0;
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+	ret = freeze_processes();
+	if (ret == 0) {
+		struct task_struct *p, *q;
+		do_each_thread(p, q) {
+			if (p != current && p->state == TASK_RUNNING &&
+			    p->pid != 0) {
+				printk("Check failed: %s is running\n",p->comm);
+				ret = -1;
+				goto loop_end;
+			}
+		} while_each_thread(p, q);
+	}
+loop_end:
+	thaw_processes();
+#else
+	synchronize_sched();
+#endif
+	return ret;
+}
 
 /**
  * get_insn_slot() - Find a slot on an executable page for an instruction.
@@ -96,6 +124,7 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
 
+      retry:
 	hlist_for_each(pos, &kprobe_insn_pages) {
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
 		if (kip->nused < INSNS_PER_PAGE) {
@@ -112,7 +141,11 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
 		}
 	}
 
-	/* All out of space.  Need to allocate a new page. Use slot 0.*/
+	/* If there are any garbage slots, collect it and try again. */
+	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
+		goto retry;
+	}
+	/* All out of space.  Need to allocate a new page. Use slot 0. */
 	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
 	if (!kip) {
 		return NULL;
@@ -133,10 +166,62 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
 	memset(kip->slot_used, 0, INSNS_PER_PAGE);
 	kip->slot_used[0] = 1;
 	kip->nused = 1;
+	kip->ngarbage = 0;
 	return kip->insns;
 }
 
-void __kprobes free_insn_slot(kprobe_opcode_t *slot)
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+{
+	kip->slot_used[idx] = 0;
+	kip->nused--;
+	if (kip->nused == 0) {
+		/*
+		 * Page is no longer in use.  Free it unless
+		 * it's the last one.  We keep the last one
+		 * so as not to have to set it up again the
+		 * next time somebody inserts a probe.
+		 */
+		hlist_del(&kip->hlist);
+		if (hlist_empty(&kprobe_insn_pages)) {
+			INIT_HLIST_NODE(&kip->hlist);
+			hlist_add_head(&kip->hlist,
+				       &kprobe_insn_pages);
+		} else {
+			module_free(NULL, kip->insns);
+			kfree(kip);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static int __kprobes collect_garbage_slots(void)
+{
+	struct kprobe_insn_page *kip;
+	struct hlist_node *pos, *next;
+
+	/* Ensure no-one is preepmted on the garbages */
+	if (check_safety() != 0)
+		return -EAGAIN;
+
+	hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
+		int i;
+		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+		if (kip->ngarbage == 0)
+			continue;
+		kip->ngarbage = 0;	/* we will collect all garbages */
+		for (i = 0; i < INSNS_PER_PAGE; i++) {
+			if (kip->slot_used[i] == -1 &&
+			    collect_one_slot(kip, i))
+				break;
+		}
+	}
+	kprobe_garbage_slots = 0;
+	return 0;
+}
+
+void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -146,28 +231,18 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot)
 		if (kip->insns <= slot &&
 		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
 			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use.  Free it unless
-				 * it's the last one.  We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
+			if (dirty) {
+				kip->slot_used[i] = -1;
+				kip->ngarbage++;
+			} else {
+				collect_one_slot(kip, i);
 			}
-			return;
+			break;
 		}
 	}
+	if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) {
+		collect_garbage_slots();
+	}
 }
 #endif
 
-- 
cgit v1.2.3


From 02316067852187b8bec781bec07410e91af79627 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:38:17 -0800
Subject: [PATCH] hotplug CPU: clean up hotcpu_notifier() use

There was lots of #ifdef noise in the kernel due to hotcpu_notifier(fn,
prio) not correctly marking 'fn' as used in the !HOTPLUG_CPU case, and thus
generating compiler warnings of unused symbols, hence forcing people to add
#ifdefs.

the compiler can skip truly unused functions just fine:

    text    data     bss     dec     hex filename
 1624412  728710 3674856 6027978  5bfaca vmlinux.before
 1624412  728710 3674856 6027978  5bfaca vmlinux.after

[akpm@osdl.org: topology.c fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/cpu/mcheck/therm_throt.c | 2 --
 arch/i386/kernel/cpuid.c                  | 2 --
 arch/i386/kernel/microcode.c              | 2 --
 arch/i386/kernel/msr.c                    | 2 --
 arch/ia64/kernel/palinfo.c                | 2 --
 arch/ia64/kernel/salinfo.c                | 2 --
 arch/s390/appldata/appldata_base.c        | 2 --
 arch/x86_64/kernel/mce.c                  | 2 --
 arch/x86_64/kernel/mce_amd.c              | 4 ++--
 arch/x86_64/kernel/vsyscall.c             | 2 --
 block/ll_rw_blk.c                         | 4 ----
 drivers/base/topology.c                   | 2 --
 drivers/cpufreq/cpufreq.c                 | 2 --
 fs/buffer.c                               | 2 --
 include/linux/cpu.h                       | 6 +++---
 kernel/cpuset.c                           | 4 ----
 kernel/profile.c                          | 3 +--
 kernel/sched.c                            | 3 ---
 kernel/workqueue.c                        | 2 --
 lib/radix-tree.c                          | 2 --
 mm/page_alloc.c                           | 4 ----
 mm/swap.c                                 | 2 ++
 mm/vmscan.c                               | 2 --
 net/core/dev.c                            | 2 --
 net/core/flow.c                           | 2 --
 25 files changed, 8 insertions(+), 56 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index bad8b4420709..065005c3f168 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -116,7 +116,6 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 	return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 {
 	return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
@@ -153,7 +152,6 @@ static struct notifier_block thermal_throttle_cpu_notifier =
 {
 	.notifier_call = thermal_throttle_cpu_callback,
 };
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static __init int thermal_throttle_init_device(void)
 {
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index ab0c327e79dc..23b2cc748d4e 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -167,7 +167,6 @@ static int cpuid_device_create(int i)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
@@ -187,7 +186,6 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
 {
 	.notifier_call = cpuid_class_cpu_callback,
 };
-#endif /* !CONFIG_HOTPLUG_CPU */
 
 static int __init cpuid_init(void)
 {
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index 23f5984d0654..972346604f9d 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -703,7 +703,6 @@ static struct sysdev_driver mc_sysdev_driver = {
 	.resume = mc_sysdev_resume,
 };
 
-#ifdef CONFIG_HOTPLUG_CPU
 static __cpuinit int
 mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
@@ -726,7 +725,6 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 static struct notifier_block mc_cpu_notifier = {
 	.notifier_call = mc_cpu_callback,
 };
-#endif
 
 static int __init microcode_init (void)
 {
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index a773f776c9ea..7763c67ca282 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -250,7 +250,6 @@ static int msr_device_create(int i)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int msr_class_cpu_callback(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
 {
@@ -271,7 +270,6 @@ static struct notifier_block __cpuinitdata msr_class_cpu_notifier =
 {
 	.notifier_call = msr_class_cpu_callback,
 };
-#endif
 
 static int __init msr_init(void)
 {
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 0b546e2b36ac..c4c10a0b99d9 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -952,7 +952,6 @@ remove_palinfo_proc_entries(unsigned int hcpu)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int palinfo_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
@@ -974,7 +973,6 @@ static struct notifier_block palinfo_cpu_notifier =
 	.notifier_call = palinfo_cpu_callback,
 	.priority = 0,
 };
-#endif
 
 static int __init
 palinfo_init(void)
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index e63b8ca5344a..fd607ca51a8d 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -575,7 +575,6 @@ static struct file_operations salinfo_data_fops = {
 	.write   = salinfo_log_write,
 };
 
-#ifdef	CONFIG_HOTPLUG_CPU
 static int __devinit
 salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
@@ -620,7 +619,6 @@ static struct notifier_block salinfo_cpu_notifier =
 	.notifier_call = salinfo_cpu_callback,
 	.priority = 0,
 };
-#endif	/* CONFIG_HOTPLUG_CPU */
 
 static int __init
 salinfo_init(void)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 67d5cf9cba83..b8c237290263 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -561,7 +561,6 @@ appldata_offline_cpu(int cpu)
 	spin_unlock(&appldata_timer_lock);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit
 appldata_cpu_notify(struct notifier_block *self,
 		    unsigned long action, void *hcpu)
@@ -582,7 +581,6 @@ appldata_cpu_notify(struct notifier_block *self,
 static struct notifier_block appldata_nb = {
 	.notifier_call = appldata_cpu_notify,
 };
-#endif
 
 /*
  * appldata_init()
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index c7587fc39015..bc863c464a1f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void mce_remove_device(unsigned int cpu)
 {
 	int i;
@@ -674,7 +673,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 static struct notifier_block mce_cpu_notifier = {
 	.notifier_call = mce_cpu_callback,
 };
-#endif
 
 static __init int mce_init_device(void)
 {
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index 883fe747f64c..fa09debad4b7 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -551,7 +551,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 /*
  * let's be hotplug friendly.
  * in case of multiple core processors, the first core always takes ownership
@@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 
 	sprintf(name, "threshold_bank%i", bank);
 
+#ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
 		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
 		return;
 	}
+#endif
 
 	/* remove all sibling symlinks before unregistering */
 	for_each_cpu_mask(i, b->cpus) {
@@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
 static struct notifier_block threshold_cpu_notifier = {
 	.notifier_call = threshold_cpu_callback,
 };
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static __init int threshold_init_device(void)
 {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 630036c06c75..3785e4954734 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -275,7 +275,6 @@ static void __cpuinit cpu_vsyscall_init(void *arg)
 	vsyscall_set_cpu(raw_smp_processor_id());
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit
 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
@@ -284,7 +283,6 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
 	return NOTIFY_DONE;
 }
-#endif
 
 static void __init map_vsyscall(void)
 {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index a4ff3271d4a8..31512cd9f3ad 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3459,8 +3459,6 @@ static void blk_done_softirq(struct softirq_action *h)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
@@ -3486,8 +3484,6 @@ static struct notifier_block __devinitdata blk_cpu_notifier = {
 	.notifier_call	= blk_cpu_notify,
 };
 
-#endif /* CONFIG_HOTPLUG_CPU */
-
 /**
  * blk_complete_request - end I/O on a request
  * @req:      the request being processed
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 3d12b85b0962..067a9e8bc377 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -108,7 +108,6 @@ static int __cpuinit topology_add_dev(unsigned int cpu)
 	return rc;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void __cpuinit topology_remove_dev(unsigned int cpu)
 {
 	struct sys_device *sys_dev = get_cpu_sysdev(cpu);
@@ -136,7 +135,6 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
 	}
 	return rc ? NOTIFY_BAD : NOTIFY_OK;
 }
-#endif
 
 static int __cpuinit topology_sysfs_init(void)
 {
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7a7c6e6dfe4f..47ab42db122a 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1537,7 +1537,6 @@ int cpufreq_update_policy(unsigned int cpu)
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int cpufreq_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
@@ -1577,7 +1576,6 @@ static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
 {
     .notifier_call = cpufreq_cpu_callback,
 };
-#endif /* CONFIG_HOTPLUG_CPU */
 
 /*********************************************************************
  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
diff --git a/fs/buffer.c b/fs/buffer.c
index a8ca0ac21488..517860f2d75b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2972,7 +2972,6 @@ init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void buffer_exit_cpu(int cpu)
 {
 	int i;
@@ -2994,7 +2993,6 @@ static int buffer_cpu_notify(struct notifier_block *self,
 		buffer_exit_cpu((unsigned long)hcpu);
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init buffer_init(void)
 {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f02d71bf6894..71dc6ba4f73f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,9 +89,9 @@ int cpu_down(unsigned int cpu);
 #define lock_cpu_hotplug()	do { } while (0)
 #define unlock_cpu_hotplug()	do { } while (0)
 #define lock_cpu_hotplug_interruptible() 0
-#define hotcpu_notifier(fn, pri)	do { } while (0)
-#define register_hotcpu_notifier(nb)	do { } while (0)
-#define unregister_hotcpu_notifier(nb)	do { } while (0)
+#define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
+#define register_hotcpu_notifier(nb)	do { (void)(nb); } while (0)
+#define unregister_hotcpu_notifier(nb)	do { (void)(nb); } while (0)
 
 /* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */
 static inline int cpu_is_offline(int cpu) { return 0; }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index bd1e89c4c96a..9b62b4c03ad0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2044,7 +2044,6 @@ out:
 	return err;
 }
 
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
 /*
  * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
  * or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2108,9 +2107,7 @@ static void common_cpu_mem_hotplug_unplug(void)
 	mutex_unlock(&callback_mutex);
 	mutex_unlock(&manage_mutex);
 }
-#endif
 
-#ifdef CONFIG_HOTPLUG_CPU
 /*
  * The top_cpuset tracks what CPUs and Memory Nodes are online,
  * period.  This is necessary in order to make cpusets transparent
@@ -2127,7 +2124,6 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
 	common_cpu_mem_hotplug_unplug();
 	return 0;
 }
-#endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
diff --git a/kernel/profile.c b/kernel/profile.c
index 04fd84e8cdbe..0961d93e1d91 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -319,7 +319,6 @@ out:
 	put_cpu();
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int __devinit profile_cpu_callback(struct notifier_block *info,
 					unsigned long action, void *__cpu)
 {
@@ -372,10 +371,10 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
 	}
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 #else /* !CONFIG_SMP */
 #define profile_flip_buffers()		do { } while (0)
 #define profile_discard_flip_buffers()	do { } while (0)
+#define profile_cpu_callback		NULL
 
 void profile_hits(int type, void *__pc, unsigned int nr_hits)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 75a005ed4eda..c83f531c2886 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6740,8 +6740,6 @@ SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
 	    sched_smt_power_savings_store);
 #endif
 
-
-#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Force a reinitialization of the sched domains hierarchy.  The domains
  * and groups cannot be updated in place without racing with the balancing
@@ -6774,7 +6772,6 @@ static int update_sched_domains(struct notifier_block *nfb,
 
 	return NOTIFY_OK;
 }
-#endif
 
 void __init sched_init_smp(void)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5484d6e045c2..c5257316f4b9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -655,7 +655,6 @@ int current_is_keventd(void)
 
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 /* Take the work from this (downed) CPU. */
 static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 {
@@ -738,7 +737,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 	return NOTIFY_OK;
 }
-#endif
 
 void init_workqueues(void)
 {
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index e2cefabb5aa0..d69ddbe43865 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -996,7 +996,6 @@ static __init void radix_tree_init_maxindex(void)
 		height_to_maxindex[i] = __maxindex(i);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int radix_tree_callback(struct notifier_block *nfb,
                             unsigned long action,
                             void *hcpu)
@@ -1016,7 +1015,6 @@ static int radix_tree_callback(struct notifier_block *nfb,
        }
        return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init radix_tree_init(void)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2273952300d4..27ec7a1b8022 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -701,7 +701,6 @@ void drain_node_pages(int nodeid)
 }
 #endif
 
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
 	unsigned long flags;
@@ -723,7 +722,6 @@ static void __drain_pages(unsigned int cpu)
 		}
 	}
 }
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_PM
 
@@ -2907,7 +2905,6 @@ void __init free_area_init(unsigned long *zones_size)
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int page_alloc_cpu_notify(struct notifier_block *self,
 				 unsigned long action, void *hcpu)
 {
@@ -2922,7 +2919,6 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 	}
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init page_alloc_init(void)
 {
diff --git a/mm/swap.c b/mm/swap.c
index 017e72ca9bbb..2ed7be39795e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -514,5 +514,7 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+#ifdef CONFIG_HOTPLUG_CPU
 	hotcpu_notifier(cpu_swap_callback, 0);
+#endif
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f6616e81fac7..093f5fe6dd77 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1513,7 +1513,6 @@ out:
 }
 #endif
 
-#ifdef CONFIG_HOTPLUG_CPU
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
@@ -1534,7 +1533,6 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 	}
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 /*
  * This kswapd start function will be called by init and node-hot-add.
diff --git a/net/core/dev.c b/net/core/dev.c
index 59d058a3b504..e660cb57e42a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3340,7 +3340,6 @@ void unregister_netdev(struct net_device *dev)
 
 EXPORT_SYMBOL(unregister_netdev);
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int dev_cpu_callback(struct notifier_block *nfb,
 			    unsigned long action,
 			    void *ocpu)
@@ -3384,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_NET_DMA
 /**
diff --git a/net/core/flow.c b/net/core/flow.c
index 104c25d00a1d..d137f971f97d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu)
 	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
@@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb,
 		__flow_cache_shrink((unsigned long)hcpu, 0);
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static int __init flow_cache_init(void)
 {
-- 
cgit v1.2.3


From ebe7e5fe4b41deeb2731c5b52d8c8e6ac08b1f74 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:21 -0800
Subject: [PATCH] remove kernel/lockdep.c:lockdep_internal

Remove the no longer used lockdep_internal().

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/lockdep.h | 6 ------
 kernel/lockdep.c        | 7 -------
 2 files changed, 13 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index da19aeb0dbe8..498bfbd3b4e1 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -193,7 +193,6 @@ extern void lockdep_free_key_range(void *start, unsigned long size);
 
 extern void lockdep_off(void);
 extern void lockdep_on(void);
-extern int lockdep_internal(void);
 
 /*
  * These methods are used by specific locking variants (spinlocks,
@@ -255,11 +254,6 @@ static inline void lockdep_on(void)
 {
 }
 
-static inline int lockdep_internal(void)
-{
-	return 0;
-}
-
 # define lock_acquire(l, s, t, r, c, i)		do { } while (0)
 # define lock_release(l, n, i)			do { } while (0)
 # define lockdep_init()				do { } while (0)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 300d61bb314b..3926c3674354 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -140,13 +140,6 @@ void lockdep_on(void)
 
 EXPORT_SYMBOL(lockdep_on);
 
-int lockdep_internal(void)
-{
-	return current->lockdep_recursion != 0;
-}
-
-EXPORT_SYMBOL(lockdep_internal);
-
 /*
  * Debugging switches:
  */
-- 
cgit v1.2.3


From d3228a887cae75ef2b8b1211c31c539bef5a5698 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:22 -0800
Subject: [PATCH] make kernel/signal.c:kill_proc_info() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 -
 kernel/signal.c       | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0a90cefb0b0d..3a767242e72f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1305,7 +1305,6 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
 extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
 extern int kill_pg_info(int, struct siginfo *, pid_t);
-extern int kill_proc_info(int, struct siginfo *, pid_t);
 extern void do_notify_parent(struct task_struct *, int);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
diff --git a/kernel/signal.c b/kernel/signal.c
index bc972d7e6319..ec81defde339 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1134,8 +1134,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 	return error;
 }
 
-int
-kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+static int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
 	int error;
 	rcu_read_lock();
-- 
cgit v1.2.3


From 1c69d921ed9cc6593ad4f60c0f9951cb0d62b0b4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Dec 2006 20:38:44 -0800
Subject: [PATCH] rcu: add a prefetch() in rcu_do_batch()

On some workloads, (for example when lot of close() syscalls are done), RCU
qlen can be quite large, and RCU heads are no longer in cpu cache when
rcu_do_batch() is called.

This patch adds a prefetch() in rcu_do_batch() to give CPU a hint to bring
back cache lines containing 'struct rcu_head's.

Most list manipulations macros include prefetch(), but not open coded ones
(at least with current C compilers :) )

I got a nice speedup on a trivial benchmark (3.48 us per iteration instead
of 3.95 us on a 1.6 GHz Pentium-M)

while (1) { pipe(p); close(fd[0]); close(fd[1]);}

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 26bb5ffe1ef1..3554b76da84c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -235,12 +235,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
 	list = rdp->donelist;
 	while (list) {
-		next = rdp->donelist = list->next;
+		next = list->next;
+		prefetch(next);
 		list->func(list);
 		list = next;
 		if (++count >= rdp->blimit)
 			break;
 	}
+	rdp->donelist = list;
 
 	local_irq_disable();
 	rdp->qlen -= count;
-- 
cgit v1.2.3


From 4668edc334ee90cf50c382c3e423cfc510b5a126 Mon Sep 17 00:00:00 2001
From: Burman Yan <yan_952@hotmail.com>
Date: Wed, 6 Dec 2006 20:38:51 -0800
Subject: [PATCH] kernel core: replace kmalloc+memset with kzalloc

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 ipc/sem.c            | 3 +--
 kernel/auditfilter.c | 3 +--
 kernel/futex.c       | 3 +--
 kernel/kexec.c       | 3 +--
 lib/kobject.c        | 3 +--
 mm/nommu.c           | 6 ++----
 6 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/ipc/sem.c b/ipc/sem.c
index 21b3289d640c..d3e12efd55cb 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1070,14 +1070,13 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
 
-	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
 		sem_unlock(sma);
 		return ERR_PTR(-ENOMEM);
 	}
-	memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems);
 	new->semadj = (short *) &new[1];
 	new->semid = semid;
 
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4f40d923af8e..2e896f8ae29e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -636,10 +636,9 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
 	struct audit_rule *rule;
 	int i;
 
-	rule = kmalloc(sizeof(*rule), GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
 	if (unlikely(!rule))
 		return NULL;
-	memset(rule, 0, sizeof(*rule));
 
 	rule->flags = krule->flags | krule->listnr;
 	rule->action = krule->action;
diff --git a/kernel/futex.c b/kernel/futex.c
index af7b81cbde30..7c0d0d4fa7f7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -324,12 +324,11 @@ static int refill_pi_state_cache(void)
 	if (likely(current->pi_state_cache))
 		return 0;
 
-	pi_state = kmalloc(sizeof(*pi_state), GFP_KERNEL);
+	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
 
 	if (!pi_state)
 		return -ENOMEM;
 
-	memset(pi_state, 0, sizeof(*pi_state));
 	INIT_LIST_HEAD(&pi_state->list);
 	/* pi_mutex gets initialized later */
 	pi_state->owner = NULL;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index fcdd5d2bc3f4..d43692cf2321 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -108,11 +108,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 
 	/* Allocate a controlling structure */
 	result = -ENOMEM;
-	image = kmalloc(sizeof(*image), GFP_KERNEL);
+	image = kzalloc(sizeof(*image), GFP_KERNEL);
 	if (!image)
 		goto out;
 
-	memset(image, 0, sizeof(*image));
 	image->head = 0;
 	image->entry = &image->head;
 	image->last_entry = &image->head;
diff --git a/lib/kobject.c b/lib/kobject.c
index 744a4b102c7f..7ce6dc138e90 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -111,10 +111,9 @@ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
 	len = get_kobj_path_length(kobj);
 	if (len == 0)
 		return NULL;
-	path = kmalloc(len, gfp_mask);
+	path = kzalloc(len, gfp_mask);
 	if (!path)
 		return NULL;
-	memset(path, 0x00, len);
 	fill_kobj_path(kobj, path, len);
 
 	return path;
diff --git a/mm/nommu.c b/mm/nommu.c
index 6a2a8aada401..af874569d0f1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -808,10 +808,9 @@ unsigned long do_mmap_pgoff(struct file *file,
 	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
 
 	/* we're going to need to record the mapping if it works */
-	vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
+	vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
 	if (!vml)
 		goto error_getting_vml;
-	memset(vml, 0, sizeof(*vml));
 
 	down_write(&nommu_vma_sem);
 
@@ -887,11 +886,10 @@ unsigned long do_mmap_pgoff(struct file *file,
 	}
 
 	/* we're going to need a VMA struct as well */
-	vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+	vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
 	if (!vma)
 		goto error_getting_vma;
 
-	memset(vma, 0, sizeof(*vma));
 	INIT_LIST_HEAD(&vma->anon_vma_node);
 	atomic_set(&vma->vm_usage, 1);
 	if (file)
-- 
cgit v1.2.3


From 95362fa90312ff2d52c0b4d42412cd7ceeb3b89b Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 6 Dec 2006 20:39:03 -0800
Subject: [PATCH] futex: init error check

Check register_filesystem() and kern_mount() return values.

Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/futex.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 7c0d0d4fa7f7..d60b7f7a8cc3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1857,10 +1857,16 @@ static struct file_system_type futex_fs_type = {
 
 static int __init init(void)
 {
-	unsigned int i;
+	int i = register_filesystem(&futex_fs_type);
+
+	if (i)
+		return i;
 
-	register_filesystem(&futex_fs_type);
 	futex_mnt = kern_mount(&futex_fs_type);
+	if (IS_ERR(futex_mnt)) {
+		unregister_filesystem(&futex_fs_type);
+		return PTR_ERR(futex_mnt);
+	}
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
 		INIT_LIST_HEAD(&futex_queues[i].chain);
-- 
cgit v1.2.3


From bd9b0bac6f601655044fc35978e26231dffee03e Mon Sep 17 00:00:00 2001
From: "BP, Praveen" <praveenbp@ti.com>
Date: Wed, 6 Dec 2006 20:39:09 -0800
Subject: [PATCH] sysctl: string length calculated is wrong if it contains
 negative numbers

In the functions do_proc_dointvec() and do_proc_doulongvec_minmax(),
there seems to be a bug in string length calculation if string contains
negative integer.

The console log given below explains the bug. Setting negative values
may not be a right thing to do for "console log level" but then the test
(given below) can be used to demonstrate the bug in the code.

# echo "-1 -1 -1 -123456" > /proc/sys/kernel/printk
# cat /proc/sys/kernel/printk
-1      -1      -1      -1234
#
# echo "-1 -1 -1 123456" > /proc/sys/kernel/printk
# cat /proc/sys/kernel/printk
-1      -1      -1      1234
#

(akpm: the bug is that 123456 gets truncated)

It works as expected if string contains all +ve integers

# echo "1 2 3 4" > /proc/sys/kernel/printk
# cat /proc/sys/kernel/printk
1       2       3       4
#

The patch given below fixes the issue.

Signed-off-by: Praveen BP <praveenbp@ti.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7abe9704e75a..6d7147cf66bb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1875,7 +1875,7 @@ static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
 			p = buf;
 			if (*p == '-' && left > 1) {
 				neg = 1;
-				left--, p++;
+				p++;
 			}
 			if (*p < '0' || *p > '9')
 				break;
@@ -2126,7 +2126,7 @@ static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
 			p = buf;
 			if (*p == '-' && left > 1) {
 				neg = 1;
-				left--, p++;
+				p++;
 			}
 			if (*p < '0' || *p > '9')
 				break;
-- 
cgit v1.2.3


From 301827acbe49d0ba7ec9770803970893ac9ded97 Mon Sep 17 00:00:00 2001
From: Chris Caputo <ccaputo@alt.net>
Date: Wed, 6 Dec 2006 20:39:11 -0800
Subject: [PATCH] sched: correct output of show_state()

At present show_state prints a header the does not match the output of
show_task, as follows:

-
                                               sibling
  task             PC      pid father child younger older
init          S 00000000     0     1      0     2               (NOTLB)
-

This patch corrects the output of show_state so that the header is
aligned with the data, ala:

-
                         free                        sibling
  task             PC    stack   pid father child younger older
init          S 00000000     0     1      0     2               (NOTLB)
-

Signed-off-by: Chris Caputo <ccaputo@alt.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index c83f531c2886..b43cef02ce5b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4822,12 +4822,12 @@ void show_state_filter(unsigned long state_filter)
 
 #if (BITS_PER_LONG == 32)
 	printk("\n"
-	       "                                               sibling\n");
-	printk("  task             PC      pid father child younger older\n");
+	       "                         free                        sibling\n");
+	printk("  task             PC    stack   pid father child younger older\n");
 #else
 	printk("\n"
-	       "                                                       sibling\n");
-	printk("  task                 PC          pid father child younger older\n");
+	       "                                 free                        sibling\n");
+	printk("  task                 PC        stack   pid father child younger older\n");
 #endif
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-- 
cgit v1.2.3


From 50cc670aebf4fc64afaf533fb9fa1c8570f09d74 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:39:30 -0800
Subject: [PATCH] lockdep: more chains

Some have reported a chain-table overflow - double its size.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep_internals.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index eab043c83bb2..8ce09bc4613d 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -20,7 +20,7 @@
 #define MAX_LOCKDEP_KEYS_BITS	11
 #define MAX_LOCKDEP_KEYS	(1UL << MAX_LOCKDEP_KEYS_BITS)
 
-#define MAX_LOCKDEP_CHAINS_BITS	13
+#define MAX_LOCKDEP_CHAINS_BITS	14
 #define MAX_LOCKDEP_CHAINS	(1UL << MAX_LOCKDEP_CHAINS_BITS)
 
 /*
-- 
cgit v1.2.3


From 2ee91f197c0bc654b24eed5831fd12aa0d566a7d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:39:32 -0800
Subject: [PATCH] lockdep: show more details about self-test failures

Make the locking self-test failures (of 'FAILURE' type) easier to debug by
printing more information.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/debug_locks.h | 2 +-
 kernel/mutex-debug.c        | 3 +++
 lib/locking-selftest.c      | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 952bee79a8f3..a1c10b0c4cf0 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -24,7 +24,7 @@ extern int debug_locks_off(void);
 	int __ret = 0;							\
 									\
 	if (unlikely(c)) {						\
-		if (debug_locks_off())					\
+		if (debug_locks_silent || debug_locks_off())		\
 			WARN_ON(1);					\
 		__ret = 1;						\
 	}								\
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 18651641a7b5..841539d72c55 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -77,6 +77,9 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 
 void debug_mutex_unlock(struct mutex *lock)
 {
+	if (unlikely(!debug_locks))
+		return;
+
 	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 7945787f439a..280332c1827c 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -963,7 +963,9 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 			printk("failed|");
 		} else {
 			unexpected_testcase_failures++;
+
 			printk("FAILED|");
+			dump_stack();
 		}
 	} else {
 		testcase_successes++;
-- 
cgit v1.2.3


From 3908fd2ed920af818aa596672da68ba26173ff27 Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Wed, 6 Dec 2006 20:39:39 -0800
Subject: [PATCH] softirq: remove BUG_ONs which can incorrectly trigger

It is possible to have tasklets get scheduled before softirqd has had a chance
to spawn on all CPUs.  This is totally harmless; after success during action
CPU_UP_PREPARE, action CPU_ONLINE will be called, which immediately wakes
softirqd on the appropriate CPU to process the already pending tasklets.  So
there is no danger of having a missed wakeup for any tasklets that were
already pending.

In particular, i386 is affected by this during startup, and is visible when
using a very large initrd; during the time it takes for the initrd to be
decompressed, a timer IRQ can come in and schedule RCU callbacks.  It is also
possible that resending of a hardware IRQ via a softirq triggers the same bug.

Because of different timing conditions, this shows up in all emulators and
virtual machines tested, including Xen, VMware, Virtual PC, and Qemu.  It is
also possible to trigger on native hardware with a large enough initrd,
although I don't have a reliable case demonstrating that.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Cc: <caglar@pardus.org.tr>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/softirq.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/softirq.c b/kernel/softirq.c
index bf25015dce16..918e52df090e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -574,8 +574,6 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
-		BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
 		if (IS_ERR(p)) {
 			printk("ksoftirqd for %i failed\n", hotcpu);
-- 
cgit v1.2.3


From 045f147f3290395661b56b9231fc4d221e150963 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:15 -0800
Subject: [PATCH] remove EXPORT_UNUSED_SYMBOL'ed symbols

In time for 2.6.20, we can get rid of this junk.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/printk.c | 3 ---
 mm/bootmem.c    | 2 --
 mm/memory.c     | 1 -
 mm/mmzone.c     | 5 -----
 4 files changed, 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index ba59c2a30ed0..c3d90a58e4c5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -53,8 +53,6 @@ int console_printk[4] = {
 	DEFAULT_CONSOLE_LOGLEVEL,	/* default_console_loglevel */
 };
 
-EXPORT_UNUSED_SYMBOL(console_printk);  /*  June 2006  */
-
 /*
  * Low lever drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
@@ -772,7 +770,6 @@ int is_console_locked(void)
 {
 	return console_locked;
 }
-EXPORT_UNUSED_SYMBOL(is_console_locked);  /*  June 2006  */
 
 /**
  * release_console_sem - unlock the console system
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 94253428f091..00a96970b237 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -27,8 +27,6 @@ unsigned long max_low_pfn;
 unsigned long min_low_pfn;
 unsigned long max_pfn;
 
-EXPORT_UNUSED_SYMBOL(max_pfn);  /*  June 2006  */
-
 static LIST_HEAD(bdata_list);
 #ifdef CONFIG_CRASH_DUMP
 /*
diff --git a/mm/memory.c b/mm/memory.c
index a07120da868b..4198df0dff1c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1902,7 +1902,6 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 
 	return 0;
 }
-EXPORT_UNUSED_SYMBOL(vmtruncate_range);  /*  June 2006  */
 
 /**
  * swapin_readahead - swap in pages in hope we need them soon
diff --git a/mm/mmzone.c b/mm/mmzone.c
index febea1c98168..eb5838634f18 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -14,8 +14,6 @@ struct pglist_data *first_online_pgdat(void)
 	return NODE_DATA(first_online_node);
 }
 
-EXPORT_UNUSED_SYMBOL(first_online_pgdat);  /*  June 2006  */
-
 struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 {
 	int nid = next_online_node(pgdat->node_id);
@@ -24,8 +22,6 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 		return NULL;
 	return NODE_DATA(nid);
 }
-EXPORT_UNUSED_SYMBOL(next_online_pgdat);  /*  June 2006  */
-
 
 /*
  * next_zone - helper magic for for_each_zone()
@@ -45,5 +41,4 @@ struct zone *next_zone(struct zone *zone)
 	}
 	return zone;
 }
-EXPORT_UNUSED_SYMBOL(next_zone);  /*  June 2006  */
 
-- 
cgit v1.2.3


From a09c17a6fdad9ae5b5ea1c3383080f84ec76ab20 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@cs.washington.edu>
Date: Wed, 6 Dec 2006 20:40:18 -0800
Subject: [PATCH] sys: remove unused variable

Remove unused 'new_ruid' variable.

Reported by David Binderman <dcb314@hotmail.com>.

Signed-off-by: David Rientjes <rientjes@cs.washington.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index c87b461de38d..a0c1a29a507f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1102,14 +1102,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 asmlinkage long sys_setuid(uid_t uid)
 {
 	int old_euid = current->euid;
-	int old_ruid, old_suid, new_ruid, new_suid;
+	int old_ruid, old_suid, new_suid;
 	int retval;
 
 	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
 	if (retval)
 		return retval;
 
-	old_ruid = new_ruid = current->uid;
+	old_ruid = current->uid;
 	old_suid = current->suid;
 	new_suid = old_suid;
 	
-- 
cgit v1.2.3


From 012d3ca8d85bf3ae5278ba897dd1ca3999247107 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@freedesktop.org>
Date: Wed, 6 Dec 2006 20:40:19 -0800
Subject: [PATCH] Add Sparse annotations to SRCU wrapper functions in
 rcutorture

The SRCU wrapper functions srcu_torture_read_lock and
srcu_torture_read_unlock in rcutorture intentionally change the SRCU
context; annotate them accordingly, to avoid a warning.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcutorture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e2bda18f6f42..c52f981ea008 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -401,7 +401,7 @@ static void srcu_torture_cleanup(void)
 	cleanup_srcu_struct(&srcu_ctl);
 }
 
-static int srcu_torture_read_lock(void)
+static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
 {
 	return srcu_read_lock(&srcu_ctl);
 }
@@ -419,7 +419,7 @@ static void srcu_read_delay(struct rcu_random_state *rrsp)
 		schedule_timeout_interruptible(longdelay);
 }
 
-static void srcu_torture_read_unlock(int idx)
+static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
 {
 	srcu_read_unlock(&srcu_ctl, idx);
 }
-- 
cgit v1.2.3


From ccdea2f88b5689f0fd29c3804be43a3acf0311e3 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 6 Dec 2006 20:40:26 -0800
Subject: [PATCH] futex: remove unneeded barrier

When disassembling a kernel I found around over 90 sync Instructions from
mb, rmb and wmb calls in the kernel and only few of those make any sense to
me.  So here's the first one - I think the wmb() in kernel/futex.c is not
needed on uniprocessors so should become an smb_wmb().

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/futex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index d60b7f7a8cc3..a8302a1620ea 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -552,7 +552,7 @@ static void wake_futex(struct futex_q *q)
 	 * at the end of wake_up_all() does not prevent this store from
 	 * moving.
 	 */
-	wmb();
+	smp_wmb();
 	q->lock_ptr = NULL;
 }
 
-- 
cgit v1.2.3


From 15ad7cdcfd76450d4beebc789ec646664238184d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 6 Dec 2006 20:40:36 -0800
Subject: [PATCH] struct seq_operations and struct file_operations
 constification

 - move some file_operations structs into the .rodata section

 - move static strings from policy_types[] array into the .rodata section

 - fix generic seq_operations usages, so that those structs may be defined
   as "const" as well

[akpm@osdl.org: couple of fixes]
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/seq_file.c            | 4 ++--
 include/linux/cpuset.h   | 2 +-
 include/linux/mmzone.h   | 2 +-
 include/linux/relay.h    | 2 +-
 include/linux/sched.h    | 2 +-
 include/linux/seq_file.h | 4 ++--
 kernel/configs.c         | 2 +-
 kernel/cpuset.c          | 4 ++--
 kernel/dma.c             | 2 +-
 kernel/futex.c           | 2 +-
 kernel/kallsyms.c        | 4 ++--
 kernel/lockdep_proc.c    | 6 +++---
 kernel/module.c          | 2 +-
 kernel/power/user.c      | 2 +-
 kernel/profile.c         | 2 +-
 kernel/relay.c           | 2 +-
 kernel/resource.c        | 6 +++---
 kernel/sched.c           | 2 +-
 kernel/sysctl.c          | 2 +-
 mm/mempolicy.c           | 4 ++--
 mm/page_alloc.c          | 2 +-
 mm/shmem.c               | 4 ++--
 mm/slab.c                | 4 ++--
 mm/swapfile.c            | 4 ++--
 mm/vmstat.c              | 8 ++++----
 25 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'kernel')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 555b9ac04c25..10690aa401c7 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -26,7 +26,7 @@
  *	ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
  *	returns 0 in case of success and negative number in case of error.
  */
-int seq_open(struct file *file, struct seq_operations *op)
+int seq_open(struct file *file, const struct seq_operations *op)
 {
 	struct seq_file *p = file->private_data;
 
@@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open);
 
 int single_release(struct inode *inode, struct file *file)
 {
-	struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
+	const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
 	int res = seq_release(inode, file);
 	kfree(op);
 	return res;
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 748d2c996631..8821e1f75b44 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -46,7 +46,7 @@ extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
 extern int cpuset_memory_pressure_enabled;
 extern void __cpuset_memory_pressure_bump(void);
 
-extern struct file_operations proc_cpuset_operations;
+extern const struct file_operations proc_cpuset_operations;
 extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 
 extern void cpuset_lock(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index da6002dec205..e339a7345f25 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -278,7 +278,7 @@ struct zone {
 	/*
 	 * rarely used fields:
 	 */
-	char			*name;
+	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
 /*
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 0e3d91b76996..c6a48bfc8b14 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -274,7 +274,7 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf,
 /*
  * exported relay file operations, kernel/relay.c
  */
-extern struct file_operations relay_file_operations;
+extern const struct file_operations relay_file_operations;
 
 #endif /* _LINUX_RELAY_H */
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3a767242e72f..dede82c63445 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -573,7 +573,7 @@ struct sched_info {
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
 #ifdef CONFIG_SCHEDSTATS
-extern struct file_operations proc_schedstat_operations;
+extern const struct file_operations proc_schedstat_operations;
 #endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index b95f6eb7254c..3e3cccbb1cac 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -20,7 +20,7 @@ struct seq_file {
 	loff_t index;
 	loff_t version;
 	struct mutex lock;
-	struct seq_operations *op;
+	const struct seq_operations *op;
 	void *private;
 };
 
@@ -31,7 +31,7 @@ struct seq_operations {
 	int (*show) (struct seq_file *m, void *v);
 };
 
-int seq_open(struct file *, struct seq_operations *);
+int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
 loff_t seq_lseek(struct file *, loff_t, int);
 int seq_release(struct inode *, struct file *);
diff --git a/kernel/configs.c b/kernel/configs.c
index f9e31974f4ad..8fa1fb28f8a7 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf,
 	return count;
 }
 
-static struct file_operations ikconfig_file_ops = {
+static const struct file_operations ikconfig_file_ops = {
 	.owner = THIS_MODULE,
 	.read = ikconfig_read_current,
 };
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9b62b4c03ad0..4ef1d29297ca 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1531,7 +1531,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
 }
 
-static struct file_operations cpuset_file_operations = {
+static const struct file_operations cpuset_file_operations = {
 	.read = cpuset_file_read,
 	.write = cpuset_file_write,
 	.llseek = generic_file_llseek,
@@ -2605,7 +2605,7 @@ static int cpuset_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_cpuset_show, pid);
 }
 
-struct file_operations proc_cpuset_operations = {
+const struct file_operations proc_cpuset_operations = {
 	.open		= cpuset_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/kernel/dma.c b/kernel/dma.c
index 2020644c938a..937b13ca33ba 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -140,7 +140,7 @@ static int proc_dma_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_dma_show, NULL);
 }
 
-static struct file_operations proc_dma_operations = {
+static const struct file_operations proc_dma_operations = {
 	.open		= proc_dma_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/kernel/futex.c b/kernel/futex.c
index a8302a1620ea..95989a3b4168 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1492,7 +1492,7 @@ static unsigned int futex_poll(struct file *filp,
 	return ret;
 }
 
-static struct file_operations futex_fops = {
+static const struct file_operations futex_fops = {
 	.release	= futex_close,
 	.poll		= futex_poll,
 };
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 54befe36ee0b..ab63cfc42992 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -398,7 +398,7 @@ static int s_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-static struct seq_operations kallsyms_op = {
+static const struct seq_operations kallsyms_op = {
 	.start = s_start,
 	.next = s_next,
 	.stop = s_stop,
@@ -433,7 +433,7 @@ static int kallsyms_release(struct inode *inode, struct file *file)
 	return seq_release(inode, file);
 }
 
-static struct file_operations kallsyms_operations = {
+static const struct file_operations kallsyms_operations = {
 	.open = kallsyms_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index f6e72eaab3fa..b554b40a4aa6 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -113,7 +113,7 @@ static int l_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations lockdep_ops = {
+static const struct seq_operations lockdep_ops = {
 	.start	= l_start,
 	.next	= l_next,
 	.stop	= l_stop,
@@ -135,7 +135,7 @@ static int lockdep_open(struct inode *inode, struct file *file)
 	return res;
 }
 
-static struct file_operations proc_lockdep_operations = {
+static const struct file_operations proc_lockdep_operations = {
 	.open		= lockdep_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -319,7 +319,7 @@ static int lockdep_stats_open(struct inode *inode, struct file *file)
 	return single_open(file, lockdep_stats_show, NULL);
 }
 
-static struct file_operations proc_lockdep_stats_operations = {
+static const struct file_operations proc_lockdep_stats_operations = {
 	.open		= lockdep_stats_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/kernel/module.c b/kernel/module.c
index e2d09d604ca0..d9eae45d0145 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2209,7 +2209,7 @@ static int m_show(struct seq_file *m, void *p)
    Where refcount is a number or -, and deps is a comma-separated list
    of depends or -.
 */
-struct seq_operations modules_op = {
+const struct seq_operations modules_op = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 069732e5695c..89443b85163b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -383,7 +383,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 	return error;
 }
 
-static struct file_operations snapshot_fops = {
+static const struct file_operations snapshot_fops = {
 	.open = snapshot_open,
 	.release = snapshot_release,
 	.read = snapshot_read,
diff --git a/kernel/profile.c b/kernel/profile.c
index 0961d93e1d91..fb5e03d57e9d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -501,7 +501,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
 	return count;
 }
 
-static struct file_operations proc_profile_operations = {
+static const struct file_operations proc_profile_operations = {
 	.read		= read_profile,
 	.write		= write_profile,
 };
diff --git a/kernel/relay.c b/kernel/relay.c
index 2b92e8ece85b..75a3a9a7efc2 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1013,7 +1013,7 @@ static ssize_t relay_file_sendfile(struct file *filp,
 				       actor, &desc);
 }
 
-struct file_operations relay_file_operations = {
+const struct file_operations relay_file_operations = {
 	.open		= relay_file_open,
 	.poll		= relay_file_poll,
 	.mmap		= relay_file_mmap,
diff --git a/kernel/resource.c b/kernel/resource.c
index 6de60c12143e..7b9a497419d9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -88,7 +88,7 @@ static int r_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations resource_op = {
+static const struct seq_operations resource_op = {
 	.start	= r_start,
 	.next	= r_next,
 	.stop	= r_stop,
@@ -115,14 +115,14 @@ static int iomem_open(struct inode *inode, struct file *file)
 	return res;
 }
 
-static struct file_operations proc_ioports_operations = {
+static const struct file_operations proc_ioports_operations = {
 	.open		= ioports_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
 
-static struct file_operations proc_iomem_operations = {
+static const struct file_operations proc_iomem_operations = {
 	.open		= iomem_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/kernel/sched.c b/kernel/sched.c
index b43cef02ce5b..f385eff4682d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -505,7 +505,7 @@ static int schedstat_open(struct inode *inode, struct file *file)
 	return res;
 }
 
-struct file_operations proc_schedstat_operations = {
+const struct file_operations proc_schedstat_operations = {
 	.open    = schedstat_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6d7147cf66bb..758dbbf972a5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -170,7 +170,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
 static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
 static int proc_opensys(struct inode *, struct file *);
 
-struct file_operations proc_sys_file_operations = {
+const struct file_operations proc_sys_file_operations = {
 	.open		= proc_opensys,
 	.read		= proc_readsys,
 	.write		= proc_writesys,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ad864f8708b0..b917d6fdc1bb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1707,8 +1707,8 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
  * Display pages allocated per node and memory policy via /proc.
  */
 
-static const char *policy_types[] = { "default", "prefer", "bind",
-				      "interleave" };
+static const char * const policy_types[] =
+	{ "default", "prefer", "bind", "interleave" };
 
 /*
  * Convert a mempolicy into a string.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 27ec7a1b8022..cace22b3ac25 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -83,7 +83,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
 
 EXPORT_SYMBOL(totalram_pages);
 
-static char *zone_names[MAX_NR_ZONES] = {
+static char * const zone_names[MAX_NR_ZONES] = {
 	 "DMA",
 #ifdef CONFIG_ZONE_DMA32
 	 "DMA32",
diff --git a/mm/shmem.c b/mm/shmem.c
index 007653680a75..c820b4f77b8d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -177,7 +177,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 
 static struct super_operations shmem_ops;
 static const struct address_space_operations shmem_aops;
-static struct file_operations shmem_file_operations;
+static const struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
 static struct inode_operations shmem_special_inode_operations;
@@ -2319,7 +2319,7 @@ static const struct address_space_operations shmem_aops = {
 	.migratepage	= migrate_page,
 };
 
-static struct file_operations shmem_file_operations = {
+static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
 #ifdef CONFIG_TMPFS
 	.llseek		= generic_file_llseek,
diff --git a/mm/slab.c b/mm/slab.c
index 86f5d6e995bb..068cb4503c15 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4142,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p)
  * + further values on SMP and with statistics enabled
  */
 
-struct seq_operations slabinfo_op = {
+const struct seq_operations slabinfo_op = {
 	.start = s_start,
 	.next = s_next,
 	.stop = s_stop,
@@ -4340,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-struct seq_operations slabstats_op = {
+const struct seq_operations slabstats_op = {
 	.start = leaks_start,
 	.next = s_next,
 	.stop = s_stop,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 55242363de64..c5431072f422 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1368,7 +1368,7 @@ static int swap_show(struct seq_file *swap, void *v)
 	return 0;
 }
 
-static struct seq_operations swaps_op = {
+static const struct seq_operations swaps_op = {
 	.start =	swap_start,
 	.next =		swap_next,
 	.stop =		swap_stop,
@@ -1380,7 +1380,7 @@ static int swaps_open(struct inode *inode, struct file *file)
 	return seq_open(file, &swaps_op);
 }
 
-static struct file_operations proc_swaps_operations = {
+static const struct file_operations proc_swaps_operations = {
 	.open		= swaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ef4176843d9e..dc005a0c96ae 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -430,7 +430,7 @@ static int frag_show(struct seq_file *m, void *arg)
 	return 0;
 }
 
-struct seq_operations fragmentation_op = {
+const struct seq_operations fragmentation_op = {
 	.start	= frag_start,
 	.next	= frag_next,
 	.stop	= frag_stop,
@@ -452,7 +452,7 @@ struct seq_operations fragmentation_op = {
 #define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \
 					TEXT_FOR_HIGHMEM(xx)
 
-static char *vmstat_text[] = {
+static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_anon_pages",
 	"nr_mapped",
@@ -597,7 +597,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 	return 0;
 }
 
-struct seq_operations zoneinfo_op = {
+const struct seq_operations zoneinfo_op = {
 	.start	= frag_start, /* iterate over all zones. The same as in
 			       * fragmentation. */
 	.next	= frag_next,
@@ -660,7 +660,7 @@ static void vmstat_stop(struct seq_file *m, void *arg)
 	m->private = NULL;
 }
 
-struct seq_operations vmstat_op = {
+const struct seq_operations vmstat_op = {
 	.start	= vmstat_start,
 	.next	= vmstat_next,
 	.stop	= vmstat_stop,
-- 
cgit v1.2.3


From 85916f8166b59eeac63d2b4f7f1df8de849334b4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Wed, 6 Dec 2006 20:40:41 -0800
Subject: [PATCH] Kexec / Kdump: Unify elf note code

The elf note saving code is currently duplicated over several
architectures.  This cleanup patch simply adds code to a common file and
then replaces the arch-specific code with calls to the newly added code.

The only drawback with this approach is that s390 doesn't fully support
kexec-on-panic which for that arch leads to introduction of unused code.

Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/crash.c    | 66 ++-----------------------------------------
 arch/powerpc/kernel/crash.c | 59 ++------------------------------------
 arch/x86_64/kernel/crash.c  | 69 ++-------------------------------------------
 include/linux/kexec.h       |  1 +
 kernel/kexec.c              | 56 ++++++++++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 188 deletions(-)

(limited to 'kernel')

diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 144b43288965..a5e0e990ea95 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -31,68 +31,6 @@
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
 
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
-							       size_t data_len)
-{
-	struct elf_note note;
-
-	note.n_namesz = strlen(name) + 1;
-	note.n_descsz = data_len;
-	note.n_type   = type;
-	memcpy(buf, &note, sizeof(note));
-	buf += (sizeof(note) +3)/4;
-	memcpy(buf, name, note.n_namesz);
-	buf += (note.n_namesz + 3)/4;
-	memcpy(buf, data, note.n_descsz);
-	buf += (note.n_descsz + 3)/4;
-
-	return buf;
-}
-
-static void final_note(u32 *buf)
-{
-	struct elf_note note;
-
-	note.n_namesz = 0;
-	note.n_descsz = 0;
-	note.n_type   = 0;
-	memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
-	struct elf_prstatus prstatus;
-	u32 *buf;
-
-	if ((cpu < 0) || (cpu >= NR_CPUS))
-		return;
-
-	/* Using ELF notes here is opportunistic.
-	 * I need a well defined structure format
-	 * for the data I pass, and I need tags
-	 * on the data to indicate what information I have
-	 * squirrelled away.  ELF notes happen to provide
-	 * all of that, so there is no need to invent something new.
-	 */
-	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-	if (!buf)
-		return;
-	memset(&prstatus, 0, sizeof(prstatus));
-	prstatus.pr_pid = current->pid;
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
-	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
-				sizeof(prstatus));
-	final_note(buf);
-}
-
-static void crash_save_self(struct pt_regs *regs)
-{
-	int cpu;
-
-	cpu = safe_smp_processor_id();
-	crash_save_this_cpu(regs, cpu);
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 
@@ -121,7 +59,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}
-	crash_save_this_cpu(regs, cpu);
+	crash_save_cpu(regs, cpu);
 	disable_local_APIC();
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -195,5 +133,5 @@ void machine_crash_shutdown(struct pt_regs *regs)
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
 #endif
-	crash_save_self(regs);
+	crash_save_cpu(regs, safe_smp_processor_id());
 }
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 89b03c8da9d2..d3f2080d2eee 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -46,61 +46,6 @@ int crashing_cpu = -1;
 static cpumask_t cpus_in_crash = CPU_MASK_NONE;
 cpumask_t cpus_in_sr = CPU_MASK_NONE;
 
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
-							       size_t data_len)
-{
-	struct elf_note note;
-
-	note.n_namesz = strlen(name) + 1;
-	note.n_descsz = data_len;
-	note.n_type   = type;
-	memcpy(buf, &note, sizeof(note));
-	buf += (sizeof(note) +3)/4;
-	memcpy(buf, name, note.n_namesz);
-	buf += (note.n_namesz + 3)/4;
-	memcpy(buf, data, note.n_descsz);
-	buf += (note.n_descsz + 3)/4;
-
-	return buf;
-}
-
-static void final_note(u32 *buf)
-{
-	struct elf_note note;
-
-	note.n_namesz = 0;
-	note.n_descsz = 0;
-	note.n_type   = 0;
-	memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
-	struct elf_prstatus prstatus;
-	u32 *buf;
-
-	if ((cpu < 0) || (cpu >= NR_CPUS))
-		return;
-
-	/* Using ELF notes here is opportunistic.
-	 * I need a well defined structure format
-	 * for the data I pass, and I need tags
-	 * on the data to indicate what information I have
-	 * squirrelled away.  ELF notes happen to provide
-	 * all of that that no need to invent something new.
-	 */
-	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-	if (!buf) 
-		return;
-
-	memset(&prstatus, 0, sizeof(prstatus));
-	prstatus.pr_pid = current->pid;
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
-	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
-			sizeof(prstatus));
-	final_note(buf);
-}
-
 #ifdef CONFIG_SMP
 static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);
 
@@ -113,7 +58,7 @@ void crash_ipi_callback(struct pt_regs *regs)
 
 	hard_irq_disable();
 	if (!cpu_isset(cpu, cpus_in_crash))
-		crash_save_this_cpu(regs, cpu);
+		crash_save_cpu(regs, cpu);
 	cpu_set(cpu, cpus_in_crash);
 
 	/*
@@ -306,7 +251,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	 * such that another IPI will not be sent.
 	 */
 	crashing_cpu = smp_processor_id();
-	crash_save_this_cpu(regs, crashing_cpu);
+	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
 	cpu_set(crashing_cpu, cpus_in_crash);
 	if (ppc_md.kexec_cpu_down)
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 3525f884af82..95a7a2c13131 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -28,71 +28,6 @@
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
 
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type,
-						void *data, size_t data_len)
-{
-	struct elf_note note;
-
-	note.n_namesz = strlen(name) + 1;
-	note.n_descsz = data_len;
-	note.n_type   = type;
-	memcpy(buf, &note, sizeof(note));
-	buf += (sizeof(note) +3)/4;
-	memcpy(buf, name, note.n_namesz);
-	buf += (note.n_namesz + 3)/4;
-	memcpy(buf, data, note.n_descsz);
-	buf += (note.n_descsz + 3)/4;
-
-	return buf;
-}
-
-static void final_note(u32 *buf)
-{
-	struct elf_note note;
-
-	note.n_namesz = 0;
-	note.n_descsz = 0;
-	note.n_type   = 0;
-	memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
-	struct elf_prstatus prstatus;
-	u32 *buf;
-
-	if ((cpu < 0) || (cpu >= NR_CPUS))
-		return;
-
-	/* Using ELF notes here is opportunistic.
-	 * I need a well defined structure format
-	 * for the data I pass, and I need tags
-	 * on the data to indicate what information I have
-	 * squirrelled away.  ELF notes happen to provide
-	 * all of that, no need to invent something new.
-	 */
-
-	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-
-	if (!buf)
-		return;
-
-	memset(&prstatus, 0, sizeof(prstatus));
-	prstatus.pr_pid = current->pid;
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
-	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
-					sizeof(prstatus));
-	final_note(buf);
-}
-
-static void crash_save_self(struct pt_regs *regs)
-{
-	int cpu;
-
-	cpu = smp_processor_id();
-	crash_save_this_cpu(regs, cpu);
-}
-
 #ifdef CONFIG_SMP
 static atomic_t waiting_for_crash_ipi;
 
@@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 		return NOTIFY_STOP;
 	local_irq_disable();
 
-	crash_save_this_cpu(regs, cpu);
+	crash_save_cpu(regs, cpu);
 	disable_local_APIC();
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs)
 
 	disable_IO_APIC();
 
-	crash_save_self(regs);
+	crash_save_cpu(regs, smp_processor_id());
 }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a4ede62b339d..e3abcec6c51c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -105,6 +105,7 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
+void crash_save_cpu(struct pt_regs *regs, int cpu);
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
diff --git a/kernel/kexec.c b/kernel/kexec.c
index d43692cf2321..afbbbe981be2 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -20,6 +20,8 @@
 #include <linux/syscalls.h>
 #include <linux/ioport.h>
 #include <linux/hardirq.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1066,6 +1068,60 @@ void crash_kexec(struct pt_regs *regs)
 	}
 }
 
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+			    size_t data_len)
+{
+	struct elf_note note;
+
+	note.n_namesz = strlen(name) + 1;
+	note.n_descsz = data_len;
+	note.n_type   = type;
+	memcpy(buf, &note, sizeof(note));
+	buf += (sizeof(note) + 3)/4;
+	memcpy(buf, name, note.n_namesz);
+	buf += (note.n_namesz + 3)/4;
+	memcpy(buf, data, note.n_descsz);
+	buf += (note.n_descsz + 3)/4;
+
+	return buf;
+}
+
+static void final_note(u32 *buf)
+{
+	struct elf_note note;
+
+	note.n_namesz = 0;
+	note.n_descsz = 0;
+	note.n_type   = 0;
+	memcpy(buf, &note, sizeof(note));
+}
+
+void crash_save_cpu(struct pt_regs *regs, int cpu)
+{
+	struct elf_prstatus prstatus;
+	u32 *buf;
+
+	if ((cpu < 0) || (cpu >= NR_CPUS))
+		return;
+
+	/* Using ELF notes here is opportunistic.
+	 * I need a well defined structure format
+	 * for the data I pass, and I need tags
+	 * on the data to indicate what information I have
+	 * squirrelled away.  ELF notes happen to provide
+	 * all of that, so there is no need to invent something new.
+	 */
+	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	memset(&prstatus, 0, sizeof(prstatus));
+	prstatus.pr_pid = current->pid;
+	elf_core_copy_regs(&prstatus.pr_reg, regs);
+	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+				sizeof(prstatus));
+	final_note(buf);
+}
+
 static int __init crash_notes_memory_init(void)
 {
 	/* Allocate memory for saving cpu registers. */
-- 
cgit v1.2.3


From 70e4506765602cca047cfa31933836e354c61a63 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:40:50 -0800
Subject: [PATCH] lockdep: register_lock_class() fix

The hash_lock must only ever be taken with irqs disabled.  This happens in
all the important places, except one codepath: register_lock_class().  The
race should trigger rarely because register_lock_class() is quite rare and
single-threaded (happens during init most of the time).

The fix is to disable irqs.

( bug found live in -rt: there preemption is alot more agressive and
  preempting with the hash-lock held caused a lockup.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3926c3674354..62e73ce68197 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1182,6 +1182,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	struct lockdep_subclass_key *key;
 	struct list_head *hash_head;
 	struct lock_class *class;
+	unsigned long flags;
 
 	class = look_up_lock_class(lock, subclass);
 	if (likely(class))
@@ -1203,6 +1204,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	key = lock->key->subkeys + subclass;
 	hash_head = classhashentry(key);
 
+	raw_local_irq_save(flags);
 	__raw_spin_lock(&hash_lock);
 	/*
 	 * We have to do the hash-walk again, to avoid races
@@ -1217,6 +1219,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	 */
 	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
 		__raw_spin_unlock(&hash_lock);
+		raw_local_irq_restore(flags);
 		debug_locks_off();
 		printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
 		printk("turning off the locking correctness validator.\n");
@@ -1239,15 +1242,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
 	if (verbose(class)) {
 		__raw_spin_unlock(&hash_lock);
+		raw_local_irq_restore(flags);
 		printk("\nnew class %p: %s", class->key, class->name);
 		if (class->name_version > 1)
 			printk("#%d", class->name_version);
 		printk("\n");
 		dump_stack();
+		raw_local_irq_save(flags);
 		__raw_spin_lock(&hash_lock);
 	}
 out_unlock_set:
 	__raw_spin_unlock(&hash_lock);
+	raw_local_irq_restore(flags);
 
 	if (!subclass || force)
 		lock->class_cache = class;
-- 
cgit v1.2.3


From 792908225064b1d841a8990b9d1d1cfc4e0e5bb2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:40:51 -0800
Subject: [PATCH] add ignore_loglevel boot option

Sometimes the kernel prints something interesting while userspace bootup
keeps messages turned off via loglevel.  Enable the printing of /all/
kernel messages via the "ignore_loglevel" boot option.  Off by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  4 ++++
 kernel/printk.c                     | 14 +++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2a40d9f6ffad..6b3c3e37a277 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -650,6 +650,10 @@ and is between 256 and 4096 characters. It is defined in the file
 	idle=		[HW]
 			Format: idle=poll or idle=halt
 
+	ignore_loglevel	[KNL]
+			Ignore loglevel setting - this will print /all/
+			kernel messages to the console. Useful for debugging.
+
 	ihash_entries=	[KNL]
 			Set number of hash buckets for inode cache.
 
diff --git a/kernel/printk.c b/kernel/printk.c
index c3d90a58e4c5..185bb45eacf7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -333,13 +333,25 @@ static void __call_console_drivers(unsigned long start, unsigned long end)
 	}
 }
 
+static int __read_mostly ignore_loglevel;
+
+int __init ignore_loglevel_setup(char *str)
+{
+	ignore_loglevel = 1;
+	printk(KERN_INFO "debug: ignoring loglevel setting.\n");
+
+	return 1;
+}
+
+__setup("ignore_loglevel", ignore_loglevel_setup);
+
 /*
  * Write out chars from start to end - 1 inclusive
  */
 static void _call_console_drivers(unsigned long start,
 				unsigned long end, int msg_log_level)
 {
-	if (msg_log_level < console_loglevel &&
+	if ((msg_log_level < console_loglevel || ignore_loglevel) &&
 			console_drivers && start != end) {
 		if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
 			/* wrapped write */
-- 
cgit v1.2.3


From d3ed11c35635487d34ad476e1d6a66dd298ab2de Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Wed, 6 Dec 2006 20:41:37 -0800
Subject: [PATCH] cpuset: allow a larger buffer for writes to cpuset files

When using fake NUMA setup, the number of memory nodes can greatly exceed
the number of CPUs.  So the current limit in cpuset_common_file_write() is
insufficient.

Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpuset.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ef1d29297ca..0a6b4d89f9a0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1280,7 +1280,8 @@ typedef enum {
 	FILE_TASKLIST,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf,
+static ssize_t cpuset_common_file_write(struct file *file,
+					const char __user *userbuf,
 					size_t nbytes, loff_t *unused_ppos)
 {
 	struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
@@ -1291,7 +1292,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	int retval = 0;
 
 	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100 + 6 * NR_CPUS)
+	if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
 		return -E2BIG;
 
 	/* +1 for nul-terminator */
-- 
cgit v1.2.3


From 68380b581383c028830f79ec2670f4a193854aa6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Thu, 7 Dec 2006 09:28:19 -0800
Subject: Add "run_scheduled_work()" workqueue function

This allows workqueue users to run just their own pending work, rather
than wait for the whole workqueue to finish running.  This solves the
deadlock with networking libphy that was due to other workqueue entries
possibly needing a lock that was held by the routine that wanted to
flush its own work.

It's not wonderful: if you absolutely need to synchronize with the work
function having been executed, any user strictly speaking should have
its own completion tracking logic, since when we run things explicitly
by hand, the generic workqueue layer can no longer help us synchronize.

Also, this is strictly only usable for work that has been scheduled
without any delayed timers.  You can not mix the new interface with
schedule_delayed_work().

But it's better than what we had currently.

Acked-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/net/phy/phy.c     |  3 +-
 include/linux/workqueue.h |  1 +
 kernel/workqueue.c        | 73 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 4044bb1ada86..e175f3910b18 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -587,8 +587,7 @@ int phy_stop_interrupts(struct phy_device *phydev)
 	 * Finish any pending work; we might have been scheduled
 	 * to be called from keventd ourselves, though.
 	 */
-	if (!current_is_keventd())
-		flush_scheduled_work();
+	run_scheduled_work(&phydev->phy_queue);
 
 	free_irq(phydev->irq, phydev);
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f0cb1df7b475..edef8d50b26b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -162,6 +162,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
 
 extern int FASTCALL(schedule_work(struct work_struct *work));
+extern int FASTCALL(run_scheduled_work(struct work_struct *work));
 extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
 
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c5257316f4b9..6b186750e9be 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -108,6 +108,79 @@ static inline void *get_wq_data(struct work_struct *work)
 	return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
 }
 
+static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cwq->lock, flags);
+	/*
+	 * We need to re-validate the work info after we've gotten
+	 * the cpu_workqueue lock. We can run the work now iff:
+	 *
+	 *  - the wq_data still matches the cpu_workqueue_struct
+	 *  - AND the work is still marked pending
+	 *  - AND the work is still on a list (which will be this
+	 *    workqueue_struct list)
+	 *
+	 * All these conditions are important, because we
+	 * need to protect against the work being run right
+	 * now on another CPU (all but the last one might be
+	 * true if it's currently running and has not been
+	 * released yet, for example).
+	 */
+	if (get_wq_data(work) == cwq
+	    && work_pending(work)
+	    && !list_empty(&work->entry)) {
+		work_func_t f = work->func;
+		list_del_init(&work->entry);
+		spin_unlock_irqrestore(&cwq->lock, flags);
+
+		if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+			work_release(work);
+		f(work);
+
+		spin_lock_irqsave(&cwq->lock, flags);
+		cwq->remove_sequence++;
+		wake_up(&cwq->work_done);
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&cwq->lock, flags);
+	return ret;
+}
+
+/**
+ * run_scheduled_work - run scheduled work synchronously
+ * @work: work to run
+ *
+ * This checks if the work was pending, and runs it
+ * synchronously if so. It returns a boolean to indicate
+ * whether it had any scheduled work to run or not.
+ *
+ * NOTE! This _only_ works for normal work_structs. You
+ * CANNOT use this for delayed work, because the wq data
+ * for delayed work will not point properly to the per-
+ * CPU workqueue struct, but will change!
+ */
+int fastcall run_scheduled_work(struct work_struct *work)
+{
+	for (;;) {
+		struct cpu_workqueue_struct *cwq;
+
+		if (!work_pending(work))
+			return 0;
+		if (list_empty(&work->entry))
+			return 0;
+		/* NOTE! This depends intimately on __queue_work! */
+		cwq = get_wq_data(work);
+		if (!cwq)
+			return 0;
+		if (__run_work(cwq, work))
+			return 1;
+	}
+}
+EXPORT_SYMBOL(run_scheduled_work);
+
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
 			 struct work_struct *work)
-- 
cgit v1.2.3


From a79561134f38de12dce14ed72138f38e55ef53fc Mon Sep 17 00:00:00 2001
From: Zou Nan hai <nanhai.zou@intel.com>
Date: Thu, 7 Dec 2006 09:51:35 -0800
Subject: [IA64] IA64 Kexec/kdump

Changes and updates.

1. Remove fake rendz path and related code according to discuss with Khalid Aziz.
2. fc.i offset fix in relocate_kernel.S.
3. iospic shutdown code eoi and mask race fix from Fujitsu.
4. Warm boot hook in machine_kexec to SN SAL code from Jack Steiner.
5. Send slave to SAL slave loop patch from Jay Lan.
6. Kdump on non-recoverable MCA event patch from Jay Lan
7. Use CTL_UNNUMBERED in kdump_on_init sysctl.

Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Kconfig                  |  23 +++
 arch/ia64/kernel/Makefile          |   1 +
 arch/ia64/kernel/crash.c           | 245 +++++++++++++++++++++++++++
 arch/ia64/kernel/efi.c             |  65 +++++++-
 arch/ia64/kernel/entry.S           |   2 +-
 arch/ia64/kernel/iosapic.c         |  21 +++
 arch/ia64/kernel/machine_kexec.c   | 133 +++++++++++++++
 arch/ia64/kernel/mca.c             |   5 +
 arch/ia64/kernel/relocate_kernel.S | 334 +++++++++++++++++++++++++++++++++++++
 arch/ia64/kernel/setup.c           |  38 +++++
 arch/ia64/kernel/smp.c             |  28 +++-
 arch/ia64/sn/kernel/setup.c        |   8 +
 include/asm-ia64/kexec.h           |  47 ++++++
 include/asm-ia64/machvec.h         |   5 +
 include/asm-ia64/machvec_sn2.h     |   2 +
 include/asm-ia64/meminit.h         |   3 +-
 include/asm-ia64/sn/sn_sal.h       |   9 +
 include/linux/kexec.h              |   5 +
 kernel/kexec.c                     |   1 +
 19 files changed, 969 insertions(+), 6 deletions(-)
 create mode 100644 arch/ia64/kernel/crash.c
 create mode 100644 arch/ia64/kernel/machine_kexec.c
 create mode 100644 arch/ia64/kernel/relocate_kernel.S
 create mode 100644 include/asm-ia64/kexec.h

(limited to 'kernel')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 683b12c6f76c..75d839715b2f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -434,6 +434,29 @@ config IA64_ESI
 
 source "drivers/sn/Kconfig"
 
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is indepedent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similiarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  It may help to enable device hotplugging
+	  support.  As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+	  bool "kernel crash dumps (EXPERIMENTAL)"
+	  depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	  help
+	    Generate crash dump after being started by kexec.
+
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index cfa099b04cda..8ae384eb5357 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
new file mode 100644
index 000000000000..0aabedf95dad
--- /dev/null
+++ b/arch/ia64/kernel/crash.c
@@ -0,0 +1,245 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
+
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+atomic_t kdump_in_progress;
+int kdump_on_init = 1;
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+		size_t csize, unsigned long offset, int userbuf)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+	vaddr = __va(pfn<<PAGE_SHIFT);
+	if (userbuf) {
+		if (copy_to_user(buf, (vaddr + offset), csize)) {
+			return -EFAULT;
+		}
+	} else
+		memcpy(buf, (vaddr + offset), csize);
+	return csize;
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+		size_t data_len)
+{
+	struct elf_note *note = (struct elf_note *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = data_len;
+	note->n_type   = type;
+	buf += (sizeof(*note) + 3)/4;
+	memcpy(buf, name, note->n_namesz);
+	buf += (note->n_namesz + 3)/4;
+	memcpy(buf, data, data_len);
+	buf += (data_len + 3)/4;
+	return buf;
+}
+
+static void
+final_note(void *buf)
+{
+	memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu()
+{
+	void *buf;
+	unsigned long cfm, sof, sol;
+
+	int cpu = smp_processor_id();
+	struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+	elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+	memset(prstatus, 0, sizeof(*prstatus));
+	prstatus->pr_pid = current->pid;
+
+	ia64_dump_cpu_regs(dst);
+	cfm = dst[43];
+	sol = (cfm >> 7) & 0x7f;
+	sof = cfm & 0x7f;
+	dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+			sof - sol);
+
+	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+			sizeof(*prstatus));
+	final_note(buf);
+}
+
+static int
+kdump_wait_cpu_freeze(void)
+{
+	int cpu_num = num_online_cpus() - 1;
+	int timeout = 1000;
+	while(timeout-- > 0) {
+		if (atomic_read(&kdump_cpu_freezed) == cpu_num)
+			return 0;
+		udelay(1000);
+	}
+	return 1;
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+	/* This function is only called after the system
+	 * has paniced or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means shooting down the other cpus in
+	 * an SMP system.
+	 */
+	kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+	kdump_smp_send_stop();
+	if (kdump_wait_cpu_freeze() && kdump_on_init) 	{
+		//not all cpu response to IPI, send INIT to freeze them
+		kdump_smp_send_init();
+	}
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+	local_irq_disable();
+	kexec_disable_iosapic();
+	machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+	int cpuid;
+	local_irq_disable();
+	cpuid = smp_processor_id();
+	crash_save_this_cpu();
+	current->thread.ksp = (__u64)info->sw - 16;
+	atomic_inc(&kdump_cpu_freezed);
+	kdump_status[cpuid] = 1;
+	mb();
+	if (cpuid == 0) {
+		for (;;)
+			cpu_relax();
+	} else
+		ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct ia64_mca_notify_die *nd;
+	struct die_args *args = data;
+
+	if (!kdump_on_init)
+		return NOTIFY_DONE;
+
+	if (val != DIE_INIT_MONARCH_ENTER &&
+	    val != DIE_INIT_SLAVE_ENTER &&
+	    val != DIE_MCA_RENDZVOUS_LEAVE &&
+	    val != DIE_MCA_MONARCH_LEAVE)
+		return NOTIFY_DONE;
+
+	nd = (struct ia64_mca_notify_die *)args->err;
+	/* Reason code 1 means machine check rendezous*/
+	if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) &&
+		 nd->sos->rv_rc == 1)
+		return NOTIFY_DONE;
+
+	switch (val) {
+		case DIE_INIT_MONARCH_ENTER:
+			machine_kdump_on_init();
+			break;
+		case DIE_INIT_SLAVE_ENTER:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			if (atomic_read(&kdump_in_progress))
+				unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		case DIE_MCA_MONARCH_LEAVE:
+		     /* die_register->signr indicate if MCA is recoverable */
+			if (!args->signr)
+				machine_kdump_on_init();
+			break;
+	}
+	return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_on_init_table[] = {
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "kdump_on_init",
+		.data = &kdump_on_init,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table sys_table[] = {
+	{
+	  .ctl_name = CTL_KERN,
+	  .procname = "kernel",
+	  .mode = 0555,
+	  .child = kdump_on_init_table,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+	char *from = strstr(saved_command_line, "elfcorehdr=");
+	static struct notifier_block kdump_init_notifier_nb = {
+		.notifier_call = kdump_init_notifier,
+	};
+	int ret;
+	if (from)
+		elfcorehdr_addr = memparse(from+11, &from);
+	saved_max_pfn = (unsigned long)-1;
+	if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+		return ret;
+#ifdef CONFIG_SYSCTL
+	register_sysctl_table(sys_table, 0);
+#endif
+	return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index bb8770a177b5..9b96e7dbaf67 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/efi.h>
+#include <linux/kexec.h>
 
 #include <asm/io.h>
 #include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...);
 struct efi efi;
 EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)	(*(f))(args)
 
@@ -421,6 +422,8 @@ efi_init (void)
 			mem_limit = memparse(cp + 4, &cp);
 		} else if (memcmp(cp, "max_addr=", 9) == 0) {
 			max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+		} else if (memcmp(cp, "min_addr=", 9) == 0) {
+			min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
 		} else {
 			while (*cp != ' ' && *cp)
 				++cp;
@@ -428,6 +431,8 @@ efi_init (void)
 				++cp;
 		}
 	}
+	if (min_addr != 0UL)
+		printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
 	if (max_addr != ~0UL)
 		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
 
@@ -894,7 +899,8 @@ find_memmap_space (void)
 		as = max(contig_low, md->phys_addr);
 		ae = min(contig_high, efi_md_end(md));
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 		} else
 			ae = efi_md_end(md);
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 			 */
 			insert_resource(res, code_resource);
 			insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+                        insert_resource(res, &efi_memmap_res);
+                        insert_resource(res, &boot_param_res);
+			if (crashk_res.end > crashk_res.start)
+				insert_resource(res, &crashk_res);
+#endif
 		}
 	}
 }
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+   rsvd_regions are sorted
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+		struct rsvd_region *r, int n)
+{
+  int i;
+  u64 start, end;
+  u64 alignment = 1UL << _PAGE_SIZE_64M;
+  void *efi_map_start, *efi_map_end, *p;
+  efi_memory_desc_t *md;
+  u64 efi_desc_size;
+
+  efi_map_start = __va(ia64_boot_param->efi_memmap);
+  efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+  efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+  for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+	  md = p;
+	  if (!efi_wb(md))
+		  continue;
+	  start = ALIGN(md->phys_addr, alignment);
+	  end = efi_md_end(md);
+	  for (i = 0; i < n; i++) {
+		if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+			if (__pa(r[i].start) > start + size)
+				return start;
+			start = ALIGN(__pa(r[i].end), alignment);
+			if (i < n-1 && __pa(r[i+1].start) < start + size)
+				continue;
+			else
+				break;
+		}
+	  }
+	  if (end > start + size)
+		return start;
+  }
+
+  printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+	size);
+  return ~0UL;
+}
+#endif
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3390b7c5a63f..15234ed3a341 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1575,7 +1575,7 @@ sys_call_table:
 	data8 sys_mq_timedreceive		// 1265
 	data8 sys_mq_notify
 	data8 sys_mq_getsetattr
-	data8 sys_ni_syscall			// reserved for kexec_load
+	data8 sys_kexec_load
 	data8 sys_ni_syscall			// reserved for vserver
 	data8 sys_waitid			// 1270
 	data8 sys_add_key
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 60d64950e3c2..0fc5fb7865cf 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -288,6 +288,27 @@ nop (unsigned int irq)
 	/* do nothing... */
 }
 
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
+	u8 vec = 0;
+	for (info = iosapic_intr_info; info <
+			iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) {
+		list_for_each_entry(rte, &info->rtes,
+				rte_list) {
+			iosapic_write(rte->addr,
+					IOSAPIC_RTE_LOW(rte->rte_index),
+					IOSAPIC_MASK|vec);
+			iosapic_eoi(rte->addr, vec);
+		}
+	}
+}
+#endif
+
 static void
 mask_irq (unsigned int irq)
 {
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..468233fa2cee
--- /dev/null
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -0,0 +1,133 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *control_code_buffer;
+	const unsigned long *func;
+
+	func = (unsigned long *)&relocate_new_kernel;
+	/* Pre-load control code buffer to minimize work in kexec path */
+	control_code_buffer = page_address(image->control_code_page);
+	memcpy((void *)control_code_buffer, (const void *)func[0],
+			relocate_new_kernel_size);
+	flush_icache_range((unsigned long)control_code_buffer,
+			(unsigned long)control_code_buffer + relocate_new_kernel_size);
+	ia64_kimage = image;
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			cpu_down(cpu);
+	}
+	kexec_disable_iosapic();
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+	struct kimage *image = arg;
+	relocate_new_kernel_t rnk;
+	void *pal_addr = efi_get_pal_addr();
+	unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+	unsigned long vector;
+	int ii;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		crash_save_this_cpu();
+		current->thread.ksp = (__u64)info->sw - 16;
+	}
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* Mask CMC and Performance Monitor interrupts */
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* Mask ITV and Local Redirect Registers */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+
+	/* terminate possible nested in-service interrupts */
+	for (ii = 0; ii < 16; ii++)
+		ia64_eoi();
+
+	/* unmask TPR and clear any pending interrupts */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+	ia64_srlz_d();
+	vector = ia64_get_ivr();
+	while (vector != IA64_SPURIOUS_INT_VECTOR) {
+		ia64_eoi();
+		vector = ia64_get_ivr();
+	}
+	platform_kernel_launch_event();
+	rnk = (relocate_new_kernel_t)&code_addr;
+	(*rnk)(image->head, image->start, ia64_boot_param,
+		     GRANULEROUNDDOWN((unsigned long) pal_addr));
+	BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+	unw_init_running(ia64_machine_kexec, image);
+	for(;;);
+}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6bedd97570ca..87c1c4f42872 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -82,6 +82,7 @@
 #include <asm/system.h>
 #include <asm/sal.h>
 #include <asm/mca.h>
+#include <asm/kexec.h>
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
@@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	} else {
 		/* Dump buffered message to console */
 		ia64_mlogbuf_finish(1);
+#ifdef CONFIG_CRASH_DUMP
+		atomic_set(&kdump_in_progress, 1);
+		monarch_cpu = -1;
+#endif
 	}
 	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
 			== NOTIFY_STOP)
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..ae473e3f2a0d
--- /dev/null
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -0,0 +1,334 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+	.prologue
+	alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+	rsm psr.i| psr.ic
+	mov r2=ip
+}
+	;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+	;;
+	dep r2=0,r2,61,3		//to physical address
+	;;
+	//first switch to physical mode
+	add r3=1f-.reloc_entry, r2
+	movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+	mov ar.rsc=0	          	// put RSE in enforced lazy mode
+	;;
+	add sp=(memory_stack_end - 16 - .reloc_entry),r2
+	add r8=(register_stack - .reloc_entry),r2
+	;;
+	mov r18=ar.rnat
+	mov ar.bspstore=r8
+	;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+	srlz.i
+	;;
+	mov ar.rnat=r18
+	rfi
+	;;
+1:
+	//physical mode code begin
+	mov b6=in1
+	dep r28=0,in2,61,3	//to physical address
+
+	// purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, cpu_info)    // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;    	// r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+	//purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+	// purge TR entry for percpu data
+        movl r16=PERCPU_ADDR
+        mov r18=PERCPU_PAGE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.d
+	;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+	;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+	;;
+
+	//copy segments
+	movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+	;;
+.loop:
+	ld8  r30=[in0], 8;;
+.dest_page:
+	tbit.z p0, p6=r30, 0;;    	// 0x1 dest page
+(p6)	and r17=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 1;;		// 0x2 indirect page
+(p6)	and in0=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 2;;		// 0x4 end flag
+(p6)	br.cond.sptk.few .end_loop;;
+
+	tbit.z p6, p0=r30, 3;;		// 0x8 source page
+(p6)	br.cond.sptk.few .loop
+
+	and r18=r30, r16
+
+	// simple copy page, may optimize later
+	movl r14=PAGE_SIZE/8 - 1;;
+	mov ar.lc=r14;;
+1:
+	ld8 r14=[r18], 8;;
+	st8 [r17]=r14;;
+	fc.i r17
+	add r17=8, r17
+	br.ctop.sptk.few 1b
+	br.sptk.few .loop
+	;;
+
+.end_loop:
+	sync.i			// for fc.i
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	br.call.sptk.many b0=b6;;
+
+.align  32
+memory_stack:
+	.fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+	.fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+	data8	relocate_new_kernel_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8			// r0
+        st8 [loc1]=r4, 8		// rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8			// r1
+        st8 [loc1]=r5, 8		// pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8			// r2
+        st8 [loc1]=r4, 8		// b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24		// r3
+        st8 [loc1]=r5, 8		// b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8			// r6
+        st8 [loc1]=r4, 8		// b2
+	mov r5=b3
+        ;;
+        st8 [in0]=r7, 8			// r7
+        st8 [loc1]=r5, 8		// b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8			// r8
+        st8 [loc1]=r4, 8		// b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8			// r9
+        st8 [loc1]=r5, 8		// b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8		// r10
+        st8 [loc1]=r5, 8		// b6
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8		// r11
+        st8 [loc1]=r5, 8		// b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8		// r12
+        st8 [loc1]=r4, 8		// ip
+        mov r5=loc0
+	;;
+        st8 [in0]=r13, 8		// r13
+        extr.u r5=r5, 0, 38		// ar.pfs.pfm
+	mov r4=r0			// user mask
+        ;;
+        st8 [in0]=r14, 8		// r14
+        st8 [loc1]=r5, 8		// cfm
+        ;;
+        st8 [in0]=r15, 8		// r15
+        st8 [loc1]=r4, 8        	// user mask
+	mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8		// r16
+        st8 [loc1]=r5, 8        	// ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8		// r17
+        st8 [loc1]=r4, 8        	// ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8		// r18
+        st8 [loc1]=r5, 8        	// ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8		// r19
+        st8 [loc1]=r4, 8        	// ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8		// r20
+	st8 [loc1]=r5, 8        	// ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8		// r21
+        st8 [loc1]=r4, 8        	// ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8		// r22
+        st8 [loc1]=r5, 8        	// ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8		// r23
+        st8 [loc1]=r4, 8        	// unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8		// r24
+        st8 [loc1]=r5, 8        	// fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8		// r25
+        st8 [loc1]=r4, 8        	// ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8		// r26
+        st8 [loc1]=r5, 8        	// ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8		// r27
+        st8 [loc1]=r4, 8        	// ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8		// r28
+        st8 [loc1]=r5, 8        	// ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8		// r29
+        st8 [loc1]=r4, 8        	// ar.ssd
+        ;;
+        st8 [in0]=r30, 8		// r30
+        ;;
+	st8 [in0]=r31, 8		// r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index d10404a41756..14e1200376a9 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -43,6 +43,8 @@
 #include <linux/initrd.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -252,6 +254,41 @@ reserve_memory (void)
 	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;
 
+#ifdef CONFIG_KEXEC
+	/* crashkernel=size@offset specifies the size to reserve for a crash
+	 * kernel.(offset is ingored for keep compatibility with other archs)
+	 * By reserving this memory we guarantee that linux never set's it
+	 * up as a DMA target.Useful for holding code to do something
+	 * appropriate after a kernel panic.
+	 */
+	{
+		char *from = strstr(saved_command_line, "crashkernel=");
+		unsigned long base, size;
+		if (from) {
+			size = memparse(from + 12, &from);
+			if (size) {
+				sort_regions(rsvd_region, n);
+				base = kdump_find_rsvd_region(size,
+				rsvd_region, n);
+				if (base != ~0UL) {
+					rsvd_region[n].start =
+						(unsigned long)__va(base);
+					rsvd_region[n].end =
+						(unsigned long)__va(base + size);
+					n++;
+					crashk_res.start = base;
+					crashk_res.end = base + size - 1;
+				}
+			}
+		}
+		efi_memmap_res.start = ia64_boot_param->efi_memmap;
+                efi_memmap_res.end = efi_memmap_res.start +
+                        ia64_boot_param->efi_memmap_size;
+                boot_param_res.start = __pa(ia64_boot_param);
+                boot_param_res.end = boot_param_res.start +
+                        sizeof(*ia64_boot_param);
+	}
+#endif
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end   = ~0UL;
@@ -263,6 +300,7 @@ reserve_memory (void)
 	sort_regions(rsvd_region, num_rsvd_regions);
 }
 
+
 /**
  * find_initrd - get initrd parameters from the boot parameter structure
  *
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 6ab95ceaf9d4..b1b9aa4364b9 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/efi.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>
 
 #include <asm/atomic.h>
 #include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct *call_data;
 
 #define IPI_CALL_FUNC		0
 #define IPI_CPU_STOP		1
+#define IPI_KDUMP_CPU_STOP	3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
 static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -155,7 +157,11 @@ handle_IPI (int irq, void *dev_id)
 			      case IPI_CPU_STOP:
 				stop_this_cpu();
 				break;
-
+#ifdef CONFIG_CRASH_DUMP
+			      case IPI_KDUMP_CPU_STOP:
+				unw_init_running(kdump_cpu_freeze, NULL);
+				break;
+#endif
 			      default:
 				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
 				break;
@@ -213,6 +219,26 @@ send_IPI_self (int op)
 	send_IPI_single(smp_processor_id(), op);
 }
 
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_smp_send_stop()
+{
+ 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init()
+{
+	unsigned int cpu, self_cpu;
+	self_cpu = smp_processor_id();
+	for_each_online_cpu(cpu) {
+		if (cpu != self_cpu) {
+			if(kdump_status[cpu] == 0)
+				platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+		}
+	}
+}
+#endif
 /*
  * Called with preeemption disabled.
  */
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 1d009f93244d..a934ad069425 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -769,5 +769,13 @@ int sn_prom_feature_available(int id)
 		return 0;
 	return test_bit(id, sn_prom_features);
 }
+
+void
+sn_kernel_launch_event(void)
+{
+	/* ignore status until we understand possible failure, if any*/
+	if (ia64_sn_kernel_launch_event())
+		printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n");
+}
 EXPORT_SYMBOL(sn_prom_feature_available);
 
diff --git a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h
new file mode 100644
index 000000000000..01c36b004747
--- /dev/null
+++ b/include/asm-ia64/kexec.h
@@ -0,0 +1,47 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define kexec_flush_icache_page(page) do { \
+                unsigned long page_addr = (unsigned long)page_address(page); \
+                flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+        } while(0)
+
+extern struct kimage *ia64_kimage;
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+const extern unsigned int relocate_new_kernel_size;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+		struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+extern atomic_t kdump_in_progress;
+
+#endif /* _ASM_IA64_KEXEC_H */
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index 8f784f8e45b0..a3891eb3f217 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -37,6 +37,7 @@ typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
 					u8 size);
 typedef void ia64_mv_migrate_t(struct task_struct * task);
 typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *);
+typedef void ia64_mv_kernel_launch_event_t(void);
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
@@ -218,6 +219,7 @@ struct ia64_machine_vector {
 	ia64_mv_setup_msi_irq_t *setup_msi_irq;
 	ia64_mv_teardown_msi_irq_t *teardown_msi_irq;
 	ia64_mv_pci_fixup_bus_t *pci_fixup_bus;
+	ia64_mv_kernel_launch_event_t *kernel_launch_event;
 } __attribute__((__aligned__(16))); /* align attrib? see above comment */
 
 #define MACHVEC_INIT(name)			\
@@ -318,6 +320,9 @@ extern ia64_mv_dma_supported		swiotlb_dma_supported;
 #ifndef platform_tlb_migrate_finish
 # define platform_tlb_migrate_finish	machvec_noop_mm
 #endif
+#ifndef platform_kernel_launch_event
+# define platform_kernel_launch_event	machvec_noop
+#endif
 #ifndef platform_dma_init
 # define platform_dma_init		swiotlb_init
 #endif
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h
index 83325f6db03e..eaa2fce0fecd 100644
--- a/include/asm-ia64/machvec_sn2.h
+++ b/include/asm-ia64/machvec_sn2.h
@@ -67,6 +67,7 @@ extern ia64_mv_dma_sync_sg_for_device	sn_dma_sync_sg_for_device;
 extern ia64_mv_dma_mapping_error	sn_dma_mapping_error;
 extern ia64_mv_dma_supported		sn_dma_supported;
 extern ia64_mv_migrate_t		sn_migrate;
+extern ia64_mv_kernel_launch_event_t	sn_kernel_launch_event;
 extern ia64_mv_setup_msi_irq_t		sn_setup_msi_irq;
 extern ia64_mv_teardown_msi_irq_t	sn_teardown_msi_irq;
 extern ia64_mv_pci_fixup_bus_t		sn_pci_fixup_bus;
@@ -121,6 +122,7 @@ extern ia64_mv_pci_fixup_bus_t		sn_pci_fixup_bus;
 #define platform_dma_mapping_error		sn_dma_mapping_error
 #define platform_dma_supported		sn_dma_supported
 #define platform_migrate		sn_migrate
+#define platform_kernel_launch_event    sn_kernel_launch_event
 #ifdef CONFIG_PCI_MSI
 #define platform_setup_msi_irq		sn_setup_msi_irq
 #define platform_teardown_msi_irq	sn_teardown_msi_irq
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index c3b1f862e6e7..c8df75901083 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -15,11 +15,12 @@
  * 	- initrd (optional)
  * 	- command line string
  * 	- kernel code & data
+ * 	- crash dumping code reserved region
  * 	- Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
 
 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index be5d83ad7cb1..2c4004eb5a68 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -88,6 +88,8 @@
 #define  SN_SAL_INJECT_ERROR			   0x02000067
 #define  SN_SAL_SET_CPU_NUMBER			   0x02000068
 
+#define  SN_SAL_KERNEL_LAUNCH_EVENT		   0x02000069
+
 /*
  * Service-specific constants
  */
@@ -1155,4 +1157,11 @@ ia64_sn_set_cpu_number(int cpu)
 	SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0);
 	return rv.status;
 }
+static inline int
+ia64_sn_kernel_launch_event(void)
+{
+ 	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
 #endif /* _ASM_IA64_SN_SN_SAL_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a4ede62b339d..e14cd388c65b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
 #define KEXEC_ON_CRASH  0x00000001
 #define KEXEC_ARCH_MASK 0xffff0000
 
@@ -133,6 +137,7 @@ extern struct resource crashk_res;
 typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
 extern note_buf_t *crash_notes;
 
+
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index fcdd5d2bc3f4..05aada293592 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -851,6 +851,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 			memset(ptr + uchunk, 0, mchunk - uchunk);
 		}
 		result = copy_from_user(ptr, buf, uchunk);
+		kexec_flush_icache_page(page);
 		kunmap(page);
 		if (result) {
 			result = (result < 0) ? result : -EIO;
-- 
cgit v1.2.3


From aad094701c6355cb2b3d74a07ec0496f4a48c787 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 8 Dec 2006 02:35:57 -0800
Subject: [PATCH] move kallsyms data to .rodata

Kallsyms data is never written to, so it can as well benefit from
CONFIG_DEBUG_RODATA.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kallsyms.c  | 16 ++++++++--------
 scripts/kallsyms.c |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index ab63cfc42992..6f294ff4f9ee 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -31,14 +31,14 @@
 #endif
 
 /* These will be re-linked against their real values during the second link stage */
-extern unsigned long kallsyms_addresses[] __attribute__((weak));
-extern unsigned long kallsyms_num_syms __attribute__((weak,section("data")));
-extern u8 kallsyms_names[] __attribute__((weak));
+extern const unsigned long kallsyms_addresses[] __attribute__((weak));
+extern const unsigned long kallsyms_num_syms __attribute__((weak));
+extern const u8 kallsyms_names[] __attribute__((weak));
 
-extern u8 kallsyms_token_table[] __attribute__((weak));
-extern u16 kallsyms_token_index[] __attribute__((weak));
+extern const u8 kallsyms_token_table[] __attribute__((weak));
+extern const u16 kallsyms_token_index[] __attribute__((weak));
 
-extern unsigned long kallsyms_markers[] __attribute__((weak));
+extern const unsigned long kallsyms_markers[] __attribute__((weak));
 
 static inline int is_kernel_inittext(unsigned long addr)
 {
@@ -84,7 +84,7 @@ static int is_ksym_addr(unsigned long addr)
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
 {
 	int len, skipped_first = 0;
-	u8 *tptr, *data;
+	const u8 *tptr, *data;
 
 	/* get the compressed symbol length from the first symbol byte */
 	data = &kallsyms_names[off];
@@ -132,7 +132,7 @@ static char kallsyms_get_symbol_type(unsigned int off)
  * kallsyms array */
 static unsigned int get_symbol_offset(unsigned long pos)
 {
-	u8 *name;
+	const u8 *name;
 	int i;
 
 	/* use the closest marker we have. We have markers every 256 positions,
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index f359b730c2c5..8b809b264d18 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -265,7 +265,7 @@ static void write_src(void)
 	printf("#define ALGN .align 4\n");
 	printf("#endif\n");
 
-	printf(".data\n");
+	printf("\t.section .rodata, \"a\"\n");
 
 	/* Provide proper symbols relocatability by their '_text'
 	 * relativeness.  The symbol names cannot be used to construct
-- 
cgit v1.2.3


From 6e2ac66470976ad7f57e0948572669b2bdfea2d0 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Fri, 8 Dec 2006 02:35:58 -0800
Subject: [PATCH] CPEI gets warning at
 kernel/irq/migration.c:27/move_masked_irq()

While running my MCA test (hardware error injection) on 2.6.19,
I got some warning like following:

> BUG: warning at kernel/irq/migration.c:27/move_masked_irq()
>
> Call Trace:
>  [<a000000100013d20>] show_stack+0x40/0xa0
>                                 sp=e00000006b2578d0 bsp=e00000006b2510b0
>  [<a000000100013db0>] dump_stack+0x30/0x60
>                                 sp=e00000006b257aa0 bsp=e00000006b251098
>  [<a0000001000de430>] move_masked_irq+0xb0/0x240
>                                 sp=e00000006b257aa0 bsp=e00000006b251070
>  [<a0000001000de6a0>] move_native_irq+0xe0/0x180
>                                 sp=e00000006b257aa0 bsp=e00000006b251040
>  [<a00000010004ff50>] iosapic_end_level_irq+0x30/0xe0
>                                 sp=e00000006b257aa0 bsp=e00000006b251020
>  [<a0000001000d94d0>] __do_IRQ+0x170/0x400
>                                 sp=e00000006b257aa0 bsp=e00000006b250fd8
>  [<a0000001000116f0>] ia64_handle_irq+0x1b0/0x260
>                                 sp=e00000006b257aa0 bsp=e00000006b250fa8
>  [<a00000010000c3a0>] ia64_leave_kernel+0x0/0x280
>                                 sp=e00000006b257aa0 bsp=e00000006b250fa8
>  [<a000000100690cf0>] _spin_unlock_irqrestore+0x30/0x60
>                                 sp=e00000006b257c70 bsp=e00000006b250f90

It comes from:

[kernel/irq/migration.c]
  26         if (CHECK_IRQ_PER_CPU(desc->status)) {
  27                 WARN_ON(1);
  28                 return;
  29         }

By putting some printk in kernel, I found that irqbalance is trying to
move CPEI which is handled as PER_CPU irq. That's why.

CPEI(Corrected Platform Error Interrupt) is ia64 specific irq, is
allowed to pin to particular processor which selected by the platform, and
even it is PER_CPU but it has set_affinity handler (=iosapic_set_affinity)
as same as other IO-SAPIC-level interrupts. (I don't know why, but
I guess that there would be typical situation where the handler for
migration is needed, such as hotplug - the processor going to be
offline/hot-removed.)

To shut up this warning, there are 2 way at least:
 a) fix CPEI stuff
 b) prohibit setting affinity to PER_CPU irq

I'm not sure what stuff of CPEI need to be fixed, but I think that
returning error to attempting move PER_CPU irq is useful for all
applications since it will never work.

Following small patch takes b) style.
It works, the warning disappeared and irqbalance still runs well.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/irq/proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 9a352667007c..61f5c717a8f5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -54,7 +54,8 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
 	unsigned int irq = (int)(long)data, full_count = count, err;
 	cpumask_t new_value, tmp;
 
-	if (!irq_desc[irq].chip->set_affinity || no_irq_affinity)
+	if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
+				CHECK_IRQ_PER_CPU(irq_desc[irq].status))
 		return -EIO;
 
 	err = cpumask_parse_user(buffer, count, new_value);
-- 
cgit v1.2.3


From 24ec839c431eb79bb8f6abc00c4e1eb3b8c4d517 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:04 -0800
Subject: [PATCH] tty: ->signal->tty locking

Fix the locking of signal->tty.

Use ->sighand->siglock to protect ->signal->tty; this lock is already used
by most other members of ->signal/->sighand.  And unless we are 'current'
or the tasklist_lock is held we need ->siglock to access ->signal anyway.

(NOTE: sys_unshare() is broken wrt ->sighand locking rules)

Note that tty_mutex is held over tty destruction, so while holding
tty_mutex any tty pointer remains valid.  Otherwise the lifetime of ttys
are governed by their open file handles.  This leaves some holes for tty
access from signal->tty (or any other non file related tty access).

It solves the tty SLAB scribbles we were seeing.

(NOTE: the change from group_send_sig_info to __group_send_sig_info needs to
       be examined by someone familiar with the security framework, I think
       it is safe given the SEND_SIG_PRIV from other __group_send_sig_info
       invocations)

[schwidefsky@de.ibm.com: 3270 fix]
[akpm@osdl.org: various post-viro fixes]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Alan Cox <alan@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc64/solaris/misc.c |   4 +-
 arch/um/kernel/exec.c       |   7 +-
 drivers/char/tty_io.c       | 237 ++++++++++++++++++++++++++------------------
 drivers/s390/char/fs3270.c  |  12 ++-
 fs/dquot.c                  |  14 +--
 fs/open.c                   |   1 +
 include/linux/tty.h         |  11 +-
 kernel/acct.c               |   9 +-
 kernel/auditsc.c            |   2 +
 kernel/exit.c               |   4 +-
 kernel/sys.c                |   6 +-
 security/selinux/hooks.c    |  11 +-
 12 files changed, 186 insertions(+), 132 deletions(-)

(limited to 'kernel')

diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 9ed997982f8d..e84241d5f7f4 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -423,9 +423,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
 			   Solaris setpgrp and setsid? */
 			ret = sys_setpgid(0, 0);
 			if (ret) return ret;
-			mutex_lock(&tty_mutex);
-			current->signal->tty = NULL;
-			mutex_unlock(&tty_mutex);
+			proc_clear_tty(current);
 			return process_group(current);
 		}
 	case 2: /* getsid */
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 0561c43b4685..8d56ec6cca79 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -39,12 +39,13 @@ static long execve1(char *file, char __user * __user *argv,
 		    char __user *__user *env)
 {
         long error;
+	struct tty_struct *tty;
 
 #ifdef CONFIG_TTY_LOG
 	mutex_lock(&tty_mutex);
-	task_lock(current);	/* FIXME:  is this needed ? */
-	log_exec(argv, current->signal->tty);
-	task_unlock(current);
+	tty = get_current_tty();
+	if (tty)
+		log_exec(argv, tty);
 	mutex_unlock(&tty_mutex);
 #endif
         error = do_execve(file, argv, env, &current->thread.regs);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index b3cfc8bc613c..0c856a6f3677 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(tty_std_termios);
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
 
-/* Semaphore to protect creating and releasing a tty. This is shared with
+/* Mutex to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
@@ -250,7 +250,7 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
 				    "!= #fd's(%d) in %s\n",
 		       tty->name, tty->count, count, routine);
 		return count;
-       }	
+	}
 #endif
 	return 0;
 }
@@ -259,18 +259,6 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
  * Tty buffer allocation management
  */
 
-
-/**
- *	tty_buffer_free_all		-	free buffers used by a tty
- *	@tty: tty to free from
- *
- *	Remove all the buffers pending on a tty whether queued with data
- *	or in the free ring. Must be called when the tty is no longer in use
- *
- *	Locking: none
- */
-
-
 /**
  *	tty_buffer_free_all		-	free buffers used by a tty
  *	@tty: tty to free from
@@ -614,7 +602,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
  *	they are not on hot paths so a little discipline won't do 
  *	any harm.
  *
- *	Locking: takes termios_sem
+ *	Locking: takes termios_mutex
  */
  
 static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -915,7 +903,7 @@ static void tty_ldisc_enable(struct tty_struct *tty)
  *	context.
  *
  *	Locking: takes tty_ldisc_lock.
- *		called functions take termios_sem
+ *		 called functions take termios_mutex
  */
  
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -1267,12 +1255,12 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *
  *	Locking:
  *		BKL
- *		redirect lock for undoing redirection
- *		file list lock for manipulating list of ttys
- *		tty_ldisc_lock from called functions
- *		termios_sem resetting termios data
- *		tasklist_lock to walk task list for hangup event
- *
+ *		  redirect lock for undoing redirection
+ *		  file list lock for manipulating list of ttys
+ *		  tty_ldisc_lock from called functions
+ *		  termios_mutex resetting termios data
+ *		  tasklist_lock to walk task list for hangup event
+ *		    ->siglock to protect ->signal/->sighand
  */
 static void do_tty_hangup(struct work_struct *work)
 {
@@ -1354,14 +1342,18 @@ static void do_tty_hangup(struct work_struct *work)
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
 		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			spin_lock_irq(&p->sighand->siglock);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
-			if (!p->signal->leader)
+			if (!p->signal->leader) {
+				spin_unlock_irq(&p->sighand->siglock);
 				continue;
-			group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
-			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+			}
+			__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+			__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
+			spin_unlock_irq(&p->sighand->siglock);
 		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
@@ -1453,6 +1445,14 @@ int tty_hung_up_p(struct file * filp)
 
 EXPORT_SYMBOL(tty_hung_up_p);
 
+static void session_clear_tty(pid_t session)
+{
+	struct task_struct *p;
+	do_each_task_pid(session, PIDTYPE_SID, p) {
+		proc_clear_tty(p);
+	} while_each_task_pid(session, PIDTYPE_SID, p);
+}
+
 /**
  *	disassociate_ctty	-	disconnect controlling tty
  *	@on_exit: true if exiting so need to "hang up" the session
@@ -1469,31 +1469,35 @@ EXPORT_SYMBOL(tty_hung_up_p);
  *	The argument on_exit is set to 1 if called when a process is
  *	exiting; it is 0 if called by the ioctl TIOCNOTTY.
  *
- *	Locking: tty_mutex is taken to protect current->signal->tty
+ *	Locking:
  *		BKL is taken for hysterical raisins
- *		Tasklist lock is taken (under tty_mutex) to walk process
- *		lists for the session.
+ *		  tty_mutex is taken to protect tty
+ *		  ->siglock is taken to protect ->signal/->sighand
+ *		  tasklist_lock is taken to walk process list for sessions
+ *		    ->siglock is taken to protect ->signal/->sighand
  */
 
 void disassociate_ctty(int on_exit)
 {
 	struct tty_struct *tty;
-	struct task_struct *p;
 	int tty_pgrp = -1;
+	int session;
 
 	lock_kernel();
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = get_current_tty();
 	if (tty) {
 		tty_pgrp = tty->pgrp;
 		mutex_unlock(&tty_mutex);
+		/* XXX: here we race, there is nothing protecting tty */
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 	} else {
-		if (current->signal->tty_old_pgrp) {
-			kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
-			kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+		pid_t old_pgrp = current->signal->tty_old_pgrp;
+		if (old_pgrp) {
+			kill_pg(old_pgrp, SIGHUP, on_exit);
+			kill_pg(old_pgrp, SIGCONT, on_exit);
 		}
 		mutex_unlock(&tty_mutex);
 		unlock_kernel();	
@@ -1505,19 +1509,29 @@ void disassociate_ctty(int on_exit)
 			kill_pg(tty_pgrp, SIGCONT, on_exit);
 	}
 
-	/* Must lock changes to tty_old_pgrp */
-	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	current->signal->tty_old_pgrp = 0;
-	tty->session = 0;
-	tty->pgrp = -1;
+	session = current->signal->session;
+	spin_unlock_irq(&current->sighand->siglock);
+
+	mutex_lock(&tty_mutex);
+	/* It is possible that do_tty_hangup has free'd this tty */
+	tty = get_current_tty();
+	if (tty) {
+		tty->session = 0;
+		tty->pgrp = 0;
+	} else {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+		       " = NULL", tty);
+#endif
+	}
+	mutex_unlock(&tty_mutex);
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
-		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	session_clear_tty(session);
 	read_unlock(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	unlock_kernel();
 }
 
@@ -2337,16 +2351,10 @@ static void release_dev(struct file * filp)
 	 * tty.
 	 */
 	if (tty_closing || o_tty_closing) {
-		struct task_struct *p;
-
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		session_clear_tty(tty->session);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			session_clear_tty(o_tty->session);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2443,9 +2451,9 @@ static void release_dev(struct file * filp)
  *	The termios state of a pty is reset on first open so that
  *	settings don't persist across reuse.
  *
- *	Locking: tty_mutex protects current->signal->tty, get_tty_driver and
- *		init_dev work. tty->count should protect the rest.
- *		task_lock is held to update task details for sessions
+ *	Locking: tty_mutex protects tty, get_tty_driver and init_dev work.
+ *		 tty->count should protect the rest.
+ *		 ->siglock protects ->signal/->sighand
  */
 
 static int tty_open(struct inode * inode, struct file * filp)
@@ -2467,12 +2475,13 @@ retry_open:
 	mutex_lock(&tty_mutex);
 
 	if (device == MKDEV(TTYAUX_MAJOR,0)) {
-		if (!current->signal->tty) {
+		tty = get_current_tty();
+		if (!tty) {
 			mutex_unlock(&tty_mutex);
 			return -ENXIO;
 		}
-		driver = current->signal->tty->driver;
-		index = current->signal->tty->index;
+		driver = tty->driver;
+		index = tty->index;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2547,17 +2556,16 @@ got_driver:
 			filp->f_op = &tty_fops;
 		goto retry_open;
 	}
+
+	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
 	    !current->signal->tty &&
-	    tty->session == 0) {
-	    	task_lock(current);
-		current->signal->tty = tty;
-		task_unlock(current);
-		current->signal->tty_old_pgrp = 0;
-		tty->session = current->signal->session;
-		tty->pgrp = process_group(current);
-	}
+	    tty->session == 0)
+		__proc_set_tty(current, tty);
+	spin_unlock_irq(&current->sighand->siglock);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2747,7 +2755,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
  *
  *	Copies the kernel idea of the window size into the user buffer.
  *
- *	Locking: tty->termios_sem is taken to ensure the winsize data
+ *	Locking: tty->termios_mutex is taken to ensure the winsize data
  *		is consistent.
  */
 
@@ -2774,8 +2782,8 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
  *	Locking:
  *		Called function use the console_sem is used to ensure we do
  *	not try and resize the console twice at once.
- *		The tty->termios_sem is used to ensure we don't double
- *	resize and get confused. Lock order - tty->termios.sem before
+ *		The tty->termios_mutex is used to ensure we don't double
+ *	resize and get confused. Lock order - tty->termios_mutex before
  *	console sem
  */
 
@@ -2880,25 +2888,28 @@ static int fionbio(struct file *file, int __user *p)
  *	leader to set this tty as the controlling tty for the session.
  *
  *	Locking:
- *		Takes tasklist lock internally to walk sessions
- *		Takes task_lock() when updating signal->tty
  *		Takes tty_mutex() to protect tty instance
- *
+ *		Takes tasklist_lock internally to walk sessions
+ *		Takes ->siglock() when updating signal->tty
  */
 
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
-	struct task_struct *p;
-
+	int ret = 0;
 	if (current->signal->leader &&
 	    (current->signal->session == tty->session))
-		return 0;
+		return ret;
+
+	mutex_lock(&tty_mutex);
 	/*
 	 * The process must be a session leader and
 	 * not have a controlling tty already.
 	 */
-	if (!current->signal->leader || current->signal->tty)
-		return -EPERM;
+	if (!current->signal->leader || current->signal->tty) {
+		ret = -EPERM;
+		goto unlock;
+	}
+
 	if (tty->session > 0) {
 		/*
 		 * This tty is already the controlling
@@ -2908,24 +2919,18 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 			/*
 			 * Steal it away
 			 */
-
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			session_clear_tty(tty->session);
 			read_unlock(&tasklist_lock);
-		} else
-			return -EPERM;
+		} else {
+			ret = -EPERM;
+			goto unlock;
+		}
 	}
-	mutex_lock(&tty_mutex);
-	task_lock(current);
-	current->signal->tty = tty;
-	task_unlock(current);
+	proc_set_tty(current, tty);
+unlock:
 	mutex_unlock(&tty_mutex);
-	current->signal->tty_old_pgrp = 0;
-	tty->session = current->signal->session;
-	tty->pgrp = process_group(current);
-	return 0;
+	return ret;
 }
 
 /**
@@ -2937,7 +2942,7 @@ static int tiocsctty(struct tty_struct *tty, int arg)
  *	Obtain the process group of the tty. If there is no process group
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2995,7 +3000,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
  *	Obtain the session id of the tty. If there is no session
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3214,14 +3219,11 @@ int tty_ioctl(struct inode * inode, struct file * file,
 			clear_bit(TTY_EXCLUSIVE, &tty->flags);
 			return 0;
 		case TIOCNOTTY:
-			/* FIXME: taks lock or tty_mutex ? */
 			if (current->signal->tty != tty)
 				return -ENOTTY;
 			if (current->signal->leader)
 				disassociate_ctty(0);
-			task_lock(current);
-			current->signal->tty = NULL;
-			task_unlock(current);
+			proc_clear_tty(current);
 			return 0;
 		case TIOCSCTTY:
 			return tiocsctty(tty, arg);
@@ -3321,7 +3323,7 @@ static void __do_SAK(struct work_struct *work)
 	
 	if (!tty)
 		return;
-	session  = tty->session;
+	session = tty->session;
 	
 	/* We don't want an ldisc switch during this */
 	disc = tty_ldisc_ref(tty);
@@ -3834,9 +3836,52 @@ int tty_unregister_driver(struct tty_driver *driver)
 	cdev_del(&driver->cdev);
 	return 0;
 }
-
 EXPORT_SYMBOL(tty_unregister_driver);
 
+dev_t tty_devnum(struct tty_struct *tty)
+{
+	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
+}
+EXPORT_SYMBOL(tty_devnum);
+
+void proc_clear_tty(struct task_struct *p)
+{
+	spin_lock_irq(&p->sighand->siglock);
+	p->signal->tty = NULL;
+	spin_unlock_irq(&p->sighand->siglock);
+}
+EXPORT_SYMBOL(proc_clear_tty);
+
+void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+{
+	if (tty) {
+		tty->session = tsk->signal->session;
+		tty->pgrp = process_group(tsk);
+	}
+	tsk->signal->tty = tty;
+	tsk->signal->tty_old_pgrp = 0;
+}
+
+void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+{
+	spin_lock_irq(&tsk->sighand->siglock);
+	__proc_set_tty(tsk, tty);
+	spin_unlock_irq(&tsk->sighand->siglock);
+}
+
+struct tty_struct *get_current_tty(void)
+{
+	struct tty_struct *tty;
+	WARN_ON_ONCE(!mutex_is_locked(&tty_mutex));
+	tty = current->signal->tty;
+	/*
+	 * session->tty can be changed/cleared from under us, make sure we
+	 * issue the load. The obtained pointer, when not NULL, is valid as
+	 * long as we hold tty_mutex.
+	 */
+	barrier();
+	return tty;
+}
 
 /*
  * Initialize the console device. This is called *early*, so
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 78f8bda81dae..ef205ddd31a4 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -424,11 +424,15 @@ fs3270_open(struct inode *inode, struct file *filp)
 	minor = iminor(filp->f_dentry->d_inode);
 	/* Check for minor 0 multiplexer. */
 	if (minor == 0) {
-		if (!current->signal->tty)
+		struct tty_struct *tty;
+		mutex_lock(&tty_mutex);
+		tty = get_current_tty();
+		if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) {
+			mutex_unlock(&tty_mutex);
 			return -ENODEV;
-		if (current->signal->tty->driver->major != IBM_TTY3270_MAJOR)
-			return -ENODEV;
-		minor = current->signal->tty->index + RAW3270_FIRSTMINOR;
+		}
+		minor = tty->index + RAW3270_FIRSTMINOR;
+		mutex_unlock(&tty_mutex);
 	}
 	/* Check if some other program is already using fullscreen mode. */
 	fp = (struct fs3270 *) raw3270_find_view(&fs3270_fn, minor);
diff --git a/fs/dquot.c b/fs/dquot.c
index f9cd5e23ebdf..89066b19124d 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -828,6 +828,7 @@ static inline int need_print_warning(struct dquot *dquot)
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
+	struct tty_struct *tty;
 	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
 	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
@@ -835,14 +836,15 @@ static void print_warning(struct dquot *dquot, const char warntype)
 		return;
 
 	mutex_lock(&tty_mutex);
-	if (!current->signal->tty)
+	tty = get_current_tty();
+	if (!tty)
 		goto out_lock;
-	tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
+	tty_write_message(tty, dquot->dq_sb->s_id);
 	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
-		tty_write_message(current->signal->tty, ": warning, ");
+		tty_write_message(tty, ": warning, ");
 	else
-		tty_write_message(current->signal->tty, ": write failed, ");
-	tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
+		tty_write_message(tty, ": write failed, ");
+	tty_write_message(tty, quotatypes[dquot->dq_type]);
 	switch (warntype) {
 		case IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -863,7 +865,7 @@ static void print_warning(struct dquot *dquot, const char warntype)
 			msg = " block quota exceeded.\r\n";
 			break;
 	}
-	tty_write_message(current->signal->tty, msg);
+	tty_write_message(tty, msg);
 out_lock:
 	mutex_unlock(&tty_mutex);
 }
diff --git a/fs/open.c b/fs/open.c
index 89e0c237a636..3b56192816ca 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1087,6 +1087,7 @@ EXPORT_SYMBOL(sys_close);
 asmlinkage long sys_vhangup(void)
 {
 	if (capable(CAP_SYS_TTY_CONFIG)) {
+		/* XXX: this needs locking */
 		tty_vhangup(current->signal->tty);
 		return 0;
 	}
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f717f0898238..1d29999a3439 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -309,6 +309,12 @@ extern void tty_ldisc_flush(struct tty_struct *tty);
 extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		     unsigned long arg);
 
+extern dev_t tty_devnum(struct tty_struct *tty);
+extern void proc_clear_tty(struct task_struct *p);
+extern void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+extern void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+extern struct tty_struct *get_current_tty(void);
+
 extern struct mutex tty_mutex;
 
 /* n_tty.c */
@@ -335,10 +341,5 @@ extern void console_print(const char *);
 extern int vt_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg);
 
-static inline dev_t tty_devnum(struct tty_struct *tty)
-{
-	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
-}
-
 #endif /* __KERNEL__ */
 #endif
diff --git a/kernel/acct.c b/kernel/acct.c
index dc12db8600e7..ca5619039367 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -428,6 +428,7 @@ static void do_acct_process(struct file *file)
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
+	struct tty_struct *tty;
 
 	/*
 	 * First check to see if there is enough free_space to continue
@@ -485,12 +486,8 @@ static void do_acct_process(struct file *file)
 #endif
 
 	mutex_lock(&tty_mutex);
-	/* FIXME: Whoever is responsible for current->signal locking needs
-	   to use the same locking all over the kernel and document it */
-	read_lock(&tasklist_lock);
-	ac.ac_tty = current->signal->tty ?
-		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
-	read_unlock(&tasklist_lock);
+	tty = get_current_tty();
+	ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
 	mutex_unlock(&tty_mutex);
 
 	spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 40722e26de98..b6cb802fbcd1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -826,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				 context->return_code);
 
 	mutex_lock(&tty_mutex);
+	read_lock(&tasklist_lock);
 	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
 		tty = tsk->signal->tty->name;
 	else
 		tty = "(none)";
+	read_unlock(&tasklist_lock);
 	audit_log_format(ab,
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
 		  " ppid=%d pid=%d auid=%u uid=%u gid=%u"
diff --git a/kernel/exit.c b/kernel/exit.c
index 4e3f919edc48..fa235779b6a3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -384,9 +384,7 @@ void daemonize(const char *name, ...)
 	exit_mm(current);
 
 	set_special_pids(1, 1);
-	mutex_lock(&tty_mutex);
-	current->signal->tty = NULL;
-	mutex_unlock(&tty_mutex);
+	proc_clear_tty(current);
 
 	/* Block and flush all signals */
 	sigfillset(&blocked);
diff --git a/kernel/sys.c b/kernel/sys.c
index a0c1a29a507f..1ac2d1c5d84e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1484,7 +1484,6 @@ asmlinkage long sys_setsid(void)
 	pid_t session;
 	int err = -EPERM;
 
-	mutex_lock(&tty_mutex);
 	write_lock_irq(&tasklist_lock);
 
 	/* Fail if I am already a session leader */
@@ -1504,12 +1503,15 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(session, session);
+
+	spin_lock(&group_leader->sighand->siglock);
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
+	spin_unlock(&group_leader->sighand->siglock);
+
 	err = process_group(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	return err;
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 44e9cd470543..f5df8c70a9b5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1695,9 +1695,10 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 	struct tty_struct *tty;
 	struct fdtable *fdt;
 	long j = -1;
+	int drop_tty = 0;
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = get_current_tty();
 	if (tty) {
 		file_list_lock();
 		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1710,12 +1711,14 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 			struct inode *inode = file->f_dentry->d_inode;
 			if (inode_has_perm(current, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
-				/* Reset controlling tty. */
-				current->signal->tty = NULL;
-				current->signal->tty_old_pgrp = 0;
+				drop_tty = 1;
 			}
 		}
 		file_list_unlock();
+
+		/* Reset controlling tty. */
+		if (drop_tty)
+			proc_set_tty(current, NULL);
 	}
 	mutex_unlock(&tty_mutex);
 
-- 
cgit v1.2.3


From 7bcfa95e561f11a17720162935e4f704c5d6fda3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:36:07 -0800
Subject: [PATCH] do_acct_process(): don't take tty_mutex

No need to take the global tty_mutex, signal->tty->driver can't go away while
we are holding ->siglock.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/acct.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/acct.c b/kernel/acct.c
index ca5619039367..69a40c9777a2 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -485,12 +485,9 @@ static void do_acct_process(struct file *file)
 	ac.ac_ppid = current->parent->tgid;
 #endif
 
-	mutex_lock(&tty_mutex);
-	tty = get_current_tty();
-	ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
-	mutex_unlock(&tty_mutex);
-
 	spin_lock_irq(&current->sighand->siglock);
+	tty = current->signal->tty;
+	ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
 	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
 	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
 	ac.ac_flag = pacct->ac_flag;
-- 
cgit v1.2.3


From ae424ae4b5bcd820ad6ee6f0b986c4e14ed4d6cf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:36:08 -0800
Subject: [PATCH] make set_special_pids() static

Make set_special_pids() static, the only caller is daemonize().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 -
 kernel/exit.c         | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dede82c63445..5e8a0ba61749 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1240,7 +1240,6 @@ extern struct   mm_struct init_mm;
 
 #define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
 extern struct task_struct *find_task_by_pid_type(int type, int pid);
-extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
diff --git a/kernel/exit.c b/kernel/exit.c
index fa235779b6a3..fa0495e167e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -314,7 +314,7 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 	}
 }
 
-void set_special_pids(pid_t session, pid_t pgrp)
+static void set_special_pids(pid_t session, pid_t pgrp)
 {
 	write_lock_irq(&tasklist_lock);
 	__set_special_pids(session, pgrp);
-- 
cgit v1.2.3


From dae3c5a0b7052ad7dd9fa78c51ecfab828c5007b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:36:09 -0800
Subject: [PATCH] sys_unshare: remove a broken CLONE_SIGHAND code

sys_unshare(CLONE_SIGHAND) is broken, the code under 'if (new_sigh)' is
never executed but very wrong. Just remove it to avoid a confusion,
task_lock() has nothing to do with ->sighand changing.

Also, change the comment in unshare_sighand(). Yes, CLONE_THREAD implies
CLONE_SIGHAND, but still it looks confusing. Also, we don't need to check
current->sighand != NULL.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 7f2e31ba33af..f387a1393ca5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1544,15 +1544,13 @@ static int unshare_namespace(unsigned long unshare_flags, struct namespace **new
 }
 
 /*
- * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
- * supported yet
+ * Unsharing of sighand is not supported yet
  */
 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
 {
 	struct sighand_struct *sigh = current->sighand;
 
-	if ((unshare_flags & CLONE_SIGHAND) &&
-	    (sigh && atomic_read(&sigh->count) > 1))
+	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
 		return -EINVAL;
 	else
 		return 0;
@@ -1626,7 +1624,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
 	struct namespace *ns, *new_ns = NULL;
-	struct sighand_struct *sigh, *new_sigh = NULL;
+	struct sighand_struct *new_sigh = NULL;
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
@@ -1671,7 +1669,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		}
 	}
 
-	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
+	if (new_fs || new_ns || new_mm || new_fd || new_ulist ||
 				new_uts || new_ipc) {
 
 		task_lock(current);
@@ -1693,12 +1691,6 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 			new_ns = ns;
 		}
 
-		if (new_sigh) {
-			sigh = current->sighand;
-			rcu_assign_pointer(current->sighand, new_sigh);
-			new_sigh = sigh;
-		}
-
 		if (new_mm) {
 			mm = current->mm;
 			active_mm = current->active_mm;
-- 
cgit v1.2.3


From d63a5a74dee87883fda6b7d170244acaac5b05e8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 8 Dec 2006 02:36:17 -0800
Subject: [PATCH] lockdep: avoid lockdep warning in md

md_open takes ->reconfig_mutex which causes lockdep to complain.  This
(normally) doesn't have deadlock potential as the possible conflict is with a
reconfig_mutex in a different device.

I say "normally" because if a loop were created in the array->member hierarchy
a deadlock could happen.  However that causes bigger problems than a deadlock
and should be fixed independently.

So we flag the lock in md_open as a nested lock.  This requires defining
mutex_lock_interruptible_nested.

Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/md.c       | 2 +-
 include/linux/mutex.h | 2 ++
 kernel/mutex.c        | 9 +++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index d2bcb410e3ab..3ce7a5d81d81 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4423,7 +4423,7 @@ static int md_open(struct inode *inode, struct file *file)
 	mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
 	int err;
 
-	if ((err = mddev_lock(mddev)))
+	if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
 		goto out;
 
 	err = 0;
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b2b91c477563..a7544afd7582 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -125,8 +125,10 @@ extern int fastcall mutex_lock_interruptible(struct mutex *lock);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+extern int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass);
 #else
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
+# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 #endif
 
 /*
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 8c71cf72a497..e7cbbb82765b 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -206,6 +206,15 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
+
+int __sched
+mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
+{
+	might_sleep();
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+}
+
+EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
 #endif
 
 /*
-- 
cgit v1.2.3


From f3a43f3f64bff8e205c3702f6b4804d66e306848 Mon Sep 17 00:00:00 2001
From: "Josef \"Jeff\" Sipek" <jsipek@cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:36:43 -0800
Subject: [PATCH] kernel: change uses of f_{dentry, vfsmnt} to use f_path

Change all the uses of f_{dentry,vfsmnt} to f_path.{dentry,mnt} in
linux/kernel/.

Signed-off-by: Josef "Jeff" Sipek <jsipek@cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/acct.c   | 14 +++++++-------
 kernel/fork.c   |  2 +-
 kernel/futex.c  | 10 +++++-----
 kernel/relay.c  |  4 ++--
 kernel/sysctl.c |  2 +-
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/acct.c b/kernel/acct.c
index 69a40c9777a2..70d0d88e5554 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
 	spin_unlock(&acct_globals.lock);
 
 	/* May block */
-	if (vfs_statfs(file->f_dentry, &sbuf))
+	if (vfs_statfs(file->f_path.dentry, &sbuf))
 		return res;
 	suspend = sbuf.f_blocks * SUSPEND;
 	resume = sbuf.f_blocks * RESUME;
@@ -194,7 +194,7 @@ static void acct_file_reopen(struct file *file)
 		add_timer(&acct_globals.timer);
 	}
 	if (old_acct) {
-		mnt_unpin(old_acct->f_vfsmnt);
+		mnt_unpin(old_acct->f_path.mnt);
 		spin_unlock(&acct_globals.lock);
 		do_acct_process(old_acct);
 		filp_close(old_acct, NULL);
@@ -212,7 +212,7 @@ static int acct_on(char *name)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
 		filp_close(file, NULL);
 		return -EACCES;
 	}
@@ -229,11 +229,11 @@ static int acct_on(char *name)
 	}
 
 	spin_lock(&acct_globals.lock);
-	mnt_pin(file->f_vfsmnt);
+	mnt_pin(file->f_path.mnt);
 	acct_file_reopen(file);
 	spin_unlock(&acct_globals.lock);
 
-	mntput(file->f_vfsmnt);	/* it's pinned, now give up active reference */
+	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
 
 	return 0;
 }
@@ -283,7 +283,7 @@ asmlinkage long sys_acct(const char __user *name)
 void acct_auto_close_mnt(struct vfsmount *m)
 {
 	spin_lock(&acct_globals.lock);
-	if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
+	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
 		acct_file_reopen(NULL);
 	spin_unlock(&acct_globals.lock);
 }
@@ -299,7 +299,7 @@ void acct_auto_close(struct super_block *sb)
 {
 	spin_lock(&acct_globals.lock);
 	if (acct_globals.file &&
-	    acct_globals.file->f_vfsmnt->mnt_sb == sb) {
+	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
 		acct_file_reopen(NULL);
 	}
 	spin_unlock(&acct_globals.lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index f387a1393ca5..597707819327 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -252,7 +252,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
-			struct inode *inode = file->f_dentry->d_inode;
+			struct inode *inode = file->f_path.dentry->d_inode;
 			get_file(file);
 			if (tmp->vm_flags & VM_DENYWRITE)
 				atomic_dec(&inode->i_writecount);
diff --git a/kernel/futex.c b/kernel/futex.c
index 95989a3b4168..5a737de857d3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -166,7 +166,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 /*
  * Get parameters which are the keys for a futex.
  *
- * For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode,
+ * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
  * offset_within_page).  For private mappings, it's (uaddr, current->mm).
  * We can usually work out the index without swapping in the page.
  *
@@ -223,7 +223,7 @@ static int get_futex_key(u32 __user *uaddr, union futex_key *key)
 	/*
 	 * Linear file mappings are also simple.
 	 */
-	key->shared.inode = vma->vm_file->f_dentry->d_inode;
+	key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
 	key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
 	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
 		key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
@@ -1528,9 +1528,9 @@ static int futex_fd(u32 __user *uaddr, int signal)
 		goto out;
 	}
 	filp->f_op = &futex_fops;
-	filp->f_vfsmnt = mntget(futex_mnt);
-	filp->f_dentry = dget(futex_mnt->mnt_root);
-	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+	filp->f_path.mnt = mntget(futex_mnt);
+	filp->f_path.dentry = dget(futex_mnt->mnt_root);
+	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 
 	if (signal) {
 		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
diff --git a/kernel/relay.c b/kernel/relay.c
index 75a3a9a7efc2..818e514729cf 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -959,7 +959,7 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp,
 	if (!desc->count)
 		return 0;
 
-	mutex_lock(&filp->f_dentry->d_inode->i_mutex);
+	mutex_lock(&filp->f_path.dentry->d_inode->i_mutex);
 	do {
 		if (!relay_file_read_avail(buf, *ppos))
 			break;
@@ -979,7 +979,7 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp,
 			*ppos = relay_file_read_end_pos(buf, read_start, ret);
 		}
 	} while (desc->count && ret);
-	mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
 
 	return desc->written;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8e9f00fd6d18..9846db93a595 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1614,7 +1614,7 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
 			  size_t count, loff_t *ppos)
 {
 	int op;
-	struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
+	struct proc_dir_entry *de = PDE(file->f_path.dentry->d_inode);
 	struct ctl_table *table;
 	size_t res;
 	ssize_t error = -ENOTDIR;
-- 
cgit v1.2.3


From a7a005fd12b84392becca311f2a20d5bf2a1b7af Mon Sep 17 00:00:00 2001
From: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Date: Fri, 8 Dec 2006 02:37:17 -0800
Subject: [PATCH] struct path: convert kernel

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/auditsc.c |  4 ++--
 kernel/cpuset.c  | 22 +++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b6cb802fbcd1..298897559ca4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -781,8 +781,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 			if ((vma->vm_flags & VM_EXECUTABLE) &&
 			    vma->vm_file) {
 				audit_log_d_path(ab, "exe=",
-						 vma->vm_file->f_dentry,
-						 vma->vm_file->f_vfsmnt);
+						 vma->vm_file->f_path.dentry,
+						 vma->vm_file->f_path.mnt);
 				break;
 			}
 			vma = vma->vm_next;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0a6b4d89f9a0..2c3b4431472b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -413,8 +413,8 @@ static struct file_system_type cpuset_fs_type = {
  *
  *
  * When reading/writing to a file:
- *	- the cpuset to use in file->f_dentry->d_parent->d_fsdata
- *	- the 'cftype' of the file is file->f_dentry->d_fsdata
+ *	- the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+ *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
  */
 
 struct cftype {
@@ -1284,8 +1284,8 @@ static ssize_t cpuset_common_file_write(struct file *file,
 					const char __user *userbuf,
 					size_t nbytes, loff_t *unused_ppos)
 {
-	struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
-	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+	struct cftype *cft = __d_cft(file->f_path.dentry);
 	cpuset_filetype_t type = cft->private;
 	char *buffer;
 	char *pathbuf = NULL;
@@ -1367,7 +1367,7 @@ static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
 						size_t nbytes, loff_t *ppos)
 {
 	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cftype *cft = __d_cft(file->f_path.dentry);
 	if (!cft)
 		return -ENODEV;
 
@@ -1417,8 +1417,8 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 				size_t nbytes, loff_t *ppos)
 {
-	struct cftype *cft = __d_cft(file->f_dentry);
-	struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+	struct cftype *cft = __d_cft(file->f_path.dentry);
+	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
 	cpuset_filetype_t type = cft->private;
 	char *page;
 	ssize_t retval = 0;
@@ -1476,7 +1476,7 @@ static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbyt
 								loff_t *ppos)
 {
 	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cftype *cft = __d_cft(file->f_path.dentry);
 	if (!cft)
 		return -ENODEV;
 
@@ -1498,7 +1498,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
 	if (err)
 		return err;
 
-	cft = __d_cft(file->f_dentry);
+	cft = __d_cft(file->f_path.dentry);
 	if (!cft)
 		return -ENODEV;
 	if (cft->open)
@@ -1511,7 +1511,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
 
 static int cpuset_file_release(struct inode *inode, struct file *file)
 {
-	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cftype *cft = __d_cft(file->f_path.dentry);
 	if (cft->release)
 		return cft->release(inode, file);
 	return 0;
@@ -1700,7 +1700,7 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
  */
 static int cpuset_tasks_open(struct inode *unused, struct file *file)
 {
-	struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
 	struct ctr_struct *ctr;
 	pid_t *pidarray;
 	int npids;
-- 
cgit v1.2.3


From 937949d9edbf4049bd41af6c9f92c26280584564 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:54 -0800
Subject: [PATCH] add process_session() helper routine

Replace occurences of task->signal->session by a new process_session() helper
routine.

It will be useful for pid namespaces to abstract the session pid number.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/kernel/irixelf.c |  2 +-
 drivers/char/mxser.c       |  2 +-
 drivers/char/rocket.c      |  2 +-
 drivers/char/tty_io.c      | 12 ++++++------
 fs/binfmt_elf.c            |  4 ++--
 fs/binfmt_elf_fdpic.c      |  4 ++--
 include/linux/sched.h      |  5 +++++
 kernel/exit.c              | 19 ++++++++++---------
 kernel/fork.c              |  4 ++--
 kernel/signal.c            |  2 +-
 kernel/sys.c               |  8 ++++----
 11 files changed, 35 insertions(+), 29 deletions(-)

(limited to 'kernel')

diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index 1bbefbf43373..37cad5de515c 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1145,7 +1145,7 @@ static int irix_core_dump(long signr, struct pt_regs * regs, struct file *file)
 	psinfo.pr_pid = prstatus.pr_pid = current->pid;
 	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
 	psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current);
-	psinfo.pr_sid = prstatus.pr_sid = current->signal->session;
+	psinfo.pr_sid = prstatus.pr_sid = process_session(current);
 	if (current->pid == current->tgid) {
 		/*
 		 * This is the record for the group leader.  Add in the
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 5ed2486b7581..b1cc89c89820 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -994,7 +994,7 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
 		mxser_change_speed(info, NULL);
 	}
 
-	info->session = current->signal->session;
+	info->session = process_session(current);
 	info->pgrp = process_group(current);
 
 	/*
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index bac80056f7e0..4fdf52e9f3b1 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -1017,7 +1017,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	info->session = current->signal->session;
+	info->session = process_session(current);
 	info->pgrp = process_group(current);
 
 	if ((info->flags & ROCKET_INITIALIZED) == 0) {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 8a2d26e98ac4..4ebba7ca1dc9 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1511,7 +1511,7 @@ void disassociate_ctty(int on_exit)
 
 	spin_lock_irq(&current->sighand->siglock);
 	current->signal->tty_old_pgrp = 0;
-	session = current->signal->session;
+	session = process_session(current);
 	spin_unlock_irq(&current->sighand->siglock);
 
 	mutex_lock(&tty_mutex);
@@ -2897,7 +2897,7 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 {
 	int ret = 0;
 	if (current->signal->leader &&
-	    (current->signal->session == tty->session))
+			(process_session(current) == tty->session))
 		return ret;
 
 	mutex_lock(&tty_mutex);
@@ -2979,13 +2979,13 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 		return retval;
 	if (!current->signal->tty ||
 	    (current->signal->tty != real_tty) ||
-	    (real_tty->session != current->signal->session))
+	    (real_tty->session != process_session(current)))
 		return -ENOTTY;
 	if (get_user(pgrp, p))
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
-	if (session_of_pgrp(pgrp) != current->signal->session)
+	if (session_of_pgrp(pgrp) != process_session(current))
 		return -EPERM;
 	real_tty->pgrp = pgrp;
 	return 0;
@@ -3338,7 +3338,7 @@ static void __do_SAK(struct work_struct *work)
 	/* Kill the entire session */
 	do_each_task_pid(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
-			" (%s): p->signal->session==tty->session\n",
+			" (%s): process_session(p)==tty->session\n",
 			p->pid, p->comm);
 		send_sig(SIGKILL, p, 1);
 	} while_each_task_pid(session, PIDTYPE_SID, p);
@@ -3348,7 +3348,7 @@ static void __do_SAK(struct work_struct *work)
 	do_each_thread(g, p) {
 		if (p->signal->tty == tty) {
 			printk(KERN_NOTICE "SAK: killed process %d"
-			    " (%s): p->signal->session==tty->session\n",
+			    " (%s): process_session(p)==tty->session\n",
 			    p->pid, p->comm);
 			send_sig(SIGKILL, p, 1);
 			continue;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c6dbb4a7ec78..d3adfd353ff9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1317,7 +1317,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1363,7 +1363,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9f0b7efc3df5..76f06f6bc2f6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1322,7 +1322,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1371,7 +1371,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e8a0ba61749..270d864a8ff1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,6 +1047,11 @@ static inline pid_t process_group(struct task_struct *tsk)
 	return tsk->signal->pgrp;
 }
 
+static inline pid_t process_session(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
diff --git a/kernel/exit.c b/kernel/exit.c
index fa0495e167e9..8d289bfc13d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -193,14 +193,14 @@ int session_of_pgrp(int pgrp)
 
 	read_lock(&tasklist_lock);
 	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-		if (p->signal->session > 0) {
-			sid = p->signal->session;
+		if (process_session(p) > 0) {
+			sid = process_session(p);
 			goto out;
 		}
 	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	p = find_task_by_pid(pgrp);
 	if (p)
-		sid = p->signal->session;
+		sid = process_session(p);
 out:
 	read_unlock(&tasklist_lock);
 	
@@ -225,8 +225,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
 				|| p->exit_state
 				|| is_init(p->real_parent))
 			continue;
-		if (process_group(p->real_parent) != pgrp
-			    && p->real_parent->signal->session == p->signal->session) {
+		if (process_group(p->real_parent) != pgrp &&
+		    process_session(p->real_parent) == process_session(p)) {
 			ret = 0;
 			break;
 		}
@@ -302,7 +302,7 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (curr->signal->session != session) {
+	if (process_session(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		curr->signal->session = session;
 		attach_pid(curr, PIDTYPE_SID, session);
@@ -647,10 +647,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	 * outside, so the child pgrp is now orphaned.
 	 */
 	if ((process_group(p) != process_group(father)) &&
-	    (p->signal->session == father->signal->session)) {
+	    (process_session(p) == process_session(father))) {
 		int pgrp = process_group(p);
 
-		if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
+		if (will_become_orphaned_pgrp(pgrp, NULL) &&
+		    has_stopped_jobs(pgrp)) {
 			__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 			__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 		}
@@ -784,7 +785,7 @@ static void exit_notify(struct task_struct *tsk)
 	t = tsk->real_parent;
 	
 	if ((process_group(t) != process_group(tsk)) &&
-	    (t->signal->session == tsk->signal->session) &&
+	    (process_session(t) == process_session(tsk)) &&
 	    will_become_orphaned_pgrp(process_group(tsk), tsk) &&
 	    has_stopped_jobs(process_group(tsk))) {
 		__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
diff --git a/kernel/fork.c b/kernel/fork.c
index 597707819327..298c4d6ab512 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1259,9 +1259,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		if (thread_group_leader(p)) {
 			p->signal->tty = current->signal->tty;
 			p->signal->pgrp = process_group(current);
-			p->signal->session = current->signal->session;
+			p->signal->session = process_session(current);
 			attach_pid(p, PIDTYPE_PGID, process_group(p));
-			attach_pid(p, PIDTYPE_SID, p->signal->session);
+			attach_pid(p, PIDTYPE_SID, process_session(p));
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
diff --git a/kernel/signal.c b/kernel/signal.c
index ec81defde339..9eac4db60eda 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -583,7 +583,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	error = -EPERM;
 	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
 	    && ((sig != SIGCONT) ||
-		(current->signal->session != t->signal->session))
+		(process_session(current) != process_session(t)))
 	    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 	    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
 	    && !capable(CAP_KILL))
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ac2d1c5d84e..4f9d23a3095f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1381,7 +1381,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 
 	if (p->real_parent == group_leader) {
 		err = -EPERM;
-		if (p->signal->session != group_leader->signal->session)
+		if (process_session(p) != process_session(group_leader))
 			goto out;
 		err = -EACCES;
 		if (p->did_exec)
@@ -1400,7 +1400,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		struct task_struct *p;
 
 		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == group_leader->signal->session)
+			if (process_session(p) == process_session(group_leader))
 				goto ok_pgid;
 		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
 		goto out;
@@ -1459,7 +1459,7 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid)
-		return current->signal->session;
+		return process_session(current);
 	else {
 		int retval;
 		struct task_struct *p;
@@ -1471,7 +1471,7 @@ asmlinkage long sys_getsid(pid_t pid)
 		if (p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = process_session(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
-- 
cgit v1.2.3


From 1ec320afdc9552c92191d5f89fcd1ebe588334ca Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:55 -0800
Subject: [PATCH] add process_session() helper routine: deprecate old field

Add an anonymous union and ((deprecated)) to catch direct usage of the
session field.

[akpm@osdl.org: fix various missed conversions]
[jdike@addtoit.com: fix UML bug]
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/tty_io.c     |  2 +-
 fs/proc/array.c           |  2 +-
 include/linux/init_task.h | 11 ++++++-----
 include/linux/sched.h     | 19 +++++++++++++++++--
 kernel/exit.c             |  2 +-
 kernel/fork.c             |  2 +-
 6 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 4ebba7ca1dc9..48cee2004e97 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3855,7 +3855,7 @@ EXPORT_SYMBOL(proc_clear_tty);
 void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
 {
 	if (tty) {
-		tty->session = tsk->signal->session;
+		tty->session = process_session(tsk);
 		tty->pgrp = process_group(tsk);
 	}
 	tsk->signal->tty = tty;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index b0cd014a39bd..70e4fab117b1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,7 +381,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 			stime = cputime_add(stime, sig->stime);
 		}
 
-		sid = sig->session;
+		sid = signal_session(sig);
 		pgid = process_group(task);
 		ppid = rcu_dereference(task->real_parent)->tgid;
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 733790d4f7db..848a68af3d42 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -57,17 +57,18 @@
 	.cpu_vm_mask	= CPU_MASK_ALL,				\
 }
 
-#define INIT_SIGNALS(sig) {	\
-	.count		= ATOMIC_INIT(1), 		\
+#define INIT_SIGNALS(sig) {						\
+	.count		= ATOMIC_INIT(1), 				\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
-	.shared_pending	= { 				\
+	.shared_pending	= { 						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
-		.signal =  {{0}}}, \
+		.signal =  {{0}}},					\
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
 	.pgrp		= 1,						\
-	.session	= 1,						\
+	.tty_old_pgrp   = 0,						\
+	{ .__session      = 1},						\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 270d864a8ff1..6fec1d419714 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -436,7 +436,12 @@ struct signal_struct {
 	/* job control IDs */
 	pid_t pgrp;
 	pid_t tty_old_pgrp;
-	pid_t session;
+
+	union {
+		pid_t session __deprecated;
+		pid_t __session;
+	};
+
 	/* boolean value for session group leader */
 	int leader;
 
@@ -1047,9 +1052,19 @@ static inline pid_t process_group(struct task_struct *tsk)
 	return tsk->signal->pgrp;
 }
 
+static inline pid_t signal_session(struct signal_struct *sig)
+{
+	return sig->__session;
+}
+
 static inline pid_t process_session(struct task_struct *tsk)
 {
-	return tsk->signal->session;
+	return signal_session(tsk->signal);
+}
+
+static inline void set_signal_session(struct signal_struct *sig, pid_t session)
+{
+	sig->__session = session;
 }
 
 static inline struct pid *task_pid(struct task_struct *task)
diff --git a/kernel/exit.c b/kernel/exit.c
index 8d289bfc13d1..6267a6cc6113 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -304,7 +304,7 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 
 	if (process_session(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
-		curr->signal->session = session;
+		set_signal_session(curr->signal, session);
 		attach_pid(curr, PIDTYPE_SID, session);
 	}
 	if (process_group(curr) != pgrp) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 298c4d6ab512..60d2644bfe85 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1259,7 +1259,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		if (thread_group_leader(p)) {
 			p->signal->tty = current->signal->tty;
 			p->signal->pgrp = process_group(current);
-			p->signal->session = process_session(current);
+			set_signal_session(p->signal, process_session(current));
 			attach_pid(p, PIDTYPE_PGID, process_group(p));
 			attach_pid(p, PIDTYPE_SID, process_session(p));
 
-- 
cgit v1.2.3


From 6b3286ed1169d74fea401367d6d4d6c6ec758a81 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Fri, 8 Dec 2006 02:37:56 -0800
Subject: [PATCH] rename struct namespace to struct mnt_namespace

Rename 'struct namespace' to 'struct mnt_namespace' to avoid confusion with
other namespaces being developped for the containers : pid, uts, ipc, etc.
'namespace' variables and attributes are also renamed to 'mnt_ns'

Signed-off-by: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/mntpt.c                |   2 +-
 fs/namespace.c                | 113 +++++++++++++++++++++---------------------
 fs/nfs/getroot.c              |   2 +-
 fs/pnode.c                    |   2 +-
 fs/pnode.h                    |   2 +-
 fs/proc/base.c                |  36 +++++++-------
 fs/reiserfs/super.c           |   2 +-
 include/linux/init_task.h     |   2 +-
 include/linux/mnt_namespace.h |  42 ++++++++++++++++
 include/linux/mount.h         |   4 +-
 include/linux/namespace.h     |  42 ----------------
 include/linux/nsproxy.h       |   4 +-
 kernel/exit.c                 |   2 +-
 kernel/fork.c                 |  21 ++++----
 kernel/kmod.c                 |   2 +-
 kernel/nsproxy.c              |  16 +++---
 16 files changed, 148 insertions(+), 146 deletions(-)
 create mode 100644 include/linux/mnt_namespace.h
 delete mode 100644 include/linux/namespace.h

(limited to 'kernel')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index f33b1a81a761..8f74e8450826 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -18,7 +18,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include "super.h"
 #include "cell.h"
 #include "volume.h"
diff --git a/fs/namespace.c b/fs/namespace.c
index b00ac84ebbdd..fde8553faa76 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,7 +20,7 @@
 #include <linux/module.h>
 #include <linux/sysfs.h>
 #include <linux/seq_file.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
@@ -133,10 +133,10 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 static inline int check_mnt(struct vfsmount *mnt)
 {
-	return mnt->mnt_namespace == current->nsproxy->namespace;
+	return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
 
-static void touch_namespace(struct namespace *ns)
+static void touch_mnt_namespace(struct mnt_namespace *ns)
 {
 	if (ns) {
 		ns->event = ++event;
@@ -144,7 +144,7 @@ static void touch_namespace(struct namespace *ns)
 	}
 }
 
-static void __touch_namespace(struct namespace *ns)
+static void __touch_mnt_namespace(struct mnt_namespace *ns)
 {
 	if (ns && ns->event != event) {
 		ns->event = event;
@@ -187,19 +187,19 @@ static void commit_tree(struct vfsmount *mnt)
 	struct vfsmount *parent = mnt->mnt_parent;
 	struct vfsmount *m;
 	LIST_HEAD(head);
-	struct namespace *n = parent->mnt_namespace;
+	struct mnt_namespace *n = parent->mnt_ns;
 
 	BUG_ON(parent == mnt);
 
 	list_add_tail(&head, &mnt->mnt_list);
 	list_for_each_entry(m, &head, mnt_list)
-		m->mnt_namespace = n;
+		m->mnt_ns = n;
 	list_splice(&head, n->list.prev);
 
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 				hash(parent, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-	touch_namespace(n);
+	touch_mnt_namespace(n);
 }
 
 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -320,7 +320,7 @@ EXPORT_SYMBOL(mnt_unpin);
 /* iterator */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct mnt_namespace *n = m->private;
 	struct list_head *p;
 	loff_t l = *pos;
 
@@ -333,7 +333,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct mnt_namespace *n = m->private;
 	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
 	(*pos)++;
 	return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
@@ -526,8 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 	list_for_each_entry(p, kill, mnt_hash) {
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
-		__touch_namespace(p->mnt_namespace);
-		p->mnt_namespace = NULL;
+		__touch_mnt_namespace(p->mnt_ns);
+		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
 		if (p->mnt_parent != p)
 			p->mnt_mountpoint->d_mounted--;
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 	if (parent_nd) {
 		detach_mnt(source_mnt, parent_nd);
 		attach_mnt(source_mnt, nd);
-		touch_namespace(current->nsproxy->namespace);
+		touch_mnt_namespace(current->nsproxy->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
 		commit_tree(source_mnt);
@@ -1145,9 +1145,9 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
 	 */
 	if (!propagate_mount_busy(mnt, 2)) {
 		/* delete from the namespace */
-		touch_namespace(mnt->mnt_namespace);
+		touch_mnt_namespace(mnt->mnt_ns);
 		list_del_init(&mnt->mnt_list);
-		mnt->mnt_namespace = NULL;
+		mnt->mnt_ns = NULL;
 		umount_tree(mnt, 1, umounts);
 		spin_unlock(&vfsmount_lock);
 	} else {
@@ -1168,7 +1168,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
  */
 static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
 {
-	struct namespace *namespace;
+	struct mnt_namespace *ns;
 	struct vfsmount *mnt;
 
 	while (!list_empty(graveyard)) {
@@ -1178,10 +1178,10 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
 
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
-		namespace = mnt->mnt_namespace;
-		if (!namespace || !namespace->root)
+		ns = mnt->mnt_ns;
+		if (!ns || !ns->root)
 			continue;
-		get_namespace(namespace);
+		get_mnt_ns(ns);
 
 		spin_unlock(&vfsmount_lock);
 		down_write(&namespace_sem);
@@ -1189,7 +1189,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou
 		up_write(&namespace_sem);
 		release_mounts(&umounts);
 		mntput(mnt);
-		put_namespace(namespace);
+		put_mnt_ns(ns);
 		spin_lock(&vfsmount_lock);
 	}
 }
@@ -1439,14 +1439,15 @@ dput_out:
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
  */
-struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
+struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk,
+		struct fs_struct *fs)
 {
-	struct namespace *namespace = tsk->nsproxy->namespace;
-	struct namespace *new_ns;
+	struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns;
+	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
 	struct vfsmount *p, *q;
 
-	new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
+	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return NULL;
 
@@ -1457,7 +1458,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
-	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root,
+	new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
 					CL_COPY_ALL | CL_EXPIRE);
 	if (!new_ns->root) {
 		up_write(&namespace_sem);
@@ -1473,10 +1474,10 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 	 * as belonging to new namespace.  We have already acquired a private
 	 * fs_struct, so tsk->fs->lock is not needed.
 	 */
-	p = namespace->root;
+	p = mnt_ns->root;
 	q = new_ns->root;
 	while (p) {
-		q->mnt_namespace = new_ns;
+		q->mnt_ns = new_ns;
 		if (fs) {
 			if (p == fs->rootmnt) {
 				rootmnt = p;
@@ -1491,7 +1492,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 				fs->altrootmnt = mntget(q);
 			}
 		}
-		p = next_mnt(p, namespace->root);
+		p = next_mnt(p, mnt_ns->root);
 		q = next_mnt(q, new_ns->root);
 	}
 	up_write(&namespace_sem);
@@ -1506,16 +1507,16 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 	return new_ns;
 }
 
-int copy_namespace(int flags, struct task_struct *tsk)
+int copy_mnt_ns(int flags, struct task_struct *tsk)
 {
-	struct namespace *namespace = tsk->nsproxy->namespace;
-	struct namespace *new_ns;
+	struct mnt_namespace *ns = tsk->nsproxy->mnt_ns;
+	struct mnt_namespace *new_ns;
 	int err = 0;
 
-	if (!namespace)
+	if (!ns)
 		return 0;
 
-	get_namespace(namespace);
+	get_mnt_ns(ns);
 
 	if (!(flags & CLONE_NEWNS))
 		return 0;
@@ -1525,16 +1526,16 @@ int copy_namespace(int flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	new_ns = dup_namespace(tsk, tsk->fs);
+	new_ns = dup_mnt_ns(tsk, tsk->fs);
 	if (!new_ns) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	tsk->nsproxy->namespace = new_ns;
+	tsk->nsproxy->mnt_ns = new_ns;
 
 out:
-	put_namespace(namespace);
+	put_mnt_ns(ns);
 	return err;
 }
 
@@ -1754,7 +1755,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	detach_mnt(user_nd.mnt, &root_parent);
 	attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
 	attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
-	touch_namespace(current->nsproxy->namespace);
+	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&user_nd, &new_nd);
 	security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1779,27 +1780,27 @@ out3:
 static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
-	struct namespace *namespace;
+	struct mnt_namespace *ns;
 
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
-	namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
-	if (!namespace)
+	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
 		panic("Can't allocate initial namespace");
-	atomic_set(&namespace->count, 1);
-	INIT_LIST_HEAD(&namespace->list);
-	init_waitqueue_head(&namespace->poll);
-	namespace->event = 0;
-	list_add(&mnt->mnt_list, &namespace->list);
-	namespace->root = mnt;
-	mnt->mnt_namespace = namespace;
-
-	init_task.nsproxy->namespace = namespace;
-	get_namespace(namespace);
-
-	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
-	set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
+	atomic_set(&ns->count, 1);
+	INIT_LIST_HEAD(&ns->list);
+	init_waitqueue_head(&ns->poll);
+	ns->event = 0;
+	list_add(&mnt->mnt_list, &ns->list);
+	ns->root = mnt;
+	mnt->mnt_ns = ns;
+
+	init_task.nsproxy->mnt_ns = ns;
+	get_mnt_ns(ns);
+
+	set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
+	set_fs_root(current->fs, ns->root, ns->root->mnt_root);
 }
 
 void __init mnt_init(unsigned long mempages)
@@ -1860,11 +1861,11 @@ void __init mnt_init(unsigned long mempages)
 	init_mount_tree();
 }
 
-void __put_namespace(struct namespace *namespace)
+void __put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root = namespace->root;
+	struct vfsmount *root = ns->root;
 	LIST_HEAD(umount_list);
-	namespace->root = NULL;
+	ns->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
@@ -1872,5 +1873,5 @@ void __put_namespace(struct namespace *namespace)
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
-	kfree(namespace);
+	kfree(ns);
 }
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 20c6f39ea38a..8391bd7a83ce 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -31,7 +31,7 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/security.h>
 
 #include <asm/system.h>
diff --git a/fs/pnode.c b/fs/pnode.c
index da42ee61c1df..56aacead8362 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,7 +6,7 @@
  *	Author : Ram Pai (linuxram@us.ibm.com)
  *
  */
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
 #include "pnode.h"
diff --git a/fs/pnode.h b/fs/pnode.h
index 020e1bb60fdb..d45bd8ec36bf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -13,7 +13,7 @@
 
 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
 #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
-#define IS_MNT_NEW(mnt)  (!mnt->mnt_namespace)
+#define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
 #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a71f1755bb57..a3b5074118a7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -59,7 +59,7 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/rcupdate.h>
@@ -365,33 +365,33 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = get_proc_task(inode);
-	struct namespace *namespace = NULL;
+	struct mnt_namespace *ns = NULL;
 	struct proc_mounts *p;
 	int ret = -EINVAL;
 
 	if (task) {
 		task_lock(task);
-		namespace = task->nsproxy->namespace;
-		if (namespace)
-			get_namespace(namespace);
+		ns = task->nsproxy->mnt_ns;
+		if (ns)
+			get_mnt_ns(ns);
 		task_unlock(task);
 		put_task_struct(task);
 	}
 
-	if (namespace) {
+	if (ns) {
 		ret = -ENOMEM;
 		p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
 		if (p) {
 			file->private_data = &p->m;
 			ret = seq_open(file, &mounts_op);
 			if (!ret) {
-				p->m.private = namespace;
-				p->event = namespace->event;
+				p->m.private = ns;
+				p->event = ns->event;
 				return 0;
 			}
 			kfree(p);
 		}
-		put_namespace(namespace);
+		put_mnt_ns(ns);
 	}
 	return ret;
 }
@@ -399,15 +399,15 @@ static int mounts_open(struct inode *inode, struct file *file)
 static int mounts_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	struct namespace *namespace = m->private;
-	put_namespace(namespace);
+	struct mnt_namespace *ns = m->private;
+	put_mnt_ns(ns);
 	return seq_release(inode, file);
 }
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
 	struct proc_mounts *p = file->private_data;
-	struct namespace *ns = p->m.private;
+	struct mnt_namespace *ns = p->m.private;
 	unsigned res = 0;
 
 	poll_wait(file, &ns->poll, wait);
@@ -437,21 +437,21 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		struct namespace *namespace = NULL;
+		struct mnt_namespace *mnt_ns = NULL;
 		struct task_struct *task = get_proc_task(inode);
 
 		if (task) {
 			task_lock(task);
 			if (task->nsproxy)
-				namespace = task->nsproxy->namespace;
-			if (namespace)
-				get_namespace(namespace);
+				mnt_ns = task->nsproxy->mnt_ns;
+			if (mnt_ns)
+				get_mnt_ns(mnt_ns);
 			task_unlock(task);
 			put_task_struct(task);
 		}
 
-		if (namespace)
-			m->private = namespace;
+		if (mnt_ns)
+			m->private = mnt_ns;
 		else {
 			seq_release(inode, file);
 			ret = -EINVAL;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7fb5fb036f90..58ad4551a7c1 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -23,7 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 848a68af3d42..5c4989172f7e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -76,7 +76,7 @@ extern struct nsproxy init_nsproxy;
 	.count		= ATOMIC_INIT(1),				\
 	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
 	.uts_ns		= &init_uts_ns,					\
-	.namespace	= NULL,						\
+	.mnt_ns		= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
 }
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
new file mode 100644
index 000000000000..4af0b1fc282a
--- /dev/null
+++ b/include/linux/mnt_namespace.h
@@ -0,0 +1,42 @@
+#ifndef _NAMESPACE_H_
+#define _NAMESPACE_H_
+#ifdef __KERNEL__
+
+#include <linux/mount.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+
+struct mnt_namespace {
+	atomic_t		count;
+	struct vfsmount *	root;
+	struct list_head	list;
+	wait_queue_head_t poll;
+	int event;
+};
+
+extern int copy_mnt_ns(int, struct task_struct *);
+extern void __put_mnt_ns(struct mnt_namespace *ns);
+extern struct mnt_namespace *dup_mnt_ns(struct task_struct *,
+		struct fs_struct *);
+
+static inline void put_mnt_ns(struct mnt_namespace *ns)
+{
+	if (atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+		/* releases vfsmount_lock */
+		__put_mnt_ns(ns);
+}
+
+static inline void exit_mnt_ns(struct task_struct *p)
+{
+	struct mnt_namespace *ns = p->nsproxy->mnt_ns;
+	if (ns)
+		put_mnt_ns(ns);
+}
+
+static inline void get_mnt_ns(struct mnt_namespace *ns)
+{
+	atomic_inc(&ns->count);
+}
+
+#endif
+#endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 403d1a97c512..e357dc86a4de 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -20,7 +20,7 @@
 struct super_block;
 struct vfsmount;
 struct dentry;
-struct namespace;
+struct mnt_namespace;
 
 #define MNT_NOSUID	0x01
 #define MNT_NODEV	0x02
@@ -52,7 +52,7 @@ struct vfsmount {
 	struct list_head mnt_slave_list;/* list of slave mounts */
 	struct list_head mnt_slave;	/* slave list entry */
 	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
-	struct namespace *mnt_namespace; /* containing namespace */
+	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_pinned;
 };
 
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
deleted file mode 100644
index d137009f0b2b..000000000000
--- a/include/linux/namespace.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _NAMESPACE_H_
-#define _NAMESPACE_H_
-#ifdef __KERNEL__
-
-#include <linux/mount.h>
-#include <linux/sched.h>
-#include <linux/nsproxy.h>
-
-struct namespace {
-	atomic_t		count;
-	struct vfsmount *	root;
-	struct list_head	list;
-	wait_queue_head_t poll;
-	int event;
-};
-
-extern int copy_namespace(int, struct task_struct *);
-extern void __put_namespace(struct namespace *namespace);
-extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
-
-static inline void put_namespace(struct namespace *namespace)
-{
-	if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock))
-		/* releases vfsmount_lock */
-		__put_namespace(namespace);
-}
-
-static inline void exit_namespace(struct task_struct *p)
-{
-	struct namespace *namespace = p->nsproxy->namespace;
-	if (namespace) {
-		put_namespace(namespace);
-	}
-}
-
-static inline void get_namespace(struct namespace *namespace)
-{
-	atomic_inc(&namespace->count);
-}
-
-#endif
-#endif
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 971d1c6dfc4b..0aba1b1a39c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -4,7 +4,7 @@
 #include <linux/spinlock.h>
 #include <linux/sched.h>
 
-struct namespace;
+struct mnt_namespace;
 struct uts_namespace;
 struct ipc_namespace;
 
@@ -25,7 +25,7 @@ struct nsproxy {
 	spinlock_t nslock;
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
-	struct namespace *namespace;
+	struct mnt_namespace *mnt_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 6267a6cc6113..28d9feedfd27 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -13,7 +13,7 @@
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 60d2644bfe85..8c859eef8e6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -1525,17 +1525,18 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 }
 
 /*
- * Unshare the namespace structure if it is being shared
+ * Unshare the mnt_namespace structure if it is being shared
  */
-static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+static int unshare_mnt_namespace(unsigned long unshare_flags,
+		struct mnt_namespace **new_nsp, struct fs_struct *new_fs)
 {
-	struct namespace *ns = current->nsproxy->namespace;
+	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
 
 	if ((unshare_flags & CLONE_NEWNS) && ns) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+		*new_nsp = dup_mnt_ns(current, new_fs ? new_fs : current->fs);
 		if (!*new_nsp)
 			return -ENOMEM;
 	}
@@ -1623,7 +1624,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 {
 	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
-	struct namespace *ns, *new_ns = NULL;
+	struct mnt_namespace *ns, *new_ns = NULL;
 	struct sighand_struct *new_sigh = NULL;
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
@@ -1645,7 +1646,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
 		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+	if ((err = unshare_mnt_namespace(unshare_flags, &new_ns, new_fs)))
 		goto bad_unshare_cleanup_fs;
 	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
 		goto bad_unshare_cleanup_ns;
@@ -1686,8 +1687,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		}
 
 		if (new_ns) {
-			ns = current->nsproxy->namespace;
-			current->nsproxy->namespace = new_ns;
+			ns = current->nsproxy->mnt_ns;
+			current->nsproxy->mnt_ns = new_ns;
 			new_ns = ns;
 		}
 
@@ -1748,7 +1749,7 @@ bad_unshare_cleanup_sigh:
 
 bad_unshare_cleanup_ns:
 	if (new_ns)
-		put_namespace(new_ns);
+		put_mnt_ns(new_ns);
 
 bad_unshare_cleanup_fs:
 	if (new_fs)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8d2bea09a4ec..3a7379aa31ca 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -25,7 +25,7 @@
 #include <linux/kmod.h>
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/completion.h>
 #include <linux/file.h>
 #include <linux/workqueue.h>
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 674aceb7335a..bd9cb435dfe0 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -17,7 +17,7 @@
 #include <linux/version.h>
 #include <linux/nsproxy.h>
 #include <linux/init_task.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
 #include <linux/utsname.h>
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
@@ -60,8 +60,8 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig)
 	struct nsproxy *ns = clone_namespaces(orig);
 
 	if (ns) {
-		if (ns->namespace)
-			get_namespace(ns->namespace);
+		if (ns->mnt_ns)
+			get_mnt_ns(ns->mnt_ns);
 		if (ns->uts_ns)
 			get_uts_ns(ns->uts_ns);
 		if (ns->ipc_ns)
@@ -97,7 +97,7 @@ int copy_namespaces(int flags, struct task_struct *tsk)
 
 	tsk->nsproxy = new_ns;
 
-	err = copy_namespace(flags, tsk);
+	err = copy_mnt_ns(flags, tsk);
 	if (err)
 		goto out_ns;
 
@@ -117,8 +117,8 @@ out_ipc:
 	if (new_ns->uts_ns)
 		put_uts_ns(new_ns->uts_ns);
 out_uts:
-	if (new_ns->namespace)
-		put_namespace(new_ns->namespace);
+	if (new_ns->mnt_ns)
+		put_mnt_ns(new_ns->mnt_ns);
 out_ns:
 	tsk->nsproxy = old_ns;
 	kfree(new_ns);
@@ -127,8 +127,8 @@ out_ns:
 
 void free_nsproxy(struct nsproxy *ns)
 {
-		if (ns->namespace)
-			put_namespace(ns->namespace);
+		if (ns->mnt_ns)
+			put_mnt_ns(ns->mnt_ns);
 		if (ns->uts_ns)
 			put_uts_ns(ns->uts_ns);
 		if (ns->ipc_ns)
-- 
cgit v1.2.3


From 373beb35cd6b625e0ba4ad98baace12310a26aa8 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:57 -0800
Subject: [PATCH] identifier to nsproxy

Add an identifier to nsproxy.  The default init_ns_proxy has identifier 0 and
allocated nsproxies are given -1.

This identifier will be used by a new syscall sys_bind_ns.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init_task.h | 1 +
 include/linux/nsproxy.h   | 1 +
 kernel/nsproxy.c          | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5c4989172f7e..90c5f9a07730 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -75,6 +75,7 @@ extern struct nsproxy init_nsproxy;
 #define INIT_NSPROXY(nsproxy) {						\
 	.count		= ATOMIC_INIT(1),				\
 	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
+	.id		= 0,						\
 	.uts_ns		= &init_uts_ns,					\
 	.mnt_ns		= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 0aba1b1a39c7..27f37c1ec1d9 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -23,6 +23,7 @@ struct ipc_namespace;
 struct nsproxy {
 	atomic_t count;
 	spinlock_t nslock;
+	unsigned long id;
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index bd9cb435dfe0..f223c15c18e9 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -45,8 +45,10 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
 	struct nsproxy *ns;
 
 	ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
-	if (ns)
+	if (ns) {
 		atomic_set(&ns->count, 1);
+		ns->id = -1;
+	}
 	return ns;
 }
 
-- 
cgit v1.2.3


From 61a58c6c238cc81f7742b8cc84212cc55fb57747 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Date: Fri, 8 Dec 2006 02:37:58 -0800
Subject: [PATCH] rename struct pspace to struct pid_namespace

Rename struct pspace to struct pid_namespace for consistency with other
namespaces (uts_namespace and ipc_namespace).  Also rename
include/linux/pspace.h to include/linux/pid_namespace.h and variables from
pspace to pid_ns.

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c           |  4 ++--
 include/linux/pid_namespace.h | 23 ++++++++++++++++++++
 include/linux/pspace.h        | 23 --------------------
 kernel/pid.c                  | 49 ++++++++++++++++++++++---------------------
 4 files changed, 50 insertions(+), 49 deletions(-)
 create mode 100644 include/linux/pid_namespace.h
 delete mode 100644 include/linux/pspace.h

(limited to 'kernel')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9397ff62553e..89ccfa16b000 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -45,7 +45,7 @@
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
-#include <linux/pspace.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -92,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, init_pspace.last_pid);
+		nr_running(), nr_threads, init_pid_ns.last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
new file mode 100644
index 000000000000..54d79095e295
--- /dev/null
+++ b/include/linux/pid_namespace.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PID_NS_H
+#define _LINUX_PID_NS_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <linux/pid.h>
+
+struct pidmap {
+       atomic_t nr_free;
+       void *page;
+};
+
+#define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+
+struct pid_namespace {
+       struct pidmap pidmap[PIDMAP_ENTRIES];
+       int last_pid;
+};
+
+extern struct pid_namespace init_pid_ns;
+
+#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/pspace.h b/include/linux/pspace.h
deleted file mode 100644
index 91d48b8b2d99..000000000000
--- a/include/linux/pspace.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _LINUX_PSPACE_H
-#define _LINUX_PSPACE_H
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/threads.h>
-#include <linux/pid.h>
-
-struct pidmap {
-       atomic_t nr_free;
-       void *page;
-};
-
-#define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
-
-struct pspace {
-       struct pidmap pidmap[PIDMAP_ENTRIES];
-       int last_pid;
-};
-
-extern struct pspace init_pspace;
-
-#endif /* _LINUX_PSPACE_H */
diff --git a/kernel/pid.c b/kernel/pid.c
index a48879b0b921..25807e1b98dd 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -26,7 +26,7 @@
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/hash.h>
-#include <linux/pspace.h>
+#include <linux/pid_namespace.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash;
@@ -43,9 +43,10 @@ int pid_max_max = PID_MAX_LIMIT;
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
 
-static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
+static inline int mk_pid(struct pid_namespace *pid_ns,
+		struct pidmap *map, int off)
 {
-	return (map - pspace->pidmap)*BITS_PER_PAGE + off;
+	return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
 }
 
 #define find_next_offset(map, off)					\
@@ -57,7 +58,7 @@ static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
  * value does not cause lots of bitmaps to be allocated, but
  * the scheme scales to up to 4 million PIDs, runtime.
  */
-struct pspace init_pspace = {
+struct pid_namespace init_pid_ns = {
 	.pidmap = {
 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
 	},
@@ -80,25 +81,25 @@ struct pspace init_pspace = {
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static fastcall void free_pidmap(struct pspace *pspace, int pid)
+static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid)
 {
-	struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
+	struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
 	clear_bit(offset, map->page);
 	atomic_inc(&map->nr_free);
 }
 
-static int alloc_pidmap(struct pspace *pspace)
+static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
-	int i, offset, max_scan, pid, last = pspace->last_pid;
+	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
 
 	pid = last + 1;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
-	map = &pspace->pidmap[pid/BITS_PER_PAGE];
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
 		if (unlikely(!map->page)) {
@@ -120,11 +121,11 @@ static int alloc_pidmap(struct pspace *pspace)
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
-					pspace->last_pid = pid;
+					pid_ns->last_pid = pid;
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
-				pid = mk_pid(pspace, map, offset);
+				pid = mk_pid(pid_ns, map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
 			 * is in-bounds, and if we fell back to the last
@@ -135,34 +136,34 @@ static int alloc_pidmap(struct pspace *pspace)
 					(i != max_scan || pid < last ||
 					    !((last+1) & BITS_PER_PAGE_MASK)));
 		}
-		if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
+		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
 			++map;
 			offset = 0;
 		} else {
-			map = &pspace->pidmap[0];
+			map = &pid_ns->pidmap[0];
 			offset = RESERVED_PIDS;
 			if (unlikely(last == offset))
 				break;
 		}
-		pid = mk_pid(pspace, map, offset);
+		pid = mk_pid(pid_ns, map, offset);
 	}
 	return -1;
 }
 
-static int next_pidmap(struct pspace *pspace, int last)
+static int next_pidmap(struct pid_namespace *pid_ns, int last)
 {
 	int offset;
 	struct pidmap *map, *end;
 
 	offset = (last + 1) & BITS_PER_PAGE_MASK;
-	map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE];
-	end = &pspace->pidmap[PIDMAP_ENTRIES];
+	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
+	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
 	for (; map < end; map++, offset = 0) {
 		if (unlikely(!map->page))
 			continue;
 		offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
 		if (offset < BITS_PER_PAGE)
-			return mk_pid(pspace, map, offset);
+			return mk_pid(pid_ns, map, offset);
 	}
 	return -1;
 }
@@ -192,7 +193,7 @@ fastcall void free_pid(struct pid *pid)
 	hlist_del_rcu(&pid->pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	free_pidmap(&init_pspace, pid->nr);
+	free_pidmap(&init_pid_ns, pid->nr);
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
@@ -206,7 +207,7 @@ struct pid *alloc_pid(void)
 	if (!pid)
 		goto out;
 
-	nr = alloc_pidmap(&init_pspace);
+	nr = alloc_pidmap(&init_pid_ns);
 	if (nr < 0)
 		goto out_free;
 
@@ -348,7 +349,7 @@ struct pid *find_ge_pid(int nr)
 		pid = find_pid(nr);
 		if (pid)
 			break;
-		nr = next_pidmap(&init_pspace, nr);
+		nr = next_pidmap(&init_pid_ns, nr);
 	} while (nr > 0);
 
 	return pid;
@@ -382,10 +383,10 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-	init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	/* Reserve PID 0. We never call free_pidmap(0) */
-	set_bit(0, init_pspace.pidmap[0].page);
-	atomic_dec(&init_pspace.pidmap[0].nr_free);
+	set_bit(0, init_pid_ns.pidmap[0].page);
+	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
 	pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
 					__alignof__(struct pid),
-- 
cgit v1.2.3


From 9a575a92db3312a40cdf0b0406d88de88ad9741e Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:59 -0800
Subject: [PATCH] to nsproxy

Add the pid namespace framework to the nsproxy object.  The copy of the pid
namespace only increases the refcount on the global pid namespace,
init_pid_ns, and unshare is not implemented.

There is no configuration option to activate or deactivate this feature
because this not relevant for the moment.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init_task.h     |  2 ++
 include/linux/nsproxy.h       |  2 ++
 include/linux/pid_namespace.h | 20 ++++++++++++++++++--
 kernel/nsproxy.c              | 26 +++++++++++++++++++-------
 kernel/pid.c                  | 23 +++++++++++++++++++++++
 5 files changed, 64 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 90c5f9a07730..7272ff9ee77c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -7,6 +7,7 @@
 #include <linux/utsname.h>
 #include <linux/lockdep.h>
 #include <linux/ipc.h>
+#include <linux/pid_namespace.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -73,6 +74,7 @@
 
 extern struct nsproxy init_nsproxy;
 #define INIT_NSPROXY(nsproxy) {						\
+	.pid_ns		= &init_pid_ns,					\
 	.count		= ATOMIC_INIT(1),				\
 	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
 	.id		= 0,						\
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 27f37c1ec1d9..fdfb0e44912f 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -7,6 +7,7 @@
 struct mnt_namespace;
 struct uts_namespace;
 struct ipc_namespace;
+struct pid_namespace;
 
 /*
  * A structure to contain pointers to all per-process
@@ -27,6 +28,7 @@ struct nsproxy {
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
+	struct pid_namespace *pid_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 54d79095e295..76e7c6b2cf33 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -5,6 +5,8 @@
 #include <linux/mm.h>
 #include <linux/threads.h>
 #include <linux/pid.h>
+#include <linux/nsproxy.h>
+#include <linux/kref.h>
 
 struct pidmap {
        atomic_t nr_free;
@@ -14,10 +16,24 @@ struct pidmap {
 #define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 
 struct pid_namespace {
-       struct pidmap pidmap[PIDMAP_ENTRIES];
-       int last_pid;
+	struct kref kref;
+	struct pidmap pidmap[PIDMAP_ENTRIES];
+	int last_pid;
 };
 
 extern struct pid_namespace init_pid_ns;
 
+static inline void get_pid_ns(struct pid_namespace *ns)
+{
+	kref_get(&ns->kref);
+}
+
+extern int copy_pid_ns(int flags, struct task_struct *tsk);
+extern void free_pid_ns(struct kref *kref);
+
+static inline void put_pid_ns(struct pid_namespace *ns)
+{
+	kref_put(&ns->kref, free_pid_ns);
+}
+
 #endif /* _LINUX_PID_NS_H */
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f223c15c18e9..e2ce748e96af 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -19,6 +19,7 @@
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
 #include <linux/utsname.h>
+#include <linux/pid_namespace.h>
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
@@ -68,6 +69,8 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig)
 			get_uts_ns(ns->uts_ns);
 		if (ns->ipc_ns)
 			get_ipc_ns(ns->ipc_ns);
+		if (ns->pid_ns)
+			get_pid_ns(ns->pid_ns);
 	}
 
 	return ns;
@@ -111,10 +114,17 @@ int copy_namespaces(int flags, struct task_struct *tsk)
 	if (err)
 		goto out_ipc;
 
+	err = copy_pid_ns(flags, tsk);
+	if (err)
+		goto out_pid;
+
 out:
 	put_nsproxy(old_ns);
 	return err;
 
+out_pid:
+	if (new_ns->ipc_ns)
+		put_ipc_ns(new_ns->ipc_ns);
 out_ipc:
 	if (new_ns->uts_ns)
 		put_uts_ns(new_ns->uts_ns);
@@ -129,11 +139,13 @@ out_ns:
 
 void free_nsproxy(struct nsproxy *ns)
 {
-		if (ns->mnt_ns)
-			put_mnt_ns(ns->mnt_ns);
-		if (ns->uts_ns)
-			put_uts_ns(ns->uts_ns);
-		if (ns->ipc_ns)
-			put_ipc_ns(ns->ipc_ns);
-		kfree(ns);
+	if (ns->mnt_ns)
+		put_mnt_ns(ns->mnt_ns);
+	if (ns->uts_ns)
+		put_uts_ns(ns->uts_ns);
+	if (ns->ipc_ns)
+		put_ipc_ns(ns->ipc_ns);
+	if (ns->pid_ns)
+		put_pid_ns(ns->pid_ns);
+	kfree(ns);
 }
diff --git a/kernel/pid.c b/kernel/pid.c
index 25807e1b98dd..5319b9f2fc5e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -59,6 +59,9 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
  * the scheme scales to up to 4 million PIDs, runtime.
  */
 struct pid_namespace init_pid_ns = {
+	.kref = {
+		.refcount       = ATOMIC_INIT(2),
+	},
 	.pidmap = {
 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
 	},
@@ -356,6 +359,26 @@ struct pid *find_ge_pid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
+int copy_pid_ns(int flags, struct task_struct *tsk)
+{
+	struct pid_namespace *old_ns = tsk->nsproxy->pid_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_pid_ns(old_ns);
+	return err;
+}
+
+void free_pid_ns(struct kref *kref)
+{
+	struct pid_namespace *ns;
+
+	ns = container_of(kref, struct pid_namespace, kref);
+	kfree(ns);
+}
+
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
-- 
cgit v1.2.3


From 6cc1b22a4acef3816eaa5f8c227d93d749b23195 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:38:00 -0800
Subject: [PATCH] use current->nsproxy->pid_ns

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/proc_misc.c | 2 +-
 kernel/pid.c        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 89ccfa16b000..f6d6f81dd446 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -92,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, init_pid_ns.last_pid);
+		nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 5319b9f2fc5e..1d9cc268b499 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -196,7 +196,7 @@ fastcall void free_pid(struct pid *pid)
 	hlist_del_rcu(&pid->pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	free_pidmap(&init_pid_ns, pid->nr);
+	free_pidmap(current->nsproxy->pid_ns, pid->nr);
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
@@ -210,7 +210,7 @@ struct pid *alloc_pid(void)
 	if (!pid)
 		goto out;
 
-	nr = alloc_pidmap(&init_pid_ns);
+	nr = alloc_pidmap(current->nsproxy->pid_ns);
 	if (nr < 0)
 		goto out_free;
 
@@ -352,7 +352,7 @@ struct pid *find_ge_pid(int nr)
 		pid = find_pid(nr);
 		if (pid)
 			break;
-		nr = next_pidmap(&init_pid_ns, nr);
+		nr = next_pidmap(current->nsproxy->pid_ns, nr);
 	} while (nr > 0);
 
 	return pid;
-- 
cgit v1.2.3


From 84d737866e2babdeab0c6b18ea155c6a649663b8 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Date: Fri, 8 Dec 2006 02:38:01 -0800
Subject: [PATCH] add child reaper to pid_namespace

Add a per pid_namespace child-reaper.  This is needed so processes are reaped
within the same pid space and do not spill over to the parent pid space.  Its
also needed so containers preserve existing semantic that pid == 1 would reap
orphaned children.

This is based on Eric Biederman's patch: http://lkml.org/lkml/2006/2/6/285

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c                     |  5 +++--
 include/linux/pid.h           |  5 +++--
 include/linux/pid_namespace.h |  6 ++++++
 include/linux/sched.h         |  1 -
 init/main.c                   |  5 ++---
 kernel/exit.c                 | 23 +++++++++++++++--------
 kernel/pid.c                  |  3 ++-
 kernel/signal.c               | 11 +++++++++--
 8 files changed, 40 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/fs/exec.c b/fs/exec.c
index 60433e2254a4..12d8cd461b41 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -38,6 +38,7 @@
 #include <linux/binfmts.h>
 #include <linux/swap.h>
 #include <linux/utsname.h>
+#include <linux/pid_namespace.h>
 #include <linux/module.h>
 #include <linux/namei.h>
 #include <linux/proc_fs.h>
@@ -620,8 +621,8 @@ static int de_thread(struct task_struct *tsk)
 	 * Reparenting needs write_lock on tasklist_lock,
 	 * so it is safe to do it under read_lock.
 	 */
-	if (unlikely(tsk->group_leader == child_reaper))
-		child_reaper = tsk;
+	if (unlikely(tsk->group_leader == child_reaper(tsk)))
+		tsk->nsproxy->pid_ns->child_reaper = tsk;
 
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 2c0007d17218..4dec047b1837 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -35,8 +35,9 @@ enum pid_type
  *
  * Holding a reference to struct pid solves both of these problems.
  * It is small so holding a reference does not consume a lot of
- * resources, and since a new struct pid is allocated when the numeric
- * pid value is reused we don't mistakenly refer to new processes.
+ * resources, and since a new struct pid is allocated when the numeric pid
+ * value is reused (when pids wrap around) we don't mistakenly refer to new
+ * processes.
  */
 
 struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 76e7c6b2cf33..d2a9d419f01f 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -19,6 +19,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	struct task_struct *child_reaper;
 };
 
 extern struct pid_namespace init_pid_ns;
@@ -36,4 +37,9 @@ static inline void put_pid_ns(struct pid_namespace *ns)
 	kref_put(&ns->kref, free_pid_ns);
 }
 
+static inline struct task_struct *child_reaper(struct task_struct *tsk)
+{
+	return tsk->nsproxy->pid_ns->child_reaper;
+}
+
 #endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fec1d419714..f0317edea141 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,7 +1400,6 @@ extern NORET_TYPE void do_group_exit(int);
 extern void daemonize(const char *, ...);
 extern int allow_signal(int);
 extern int disallow_signal(int);
-extern struct task_struct *child_reaper;
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
diff --git a/init/main.c b/init/main.c
index 4cdcd06e6d78..036f97c0c34c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -51,6 +51,7 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/utsrelease.h>
+#include <linux/pid_namespace.h>
 #include <linux/compile.h>
 
 #include <asm/io.h>
@@ -626,8 +627,6 @@ static int __init initcall_debug_setup(char *str)
 }
 __setup("initcall_debug", initcall_debug_setup);
 
-struct task_struct *child_reaper = &init_task;
-
 extern initcall_t __initcall_start[], __initcall_end[];
 
 static void __init do_initcalls(void)
@@ -727,7 +726,7 @@ static int init(void * unused)
 	 * assumptions about where in the task array this
 	 * can be found.
 	 */
-	child_reaper = current;
+	init_pid_ns.child_reaper = current;
 
 	cad_pid = task_pid(current);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 28d9feedfd27..fd0e067952ab 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -22,6 +22,7 @@
 #include <linux/file.h>
 #include <linux/binfmts.h>
 #include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
 #include <linux/mount.h>
@@ -48,7 +49,6 @@
 #include <asm/mmu_context.h>
 
 extern void sem_exit (void);
-extern struct task_struct *child_reaper;
 
 static void exit_mm(struct task_struct * tsk);
 
@@ -260,7 +260,8 @@ static int has_stopped_jobs(int pgrp)
 }
 
 /**
- * reparent_to_init - Reparent the calling kernel thread to the init task.
+ * reparent_to_init - Reparent the calling kernel thread to the init task
+ * of the pid space that the thread belongs to.
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to init so that
@@ -278,8 +279,8 @@ static void reparent_to_init(void)
 	ptrace_unlink(current);
 	/* Reparent to init */
 	remove_parent(current);
-	current->parent = child_reaper;
-	current->real_parent = child_reaper;
+	current->parent = child_reaper(current);
+	current->real_parent = child_reaper(current);
 	add_parent(current);
 
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -662,7 +663,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
  * When we die, we re-parent all our children.
  * Try to give them to another thread in our thread
  * group, and if no such member exists, give it to
- * the global child reaper process (ie "init")
+ * the child reaper process (ie "init") in our pid
+ * space.
  */
 static void
 forget_original_parent(struct task_struct *father, struct list_head *to_release)
@@ -673,7 +675,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper;
+			reaper = child_reaper(father);
 			break;
 		}
 	} while (reaper->exit_state);
@@ -859,8 +861,13 @@ fastcall NORET_TYPE void do_exit(long code)
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
-	if (unlikely(tsk == child_reaper))
-		panic("Attempted to kill init!");
+	if (unlikely(tsk == child_reaper(tsk))) {
+		if (tsk->nsproxy->pid_ns != &init_pid_ns)
+			tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
+		else
+			panic("Attempted to kill init!");
+	}
+
 
 	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 		current->ptrace_message = code;
diff --git a/kernel/pid.c b/kernel/pid.c
index 1d9cc268b499..2efe9d8d367b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -65,7 +65,8 @@ struct pid_namespace init_pid_ns = {
 	.pidmap = {
 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
 	},
-	.last_pid = 0
+	.last_pid = 0,
+	.child_reaper = &init_task
 };
 
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 9eac4db60eda..1921ffdc5e77 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -24,6 +24,9 @@
 #include <linux/signal.h>
 #include <linux/capability.h>
 #include <linux/freezer.h>
+#include <linux/pid_namespace.h>
+#include <linux/nsproxy.h>
+
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1877,8 +1880,12 @@ relock:
 		if (sig_kernel_ignore(signr)) /* Default is nothing. */
 			continue;
 
-		/* Init gets no signals it doesn't want.  */
-		if (current == child_reaper)
+		/*
+		 * Init of a pid space gets no signals it doesn't want from
+		 * within that pid space. It can of course get signals from
+		 * its parent pid space.
+		 */
+		if (current == child_reaper(current))
 			continue;
 
 		if (sig_kernel_stop(signr)) {
-- 
cgit v1.2.3


From f020bc468fe4a91d32046d448511978c7b611315 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:38:02 -0800
Subject: [PATCH] sys_setpgid: eliminate unnecessary
 do_each_task_pid(PIDTYPE_PGID)

All tasks in the process group have the same sid, we don't need to iterate
them all to check that the caller of sys_setpgid() doesn't change its
session.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index 4f9d23a3095f..c7675c1bfdf2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1397,16 +1397,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		goto out;
 
 	if (pgid != pid) {
-		struct task_struct *p;
+		struct task_struct *g =
+			find_task_by_pid_type(PIDTYPE_PGID, pgid);
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (process_session(p) == process_session(group_leader))
-				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
-		goto out;
+		if (!g || process_session(g) != process_session(group_leader))
+			goto out;
 	}
 
-ok_pgid:
 	err = security_task_setpgid(p, pgid);
 	if (err)
 		goto out;
-- 
cgit v1.2.3


From 62dfb5541a025b47df9405ff0219c7829a97d83b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:38:03 -0800
Subject: [PATCH] session_of_pgrp: kill unnecessary
 do_each_task_pid(PIDTYPE_PGID)

All members of the process group have the same sid and it can't be == 0.

NOTE: this code (and a similar one in sys_setpgid) was needed because it
was possibe to have ->session == 0. It's not possible any longer since

	[PATCH] pidhash: don't use zero pids
	Commit: c7c6464117a02b0d54feb4ebeca4db70fa493678

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index fd0e067952ab..03e64fe4a14a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -189,21 +189,18 @@ repeat:
 int session_of_pgrp(int pgrp)
 {
 	struct task_struct *p;
-	int sid = -1;
+	int sid = 0;
 
 	read_lock(&tasklist_lock);
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-		if (process_session(p) > 0) {
-			sid = process_session(p);
-			goto out;
-		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-	p = find_task_by_pid(pgrp);
-	if (p)
+
+	p = find_task_by_pid_type(PIDTYPE_PGID, pgrp);
+	if (p == NULL)
+		p = find_task_by_pid(pgrp);
+	if (p != NULL)
 		sid = process_session(p);
-out:
+
 	read_unlock(&tasklist_lock);
-	
+
 	return sid;
 }
 
-- 
cgit v1.2.3


From cf9f151c7257683f489df85f94baf408d1d5694a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Dec 2006 02:39:55 -0800
Subject: [PATCH] sysctl: simplify sysctl_uts_string

The binary interface to the namespace sysctls was never implemented resulting
in some really weird things if you attempted to use sys_sysctl to read your
hostname for example.

This patch series simples the code a little and implements the binary sysctl
interface.

In testing this patch series I discovered that our 32bit compatibility for the
binary sysctl interface is imperfect.  In particular KERN_SHMMAX and
KERN_SMMALL are size_t sized quantities and are returned as 8 bytes on to
32bit binaries using a x86_64 kernel.  However this has existing for a long
time so it is not a new regression with the namespace work.

Gads the whole sysctl thing needs work before it stops being easy to shoot
yourself in the foot.

Looking forward a little bit we need a better way to handle sysctls and
namespaces as our current technique will not work for the network namespace.
I think something based on the current overlapping sysctl trees will work but
the proc side needs to be redone before we can use it.

This patch:

Introduce get_uts() and put_uts() (used later) and remove most of the special
cases for when UTS namespace is compiled in.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 128 ++++++++++++--------------------------------------------
 1 file changed, 26 insertions(+), 102 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9846db93a595..c8bcbfb2e069 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -163,6 +163,28 @@ extern ctl_table inotify_table[];
 int sysctl_legacy_va_layout;
 #endif
 
+static void *get_uts(ctl_table *table, int write)
+{
+	char *which = table->data;
+#ifdef CONFIG_UTS_NS
+	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+	which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
+#endif
+	if (!write)
+		down_read(&uts_sem);
+	else
+		down_write(&uts_sem);
+	return which;
+}
+
+static void put_uts(ctl_table *table, int write, void *which)
+{
+	if (!write)
+		up_read(&uts_sem);
+	else
+		up_write(&uts_sem);
+}
+
 /* /proc declarations: */
 
 #ifdef CONFIG_PROC_SYSCTL
@@ -229,7 +251,6 @@ static ctl_table root_table[] = {
 };
 
 static ctl_table kern_table[] = {
-#ifndef CONFIG_UTS_NS
 	{
 		.ctl_name	= KERN_OSTYPE,
 		.procname	= "ostype",
@@ -275,54 +296,6 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_do_uts_string,
 		.strategy	= &sysctl_string,
 	},
-#else  /* !CONFIG_UTS_NS */
-	{
-		.ctl_name	= KERN_OSTYPE,
-		.procname	= "ostype",
-		.data		= NULL,
-		/* could maybe use __NEW_UTS_LEN here? */
-		.maxlen		= FIELD_SIZEOF(struct new_utsname, sysname),
-		.mode		= 0444,
-		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
-	},
-	{
-		.ctl_name	= KERN_OSRELEASE,
-		.procname	= "osrelease",
-		.data		= NULL,
-		.maxlen		= FIELD_SIZEOF(struct new_utsname, release),
-		.mode		= 0444,
-		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
-	},
-	{
-		.ctl_name	= KERN_VERSION,
-		.procname	= "version",
-		.data		= NULL,
-		.maxlen		= FIELD_SIZEOF(struct new_utsname, version),
-		.mode		= 0444,
-		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
-	},
-	{
-		.ctl_name	= KERN_NODENAME,
-		.procname	= "hostname",
-		.data		= NULL,
-		.maxlen		= FIELD_SIZEOF(struct new_utsname, nodename),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
-	},
-	{
-		.ctl_name	= KERN_DOMAINNAME,
-		.procname	= "domainname",
-		.data		= NULL,
-		.maxlen		= FIELD_SIZEOF(struct new_utsname, domainname),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
-	},
-#endif /* !CONFIG_UTS_NS */
 	{
 		.ctl_name	= KERN_PANIC,
 		.procname	= "panic",
@@ -1753,66 +1726,17 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
  *	Special case of dostring for the UTS structure. This has locks
  *	to observe. Should this be in kernel/sys.c ????
  */
- 
-#ifndef CONFIG_UTS_NS
-static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int r;
 
-	if (!write) {
-		down_read(&uts_sem);
-		r=proc_dostring(table,0,filp,buffer,lenp, ppos);
-		up_read(&uts_sem);
-	} else {
-		down_write(&uts_sem);
-		r=proc_dostring(table,1,filp,buffer,lenp, ppos);
-		up_write(&uts_sem);
-	}
-	return r;
-}
-#else /* !CONFIG_UTS_NS */
 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int r;
-	struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
-	char* which;
-
-	switch (table->ctl_name) {
-	case KERN_OSTYPE:
-		which = uts_ns->name.sysname;
-		break;
-	case KERN_NODENAME:
-		which = uts_ns->name.nodename;
-		break;
-	case KERN_OSRELEASE:
-		which = uts_ns->name.release;
-		break;
-	case KERN_VERSION:
-		which = uts_ns->name.version;
-		break;
-	case KERN_DOMAINNAME:
-		which = uts_ns->name.domainname;
-		break;
-	default:
-		r = -EINVAL;
-		goto out;
-	}
-
-	if (!write) {
-		down_read(&uts_sem);
-		r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
-		up_read(&uts_sem);
-	} else {
-		down_write(&uts_sem);
-		r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
-		up_write(&uts_sem);
-	}
- out:
+	void *which;
+	which = get_uts(table, write);
+	r = _proc_do_string(which, table->maxlen,write,filp,buffer,lenp, ppos);
+	put_uts(table, write, which);
 	return r;
 }
-#endif /* !CONFIG_UTS_NS */
 
 static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
 				 int *valp,
-- 
cgit v1.2.3


From c4b8b769fa9051838d2772886ecd0ee2a926ddc3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Dec 2006 02:39:55 -0800
Subject: [PATCH] sysctl: implement sysctl_uts_string()

The problem: When using sys_sysctl we don't read the proper values for the
variables exported from the uts namespace, nor do we do the proper locking.

This patch introduces sysctl_uts_string which properly fetches the values and
does the proper locking.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c8bcbfb2e069..77ea4fc386ef 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -137,6 +137,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
 
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		  void __user *oldval, size_t __user *oldlenp,
+		  void __user *newval, size_t newlen, void **context);
+
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -258,7 +262,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.sysname),
 		.mode		= 0444,
 		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_OSRELEASE,
@@ -267,7 +271,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.release),
 		.mode		= 0444,
 		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_VERSION,
@@ -276,7 +280,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.version),
 		.mode		= 0444,
 		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_NODENAME,
@@ -285,7 +289,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.nodename),
 		.mode		= 0644,
 		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_DOMAINNAME,
@@ -294,7 +298,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.domainname),
 		.mode		= 0644,
 		.proc_handler	= &proc_do_uts_string,
-		.strategy	= &sysctl_string,
+		.strategy	= &sysctl_uts_string,
 	},
 	{
 		.ctl_name	= KERN_PANIC,
@@ -2598,6 +2602,23 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 	return 1;
 }
 
+
+/* The generic string strategy routine: */
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		  void __user *oldval, size_t __user *oldlenp,
+		  void __user *newval, size_t newlen, void **context)
+{
+	struct ctl_table uts_table;
+	int r, write;
+	write = newval && newlen;
+	memcpy(&uts_table, table, sizeof(uts_table));
+	uts_table.data = get_uts(table, write);
+	r = sysctl_string(&uts_table, name, nlen,
+		oldval, oldlenp, newval, newlen, context);
+	put_uts(table, write, uts_table.data);
+	return r;
+}
+
 #else /* CONFIG_SYSCTL_SYSCALL */
 
 
@@ -2662,6 +2683,12 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 	return -ENOSYS;
 }
 
+static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+		  void __user *oldval, size_t __user *oldlenp,
+		  void __user *newval, size_t newlen, void **context)
+{
+	return -ENOSYS;
+}
 #endif /* CONFIG_SYSCTL_SYSCALL */
 
 /*
-- 
cgit v1.2.3


From 9bc9a6bd3cf559bffe962c51efb062e8b5270ca9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Dec 2006 02:39:56 -0800
Subject: [PATCH] sysctl: simplify ipc ns specific sysctls

Refactor the ipc sysctl support so that it is simpler, more readable, and
prepares for fixing the bug with the wrong values being returned in the
sys_sysctl interface.

The function proc_do_ipc_string() was misnamed as it never handled strings.
It's magic of when to work with strings and when to work with longs belonged
in the sysctl table.  I couldn't tell if the code would work if you disabled
the ipc namespace but it certainly looked like it would have problems.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 106 ++++++++++++++++++++++++++------------------------------
 1 file changed, 49 insertions(+), 57 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 77ea4fc386ef..10474a63a111 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,7 +92,9 @@ extern char modprobe_path[];
 extern int sg_big_buff;
 #endif
 #ifdef CONFIG_SYSVIPC
-static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
@@ -189,6 +191,18 @@ static void put_uts(ctl_table *table, int write, void *which)
 		up_write(&uts_sem);
 }
 
+#ifdef CONFIG_SYSVIPC
+static void *get_ipc(ctl_table *table, int write)
+{
+	char *which = table->data;
+	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+	which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
+	return which;
+}
+#else
+#define get_ipc(T,W) ((T)->data)
+#endif
+
 /* /proc declarations: */
 
 #ifdef CONFIG_PROC_SYSCTL
@@ -458,58 +472,58 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_SHMMAX,
 		.procname	= "shmmax",
-		.data		= NULL,
-		.maxlen		= sizeof (size_t),
+		.data		= &init_ipc_ns.shm_ctlmax,
+		.maxlen		= sizeof (init_ipc_ns.shm_ctlmax),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_doulongvec_minmax,
 	},
 	{
 		.ctl_name	= KERN_SHMALL,
 		.procname	= "shmall",
-		.data		= NULL,
-		.maxlen		= sizeof (size_t),
+		.data		= &init_ipc_ns.shm_ctlall,
+		.maxlen		= sizeof (init_ipc_ns.shm_ctlall),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_doulongvec_minmax,
 	},
 	{
 		.ctl_name	= KERN_SHMMNI,
 		.procname	= "shmmni",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
+		.data		= &init_ipc_ns.shm_ctlmni,
+		.maxlen		= sizeof (init_ipc_ns.shm_ctlmni),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_dointvec,
 	},
 	{
 		.ctl_name	= KERN_MSGMAX,
 		.procname	= "msgmax",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
+		.data		= &init_ipc_ns.msg_ctlmax,
+		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_dointvec,
 	},
 	{
 		.ctl_name	= KERN_MSGMNI,
 		.procname	= "msgmni",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
+		.data		= &init_ipc_ns.msg_ctlmni,
+		.maxlen		= sizeof (init_ipc_ns.msg_ctlmni),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_dointvec,
 	},
 	{
 		.ctl_name	= KERN_MSGMNB,
 		.procname	=  "msgmnb",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
+		.data		= &init_ipc_ns.msg_ctlmnb,
+		.maxlen		= sizeof (init_ipc_ns.msg_ctlmnb),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_dointvec,
 	},
 	{
 		.ctl_name	= KERN_SEM,
 		.procname	= "sem",
-		.data		= NULL,
+		.data		= &init_ipc_ns.sem_ctls,
 		.maxlen		= 4*sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_do_ipc_string,
+		.proc_handler	= &proc_ipc_dointvec,
 	},
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -2319,46 +2333,24 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
 }
 
 #ifdef CONFIG_SYSVIPC
-static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
-		void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	void *data;
-	struct ipc_namespace *ns;
-
-	ns = current->nsproxy->ipc_ns;
-
-	switch (table->ctl_name) {
-	case KERN_SHMMAX:
-		data = &ns->shm_ctlmax;
-		goto proc_minmax;
-	case KERN_SHMALL:
-		data = &ns->shm_ctlall;
-		goto proc_minmax;
-	case KERN_SHMMNI:
-		data = &ns->shm_ctlmni;
-		break;
-	case KERN_MSGMAX:
-		data = &ns->msg_ctlmax;
-		break;
-	case KERN_MSGMNI:
-		data = &ns->msg_ctlmni;
-		break;
-	case KERN_MSGMNB:
-		data = &ns->msg_ctlmnb;
-		break;
-	case KERN_SEM:
-		data = &ns->sem_ctls;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return __do_proc_dointvec(data, table, write, filp, buffer,
+	void *which;
+	which = get_ipc(table, write);
+	return __do_proc_dointvec(which, table, write, filp, buffer,
 			lenp, ppos, NULL, NULL);
-proc_minmax:
-	return __do_proc_doulongvec_minmax(data, table, write, filp, buffer,
+}
+
+static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	void *which;
+	which = get_ipc(table, write);
+	return __do_proc_doulongvec_minmax(which, table, write, filp, buffer,
 			lenp, ppos, 1l, 1l);
 }
+
 #endif
 
 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
-- 
cgit v1.2.3


From 6b49a257850fb8ad91f4c76bb712e9213141a34a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 8 Dec 2006 02:39:57 -0800
Subject: [PATCH] sysctl: fix sys_sysctl interface of ipc sysctls

Currently there is a regression and the ipc sysctls don't show up in the
binary sysctl namespace.

This patch adds sysctl_ipc_data to read data/write from the appropriate
namespace and deliver it in the expected manner.

[akpm@osdl.org: warning fix]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 10474a63a111..025fcb3c66f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -143,6 +143,12 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen, void **context);
 
+#ifdef CONFIG_SYSVIPC
+static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
+		  void __user *oldval, size_t __user *oldlenp,
+		  void __user *newval, size_t newlen, void **context);
+#endif
+
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -476,6 +482,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.shm_ctlmax),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_doulongvec_minmax,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_SHMALL,
@@ -484,6 +491,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.shm_ctlall),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_doulongvec_minmax,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_SHMMNI,
@@ -492,6 +500,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.shm_ctlmni),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_dointvec,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_MSGMAX,
@@ -500,6 +509,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_dointvec,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_MSGMNI,
@@ -508,6 +518,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmni),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_dointvec,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_MSGMNB,
@@ -516,6 +527,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmnb),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_dointvec,
+		.strategy	= sysctl_ipc_data,
 	},
 	{
 		.ctl_name	= KERN_SEM,
@@ -524,6 +536,7 @@ static ctl_table kern_table[] = {
 		.maxlen		= 4*sizeof (int),
 		.mode		= 0644,
 		.proc_handler	= &proc_ipc_dointvec,
+		.strategy	= sysctl_ipc_data,
 	},
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -2611,6 +2624,47 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 	return r;
 }
 
+#ifdef CONFIG_SYSVIPC
+/* The generic sysctl ipc data routine. */
+static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	size_t len;
+	void *data;
+
+	/* Get out of I don't have a variable */
+	if (!table->data || !table->maxlen)
+		return -ENOTDIR;
+
+	data = get_ipc(table, 1);
+	if (!data)
+		return -ENOTDIR;
+
+	if (oldval && oldlenp) {
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, data, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	if (newval && newlen) {
+		if (newlen > table->maxlen)
+			newlen = table->maxlen;
+
+		if (copy_from_user(data, newval, newlen))
+			return -EFAULT;
+	}
+	return 1;
+}
+#endif
+
 #else /* CONFIG_SYSCTL_SYSCALL */
 
 
@@ -2681,6 +2735,12 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 {
 	return -ENOSYS;
 }
+static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	return -ENOSYS;
+}
 #endif /* CONFIG_SYSCTL_SYSCALL */
 
 /*
-- 
cgit v1.2.3


From 4594bf159f1962cec3b727954b7c598b07e2e737 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Dec 2006 11:33:26 +0000
Subject: [PATCH] WorkStruct: Use direct assignment rather than cmpxchg()

Use direct assignment rather than cmpxchg() as the latter is unavailable
and unimplementable on some platforms and is actually unnecessary.

The use of cmpxchg() was to guard against two possibilities, neither of
which can actually occur:

 (1) The pending flag may have been unset or may be cleared.  However, given
     where it's called, the pending flag is _always_ set.  I don't think it
     can be unset whilst we're in set_wq_data().

     Once the work is enqueued to be actually run, the only way off the queue
     is for it to be actually run.

     If it's a delayed work item, then the bit can't be cleared by the timer
     because we haven't started the timer yet.  Also, the pending bit can't be
     cleared by cancelling the delayed work _until_ the work item has had its
     timer started.

 (2) The workqueue pointer might change.  This can only happen in two cases:

     (a) The work item has just been queued to actually run, and so we're
         protected by the appropriate workqueue spinlock.

     (b) A delayed work item is being queued, and so the timer hasn't been
     	 started yet, and so no one else knows about the work item or can
     	 access it (the pending bit protects us).

     Besides, set_wq_data() _sets_ the workqueue pointer unconditionally, so
     it can be assigned instead.

So, replacing the set_wq_data() with a straight assignment would be okay
in most cases.

The problem is where we end up tangling with test_and_set_bit() emulated
using spinlocks, and even then it's not a problem _provided_
test_and_set_bit() doesn't attempt to modify the word if the bit was
set.

If that's a problem, then a bitops-proofed assignment will be required -
equivalent to atomic_set() vs other atomic_xxx() ops.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/workqueue.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6b186750e9be..db49886bfae1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -85,22 +85,19 @@ static inline int is_single_threaded(struct workqueue_struct *wq)
 	return list_empty(&wq->list);
 }
 
+/*
+ * Set the workqueue on which a work item is to be run
+ * - Must *only* be called if the pending flag is set
+ */
 static inline void set_wq_data(struct work_struct *work, void *wq)
 {
-	unsigned long new, old, res;
+	unsigned long new;
+
+	BUG_ON(!work_pending(work));
 
-	/* assume the pending flag is already set and that the task has already
-	 * been queued on this workqueue */
 	new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
-	res = work->management;
-	if (res != new) {
-		do {
-			old = res;
-			new = (unsigned long) wq;
-			new |= (old & WORK_STRUCT_FLAG_MASK);
-			res = cmpxchg(&work->management, old, new);
-		} while (res != old);
-	}
+	new |= work->management & WORK_STRUCT_FLAG_MASK;
+	work->management = new;
 }
 
 static inline void *get_wq_data(struct work_struct *work)
-- 
cgit v1.2.3


From d53ef07ab45085c0b06b652d588aa49b8ba41458 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sun, 10 Dec 2006 02:18:36 -0800
Subject: [PATCH] ipc-procfs-sysctl mixups

When CONFIG_PROC_FS=n and CONFIG_PROC_SYSCTL=n but CONFIG_SYSVIPC=y, we get
this build error:

kernel/built-in.o:(.data+0xc38): undefined reference to `proc_ipc_doulongvec_minmax'
kernel/built-in.o:(.data+0xc88): undefined reference to `proc_ipc_doulongvec_minmax'
kernel/built-in.o:(.data+0xcd8): undefined reference to `proc_ipc_dointvec'
kernel/built-in.o:(.data+0xd28): undefined reference to `proc_ipc_dointvec'
kernel/built-in.o:(.data+0xd78): undefined reference to `proc_ipc_dointvec'
kernel/built-in.o:(.data+0xdc8): undefined reference to `proc_ipc_dointvec'
kernel/built-in.o:(.data+0xe18): undefined reference to `proc_ipc_dointvec'
make: *** [vmlinux] Error 1

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 025fcb3c66f8..1c5697e3521e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2408,6 +2408,17 @@ static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
 {
 	return -ENOSYS;
 }
+static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
+		struct file *filp, void __user *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
 #endif
 
 int proc_dointvec(ctl_table *table, int write, struct file *filp,
-- 
cgit v1.2.3


From 98d7340c360993fdd703609ff7462051e03cc2fb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 10 Dec 2006 02:19:09 -0800
Subject: [PATCH] sysctl: remove some OPs

kernel.cap-bound uses only OP_SET and OP_AND

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1c5697e3521e..c86445a62af2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1931,9 +1931,6 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
 
 #define OP_SET	0
 #define OP_AND	1
-#define OP_OR	2
-#define OP_MAX	3
-#define OP_MIN	4
 
 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
 				      int *valp,
@@ -1945,13 +1942,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
 		switch(op) {
 		case OP_SET:	*valp = val; break;
 		case OP_AND:	*valp &= val; break;
-		case OP_OR:	*valp |= val; break;
-		case OP_MAX:	if(*valp < val)
-					*valp = val;
-				break;
-		case OP_MIN:	if(*valp > val)
-				*valp = val;
-				break;
 		}
 	} else {
 		int val = *valp;
-- 
cgit v1.2.3


From 1f29bcd739972f71f2fd5d5d265daf3e1208fa5e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 10 Dec 2006 02:19:10 -0800
Subject: [PATCH] sysctl: remove unused "context" param

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andi Kleen <ak@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/kernel/pm.c           |  6 +++---
 arch/mips/lasat/sysctl.c       | 17 +++++++--------
 arch/x86_64/kernel/vsyscall.c  |  3 +--
 drivers/char/random.c          |  2 +-
 include/linux/sysctl.h         |  5 ++---
 include/net/ip.h               |  3 +--
 include/net/sctp/sctp.h        |  2 +-
 kernel/sysctl.c                | 47 ++++++++++++++++++++----------------------
 net/decnet/dn_dev.c            |  6 ++----
 net/decnet/sysctl_net_decnet.c |  6 ++----
 net/ipv4/devinet.c             |  3 +--
 net/ipv4/route.c               |  3 +--
 net/ipv4/sysctl_net_ipv4.c     | 16 ++++++--------
 net/ipv6/addrconf.c            |  3 +--
 net/ipv6/ndisc.c               |  9 +++-----
 15 files changed, 55 insertions(+), 76 deletions(-)

(limited to 'kernel')

diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index c1d9fc8f1a85..ee677ced7b68 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -223,7 +223,7 @@ static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
 
 static int cmode_sysctl(ctl_table *table, int __user *name, int nlen,
 			void __user *oldval, size_t __user *oldlenp,
-			void __user *newval, size_t newlen, void **context)
+			void __user *newval, size_t newlen)
 {
 	if (oldval && oldlenp) {
 		size_t oldlen;
@@ -326,7 +326,7 @@ static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
 
 static int p0_sysctl(ctl_table *table, int __user *name, int nlen,
 		     void __user *oldval, size_t __user *oldlenp,
-		     void __user *newval, size_t newlen, void **context)
+		     void __user *newval, size_t newlen)
 {
 	if (oldval && oldlenp) {
 		size_t oldlen;
@@ -370,7 +370,7 @@ static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
 
 static int cm_sysctl(ctl_table *table, int __user *name, int nlen,
 		     void __user *oldval, size_t __user *oldlenp,
-		     void __user *newval, size_t newlen, void **context)
+		     void __user *newval, size_t newlen)
 {
 	if (oldval && oldlenp) {
 		size_t oldlen;
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index da35d4555491..12878359f2c8 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -40,12 +40,12 @@ static DEFINE_MUTEX(lasat_info_mutex);
 /* Strategy function to write EEPROM after changing string entry */
 int sysctl_lasatstring(ctl_table *table, int *name, int nlen,
 		void *oldval, size_t *oldlenp,
-		void *newval, size_t newlen, void **context)
+		void *newval, size_t newlen)
 {
 	int r;
 	mutex_lock(&lasat_info_mutex);
 	r = sysctl_string(table, name,
-			  nlen, oldval, oldlenp, newval, newlen, context);
+			  nlen, oldval, oldlenp, newval, newlen);
 	if (r < 0) {
 		mutex_unlock(&lasat_info_mutex);
 		return r;
@@ -119,11 +119,11 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
 /* Sysctl for setting the IP addresses */
 int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen,
 		    void *oldval, size_t *oldlenp,
-		    void *newval, size_t newlen, void **context)
+		    void *newval, size_t newlen)
 {
 	int r;
 	mutex_lock(&lasat_info_mutex);
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context);
+	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
 	if (r < 0) {
 		mutex_unlock(&lasat_info_mutex);
 		return r;
@@ -139,14 +139,14 @@ int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen,
 /* Same for RTC */
 int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen,
 		    void *oldval, size_t *oldlenp,
-		    void *newval, size_t newlen, void **context)
+		    void *newval, size_t newlen)
 {
 	int r;
 	mutex_lock(&lasat_info_mutex);
 	rtctmp = ds1603_read();
 	if (rtctmp < 0)
 		rtctmp = 0;
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context);
+	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
 	if (r < 0) {
 		mutex_unlock(&lasat_info_mutex);
 		return r;
@@ -251,13 +251,12 @@ int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
 
 static int sysctl_lasat_eeprom_value(ctl_table *table, int *name, int nlen,
 				     void *oldval, size_t *oldlenp,
-				     void *newval, size_t newlen,
-				     void **context)
+				     void *newval, size_t newlen)
 {
 	int r;
 
 	mutex_lock(&lasat_info_mutex);
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen, context);
+	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
 	if (r < 0) {
 		mutex_unlock(&lasat_info_mutex);
 		return r;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 4a673f5397a0..2433d6fc68b1 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -225,8 +225,7 @@ out:
 
 static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
 				void __user *oldval, size_t __user *oldlenp,
-				void __user *newval, size_t newlen,
-				void **context)
+				void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 092a01cc02da..13d0b1350a62 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1203,7 +1203,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
 
 static int uuid_strategy(ctl_table *table, int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
-			 void __user *newval, size_t newlen, void **context)
+			 void __user *newval, size_t newlen)
 {
 	unsigned char tmp_uuid[16], *uuid;
 	unsigned int len;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 94316a98e0d0..6d8846e7be6d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -918,8 +918,7 @@ typedef struct ctl_table ctl_table;
 
 typedef int ctl_handler (ctl_table *table, int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
-			 void __user *newval, size_t newlen, 
-			 void **context);
+			 void __user *newval, size_t newlen);
 
 typedef int proc_handler (ctl_table *ctl, int write, struct file * filp,
 			  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -950,7 +949,7 @@ extern int do_sysctl (int __user *name, int nlen,
 extern int do_sysctl_strategy (ctl_table *table, 
 			       int __user *name, int nlen,
 			       void __user *oldval, size_t __user *oldlenp,
-			       void __user *newval, size_t newlen, void ** context);
+			       void __user *newval, size_t newlen);
 
 extern ctl_handler sysctl_string;
 extern ctl_handler sysctl_intvec;
diff --git a/include/net/ip.h b/include/net/ip.h
index 83cb9ac5554e..053f02b5cb89 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -376,8 +376,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 			 size_t *lenp, loff_t *ppos);
 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 				  void __user *oldval, size_t __user *oldlenp,
-				  void __user *newval, size_t newlen, 
-				  void **context);
+				  void __user *newval, size_t newlen);
 #ifdef CONFIG_PROC_FS
 extern int ip_misc_proc_init(void);
 #endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 215461f18db1..c818f87122af 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -368,7 +368,7 @@ static inline void sctp_sysctl_register(void) { return; }
 static inline void sctp_sysctl_unregister(void) { return; }
 static inline int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context) {
+		void __user *newval, size_t newlen) {
 	return -ENOSYS;
 }
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c86445a62af2..130c5ec9ee0b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -133,7 +133,7 @@ extern int max_lock_depth;
 
 #ifdef CONFIG_SYSCTL_SYSCALL
 static int parse_table(int __user *, int, void __user *, size_t __user *,
-		void __user *, size_t, ctl_table *, void **);
+		void __user *, size_t, ctl_table *);
 #endif
 
 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
@@ -141,12 +141,12 @@ static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 
 static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context);
+		  void __user *newval, size_t newlen);
 
 #ifdef CONFIG_SYSVIPC
 static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context);
+		  void __user *newval, size_t newlen);
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
@@ -1243,7 +1243,6 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
 	do {
 		struct ctl_table_header *head =
 			list_entry(tmp, struct ctl_table_header, ctl_entry);
-		void *context = NULL;
 
 		if (!use_table(head))
 			continue;
@@ -1251,9 +1250,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
 		spin_unlock(&sysctl_lock);
 
 		error = parse_table(name, nlen, oldval, oldlenp, 
-					newval, newlen, head->ctl_table,
-					&context);
-		kfree(context);
+					newval, newlen, head->ctl_table);
 
 		spin_lock(&sysctl_lock);
 		unuse_table(head);
@@ -1309,7 +1306,7 @@ static inline int ctl_perm(ctl_table *table, int op)
 static int parse_table(int __user *name, int nlen,
 		       void __user *oldval, size_t __user *oldlenp,
 		       void __user *newval, size_t newlen,
-		       ctl_table *table, void **context)
+		       ctl_table *table)
 {
 	int n;
 repeat:
@@ -1329,7 +1326,7 @@ repeat:
 					error = table->strategy(
 						table, name, nlen,
 						oldval, oldlenp,
-						newval, newlen, context);
+						newval, newlen);
 					if (error)
 						return error;
 				}
@@ -1340,7 +1337,7 @@ repeat:
 			}
 			error = do_sysctl_strategy(table, name, nlen,
 						   oldval, oldlenp,
-						   newval, newlen, context);
+						   newval, newlen);
 			return error;
 		}
 	}
@@ -1351,7 +1348,7 @@ repeat:
 int do_sysctl_strategy (ctl_table *table, 
 			int __user *name, int nlen,
 			void __user *oldval, size_t __user *oldlenp,
-			void __user *newval, size_t newlen, void **context)
+			void __user *newval, size_t newlen)
 {
 	int op = 0, rc;
 	size_t len;
@@ -1365,7 +1362,7 @@ int do_sysctl_strategy (ctl_table *table,
 
 	if (table->strategy) {
 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
-				     newval, newlen, context);
+				     newval, newlen);
 		if (rc < 0)
 			return rc;
 		if (rc > 0)
@@ -2473,7 +2470,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
 /* The generic string strategy routine: */
 int sysctl_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context)
+		  void __user *newval, size_t newlen)
 {
 	if (!table->data || !table->maxlen) 
 		return -ENOTDIR;
@@ -2519,7 +2516,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen,
  */
 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 
 	if (newval && newlen) {
@@ -2555,7 +2552,7 @@ int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
 /* Strategy function to convert jiffies to seconds */ 
 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	if (oldval) {
 		size_t olen;
@@ -2583,7 +2580,7 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
 /* Strategy function to convert jiffies to seconds */ 
 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	if (oldval) {
 		size_t olen;
@@ -2612,7 +2609,7 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 /* The generic string strategy routine: */
 static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context)
+		  void __user *newval, size_t newlen)
 {
 	struct ctl_table uts_table;
 	int r, write;
@@ -2620,7 +2617,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 	memcpy(&uts_table, table, sizeof(uts_table));
 	uts_table.data = get_uts(table, write);
 	r = sysctl_string(&uts_table, name, nlen,
-		oldval, oldlenp, newval, newlen, context);
+		oldval, oldlenp, newval, newlen);
 	put_uts(table, write, uts_table.data);
 	return r;
 }
@@ -2629,7 +2626,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 /* The generic sysctl ipc data routine. */
 static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	size_t len;
 	void *data;
@@ -2704,41 +2701,41 @@ out:
 
 int sysctl_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context)
+		  void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
 static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
-		  void __user *newval, size_t newlen, void **context)
+		  void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context)
+		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0b9d4c955154..fc6f3c023a54 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -167,8 +167,7 @@ static int dn_forwarding_proc(ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
 static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
 			void __user *oldval, size_t __user *oldlenp,
-			void __user *newval, size_t newlen,
-			void **context);
+			void __user *newval, size_t newlen);
 
 static struct dn_dev_sysctl_table {
 	struct ctl_table_header *sysctl_header;
@@ -347,8 +346,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 
 static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
 			void __user *oldval, size_t __user *oldlenp,
-			void __user *newval, size_t newlen,
-			void **context)
+			void __user *newval, size_t newlen)
 {
 #ifdef CONFIG_DECNET_ROUTER
 	struct net_device *dev = table->extra1;
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index e246f054f368..a4065eb1341e 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str)
 
 static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
 				void __user *oldval, size_t __user *oldlenp,
-				void __user *newval, size_t newlen,
-				void **context)
+				void __user *newval, size_t newlen)
 {
 	size_t len;
 	__le16 addr;
@@ -220,8 +219,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
 
 static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
 				void __user *oldval, size_t __user *oldlenp,
-				void __user *newval, size_t newlen,
-				void **context)
+				void __user *newval, size_t newlen)
 {
 	size_t len;
 	struct net_device *dev;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2fd899160f85..84bed40273ad 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1303,8 +1303,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 
 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 				  void __user *oldval, size_t __user *oldlenp,
-				  void __user *newval, size_t newlen, 
-				  void **context)
+				  void __user *newval, size_t newlen)
 {
 	int *valp = table->data;
 	int new;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11c167118e87..1aaff0a2e098 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2872,8 +2872,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						void __user *oldval,
 						size_t __user *oldlenp,
 						void __user *newval,
-						size_t newlen,
-						void **context)
+						size_t newlen)
 {
 	int delay;
 	if (newlen != sizeof(int))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index dfcf47f10f88..fabf69a9108c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -51,8 +51,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 static int ipv4_sysctl_forward_strategy(ctl_table *table,
 			 int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
-			 void __user *newval, size_t newlen, 
-			 void **context)
+			 void __user *newval, size_t newlen)
 {
 	int *valp = table->data;
 	int new;
@@ -111,8 +110,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
 static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
 					 int nlen, void __user *oldval,
 					 size_t __user *oldlenp,
-					 void __user *newval, size_t newlen,
-					 void **context)
+					 void __user *newval, size_t newlen)
 {
 	char val[TCP_CA_NAME_MAX];
 	ctl_table tbl = {
@@ -122,8 +120,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
 	int ret;
 
 	tcp_get_default_congestion_control(val);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen,
-			    context);
+	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen)
 		ret = tcp_set_default_congestion_control(val);
 	return ret;
@@ -169,8 +166,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
 static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
 					       int nlen, void __user *oldval,
 					       size_t __user *oldlenp,
-					       void __user *newval, size_t newlen,
-					       void **context)
+					       void __user *newval,
+					       size_t newlen)
 {
 	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
 	int ret;
@@ -180,8 +177,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
 		return -ENOMEM;
 
 	tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen,
-			    context);
+	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen)
 		ret = tcp_set_allowed_congestion_control(tbl.data);
 	kfree(tbl.data);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a5e8d207a51b..9b0a90643151 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3656,8 +3656,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
 					    int __user *name, int nlen,
 					    void __user *oldval,
 					    size_t __user *oldlenp,
-					    void __user *newval, size_t newlen,
-					    void **context)
+					    void __user *newval, size_t newlen)
 {
 	int *valp = table->data;
 	int new;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 56ea92837307..6a9f616de37d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1667,8 +1667,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
 					int nlen, void __user *oldval,
 					size_t __user *oldlenp,
-					void __user *newval, size_t newlen,
-					void **context)
+					void __user *newval, size_t newlen)
 {
 	struct net_device *dev = ctl->extra1;
 	struct inet6_dev *idev;
@@ -1681,14 +1680,12 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
 	switch (ctl->ctl_name) {
 	case NET_NEIGH_REACHABLE_TIME:
 		ret = sysctl_jiffies(ctl, name, nlen,
-				     oldval, oldlenp, newval, newlen,
-				     context);
+				     oldval, oldlenp, newval, newlen);
 		break;
 	case NET_NEIGH_RETRANS_TIME_MS:
 	case NET_NEIGH_REACHABLE_TIME_MS:
 		 ret = sysctl_ms_jiffies(ctl, name, nlen,
-					 oldval, oldlenp, newval, newlen,
-					 context);
+					 oldval, oldlenp, newval, newlen);
 		 break;
 	default:
 		ret = 0;
-- 
cgit v1.2.3


From 7c3ab7381e79dfc7db14a67c6f4f3285664e1ec2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:19 -0800
Subject: [PATCH] io-accounting: core statistics

The present per-task IO accounting isn't very useful.  It simply counts the
number of bytes passed into read() and write().  So if a process reads 1MB
from an already-cached file, it is accused of having performed 1MB of I/O,
which is wrong.

(David Wright had some comments on the applicability of the present logical IO accounting:

  For billing purposes it is useless but for workload analysis it is very
  useful

  read_bytes/read_calls  average read request size
  write_bytes/write_calls average write request size

  read_bytes/read_blocks ie logical/physical can indicate hit rate or thrashing
  write_bytes/write_blocks  ie logical/physical  guess since pdflush writes can
                                                be missed

  I often look for logical larger than physical to see filesystem cache
  problems.  And the bytes/cpusec can help find applications that are
  dominating the cache and causing slow interactive response from page cache
  contention.

  I want to find the IO intensive applications and make sure they are doing
  efficient IO.  Thus the acctcms(sysV) or csacms command would give the high
  IO commands).

This patchset adds new accounting which tries to be more accurate.  We account
for three things:

reads:

  attempt to count the number of bytes which this process really did cause
  to be fetched from the storage layer.  Done at the submit_bio() level, so it
  is accurate for block-backed filesystems.  I also attempt to wire up NFS and
  CIFS.

writes:

  attempt to count the number of bytes which this process caused to be sent
  to the storage layer.  This is done at page-dirtying time.

  The big inaccuracy here is truncate.  If a process writes 1MB to a file
  and then deletes the file, it will in fact perform no writeout.  But it will
  have been accounted as having caused 1MB of write.

  So...

cancelled_writes:

  account the number of bytes which this process caused to not happen, by
  truncating pagecache.

  We _could_ just subtract this from the process's `write' accounting.  But
  that means that some processes would be reported to have done negative
  amounts of write IO, which is silly.

  So we just report the raw number and punt this decision up to userspace.

Now, we _could_ account for writes at the physical I/O level.  But

- This would require that we track memory-dirtying tasks at the per-page
  level (would require a new pointer in struct page).

- It would mean that IO statistics for a process are usually only available
  long after that process has exitted.  Which means that we probably cannot
  communicate this info via taskstats.

This patch:

Wire up the kernel-private data structures and the accessor functions to
manipulate them.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h                  |  2 ++
 include/linux/task_io_accounting.h     | 37 ++++++++++++++++++++++++++
 include/linux/task_io_accounting_ops.h | 47 ++++++++++++++++++++++++++++++++++
 init/Kconfig                           |  9 +++++++
 kernel/fork.c                          |  2 ++
 5 files changed, 97 insertions(+)
 create mode 100644 include/linux/task_io_accounting.h
 create mode 100644 include/linux/task_io_accounting_ops.h

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad9c46071ff8..1208feab46e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -82,6 +82,7 @@ struct sched_param {
 #include <linux/resource.h>
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
+#include <linux/task_io_accounting.h>
 
 #include <asm/processor.h>
 
@@ -1013,6 +1014,7 @@ struct task_struct {
 	wait_queue_t *io_wait;
 /* i/o counters(bytes read/written, #syscalls */
 	u64 rchar, wchar, syscr, syscw;
+	struct task_io_accounting ioac;
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h
new file mode 100644
index 000000000000..44d00e9cceea
--- /dev/null
+++ b/include/linux/task_io_accounting.h
@@ -0,0 +1,37 @@
+/*
+ * task_io_accounting: a structure which is used for recording a single task's
+ * IO statistics.
+ *
+ * Don't include this header file directly - it is designed to be dragged in via
+ * sched.h.
+ *
+ * Blame akpm@osdl.org for all this.
+ */
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+struct task_io_accounting {
+	/*
+	 * The number of bytes which this task has caused to be read from
+	 * storage.
+	 */
+	u64 read_bytes;
+
+	/*
+	 * The number of bytes which this task has caused, or shall cause to be
+	 * written to disk.
+	 */
+	u64 write_bytes;
+
+	/*
+	 * A task can cause "negative" IO too.  If this task truncates some
+	 * dirty pagecache, some IO which another task has been accounted for
+	 * (in its write_bytes) will not be happening.  We _could_ just
+	 * subtract that from the truncating task's write_bytes, but there is
+	 * information loss in doing that.
+	 */
+	u64 cancelled_write_bytes;
+};
+#else
+struct task_io_accounting {
+};
+#endif
diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h
new file mode 100644
index 000000000000..df2a319106b2
--- /dev/null
+++ b/include/linux/task_io_accounting_ops.h
@@ -0,0 +1,47 @@
+/*
+ * Task I/O accounting operations
+ */
+#ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED
+#define __TASK_IO_ACCOUNTING_OPS_INCLUDED
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+static inline void task_io_account_read(size_t bytes)
+{
+	current->ioac.read_bytes += bytes;
+}
+
+static inline void task_io_account_write(size_t bytes)
+{
+	current->ioac.write_bytes += bytes;
+}
+
+static inline void task_io_account_cancelled_write(size_t bytes)
+{
+	current->ioac.cancelled_write_bytes += bytes;
+}
+
+static inline void task_io_accounting_init(struct task_struct *tsk)
+{
+	memset(&tsk->ioac, 0, sizeof(tsk->ioac));
+}
+
+#else
+
+static inline void task_io_account_read(size_t bytes)
+{
+}
+
+static inline void task_io_account_write(size_t bytes)
+{
+}
+
+static inline void task_io_account_cancelled_write(size_t bytes)
+{
+}
+
+static inline void task_io_accounting_init(struct task_struct *tsk)
+{
+}
+
+#endif		/* CONFIG_TASK_IO_ACCOUNTING */
+#endif		/* __TASK_IO_ACCOUNTING_OPS_INCLUDED */
diff --git a/init/Kconfig b/init/Kconfig
index 14d484606fab..9edf103b3ec3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -304,6 +304,15 @@ config TASK_XACCT
 
 	  Say N if unsure.
 
+config TASK_IO_ACCOUNTING
+	bool "Enable per-task storage I/O accounting (EXPERIMENTAL)"
+	depends on TASK_XACCT
+	help
+	  Collect information on the number of bytes of storage I/O which this
+	  task has caused.
+
+	  Say N if unsure.
+
 config SYSCTL
 	bool
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c859eef8e6a..086e172d0d3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -36,6 +36,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/futex.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
 #include <linux/mount.h>
@@ -1055,6 +1056,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->wchar = 0;		/* I/O counter: bytes written */
 	p->syscr = 0;		/* I/O counter: read syscalls */
 	p->syscw = 0;		/* I/O counter: write syscalls */
+	task_io_accounting_init(p);
 	acct_clear_integrals(p);
 
  	p->it_virt_expires = cputime_zero;
-- 
cgit v1.2.3


From 4a7864ca638e0a38307962ee8ef122822a351b65 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 10 Dec 2006 02:19:53 -0800
Subject: [PATCH] io-accounting: via taskstats

Deliver IO accounting via taskstats.

Cc: Jay Lan <jlan@sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Cc: David Wright <daw@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/taskstats.h | 8 +++++++-
 kernel/tsacct.c           | 9 +++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 15c64037be1b..3fced4798255 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	2
+#define TASKSTATS_VERSION	3
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -140,6 +140,12 @@ struct taskstats {
 	__u64	read_syscalls;		/* read syscalls */
 	__u64	write_syscalls;		/* write syscalls */
 	/* Extended accounting fields end */
+
+#define TASKSTATS_HAS_IO_ACCOUNTING
+	/* Per-task storage I/O accounting starts */
+	__u64	read_bytes;		/* bytes of read I/O */
+	__u64	write_bytes;		/* bytes of write I/O */
+	__u64	cancelled_write_bytes;	/* bytes of cancelled write I/O */
 };
 
 
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 96f77013d3f0..baacc3691415 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -96,6 +96,15 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 	stats->write_char	= p->wchar;
 	stats->read_syscalls	= p->syscr;
 	stats->write_syscalls	= p->syscw;
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	stats->read_bytes	= p->ioac.read_bytes;
+	stats->write_bytes	= p->ioac.write_bytes;
+	stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+#else
+	stats->read_bytes	= 0;
+	stats->write_bytes	= 0;
+	stats->cancelled_write_bytes = 0;
+#endif
 }
 #undef KB
 #undef MB
-- 
cgit v1.2.3


From cc2a73b5caf065f8612fcb5df5bd2f5e25881d99 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sun, 10 Dec 2006 02:20:00 -0800
Subject: [PATCH] sched.c: correct comment for this_rq_lock()

Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index f385eff4682d..479199ab14ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -547,7 +547,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
 #endif
 
 /*
- * rq_lock - lock a given runqueue and disable interrupts.
+ * this_rq_lock - lock this runqueue and disable interrupts.
  */
 static inline struct rq *this_rq_lock(void)
 	__acquires(rq->lock)
-- 
cgit v1.2.3


From 6711cab43ed5e60bf51e3dbbce6395e87d4e9805 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Sun, 10 Dec 2006 02:20:07 -0800
Subject: [PATCH] ched domain: move sched group allocations to percpu area

Move the sched group allocations to percpu area.  This will minimize cross
node memory references and also cleans up the sched groups allocation for
allnodes sched domain.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 132 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 64 insertions(+), 68 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 479199ab14ab..a08387b5f7fa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5528,28 +5528,27 @@ static int __init isolated_cpu_setup(char *str)
 __setup ("isolcpus=", isolated_cpu_setup);
 
 /*
- * init_sched_build_groups takes an array of groups, the cpumask we wish
- * to span, and a pointer to a function which identifies what group a CPU
- * belongs to. The return value of group_fn must be a valid index into the
- * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we
- * keep track of groups covered with a cpumask_t).
+ * init_sched_build_groups takes the cpumask we wish to span, and a pointer
+ * to a function which identifies what group(along with sched group) a CPU
+ * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
+ * (due to the fact that we keep track of groups covered with a cpumask_t).
  *
  * init_sched_build_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
 static void
-init_sched_build_groups(struct sched_group groups[], cpumask_t span,
-			const cpumask_t *cpu_map,
-			int (*group_fn)(int cpu, const cpumask_t *cpu_map))
+init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map,
+			int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+					struct sched_group **sg))
 {
 	struct sched_group *first = NULL, *last = NULL;
 	cpumask_t covered = CPU_MASK_NONE;
 	int i;
 
 	for_each_cpu_mask(i, span) {
-		int group = group_fn(i, cpu_map);
-		struct sched_group *sg = &groups[group];
+		struct sched_group *sg;
+		int group = group_fn(i, cpu_map, &sg);
 		int j;
 
 		if (cpu_isset(i, covered))
@@ -5559,7 +5558,7 @@ init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 		sg->cpu_power = 0;
 
 		for_each_cpu_mask(j, span) {
-			if (group_fn(j, cpu_map) != group)
+			if (group_fn(j, cpu_map, NULL) != group)
 				continue;
 
 			cpu_set(j, covered);
@@ -6135,10 +6134,13 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
  */
 #ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static struct sched_group sched_group_cpus[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
 
-static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map)
+static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map,
+			    struct sched_group **sg)
 {
+	if (sg)
+		*sg = &per_cpu(sched_group_cpus, cpu);
 	return cpu;
 }
 #endif
@@ -6148,39 +6150,52 @@ static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map)
  */
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group sched_group_core[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_group, sched_group_core);
 #endif
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
-static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
+			     struct sched_group **sg)
 {
+	int group;
 	cpumask_t mask = cpu_sibling_map[cpu];
 	cpus_and(mask, mask, *cpu_map);
-	return first_cpu(mask);
+	group = first_cpu(mask);
+	if (sg)
+		*sg = &per_cpu(sched_group_core, group);
+	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
-static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
+static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
+			     struct sched_group **sg)
 {
+	if (sg)
+		*sg = &per_cpu(sched_group_core, cpu);
 	return cpu;
 }
 #endif
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
 
-static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map)
+static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map,
+			     struct sched_group **sg)
 {
+	int group;
 #ifdef CONFIG_SCHED_MC
 	cpumask_t mask = cpu_coregroup_map(cpu);
 	cpus_and(mask, mask, *cpu_map);
-	return first_cpu(mask);
+	group = first_cpu(mask);
 #elif defined(CONFIG_SCHED_SMT)
 	cpumask_t mask = cpu_sibling_map[cpu];
 	cpus_and(mask, mask, *cpu_map);
-	return first_cpu(mask);
+	group = first_cpu(mask);
 #else
-	return cpu;
+	group = cpu;
 #endif
+	if (sg)
+		*sg = &per_cpu(sched_group_phys, group);
+	return group;
 }
 
 #ifdef CONFIG_NUMA
@@ -6193,12 +6208,22 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
 static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
 
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
 
-static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map)
+static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
+				 struct sched_group **sg)
 {
-	return cpu_to_node(cpu);
+	cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu));
+	int group;
+
+	cpus_and(nodemask, nodemask, *cpu_map);
+	group = first_cpu(nodemask);
+
+	if (sg)
+		*sg = &per_cpu(sched_group_allnodes, group);
+	return group;
 }
+
 static void init_numa_sched_groups_power(struct sched_group *group_head)
 {
 	struct sched_group *sg = group_head;
@@ -6234,16 +6259,9 @@ static void free_sched_groups(const cpumask_t *cpu_map)
 	int cpu, i;
 
 	for_each_cpu_mask(cpu, *cpu_map) {
-		struct sched_group *sched_group_allnodes
-			= sched_group_allnodes_bycpu[cpu];
 		struct sched_group **sched_group_nodes
 			= sched_group_nodes_bycpu[cpu];
 
-		if (sched_group_allnodes) {
-			kfree(sched_group_allnodes);
-			sched_group_allnodes_bycpu[cpu] = NULL;
-		}
-
 		if (!sched_group_nodes)
 			continue;
 
@@ -6337,7 +6355,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	struct sched_domain *sd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
-	struct sched_group *sched_group_allnodes = NULL;
+	int sd_allnodes = 0;
 
 	/*
 	 * Allocate the per-node list of sched groups
@@ -6355,7 +6373,6 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
 	for_each_cpu_mask(i, *cpu_map) {
-		int group;
 		struct sched_domain *sd = NULL, *p;
 		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
 
@@ -6364,26 +6381,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 #ifdef CONFIG_NUMA
 		if (cpus_weight(*cpu_map)
 				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
-			if (!sched_group_allnodes) {
-				sched_group_allnodes
-					= kmalloc_node(sizeof(struct sched_group)
-						  	* MAX_NUMNODES,
-						  GFP_KERNEL,
-						  cpu_to_node(i));
-				if (!sched_group_allnodes) {
-					printk(KERN_WARNING
-					"Can not alloc allnodes sched group\n");
-					goto error;
-				}
-				sched_group_allnodes_bycpu[i]
-						= sched_group_allnodes;
-			}
 			sd = &per_cpu(allnodes_domains, i);
 			*sd = SD_ALLNODES_INIT;
 			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i, cpu_map);
-			sd->groups = &sched_group_allnodes[group];
+			cpu_to_allnodes_group(i, cpu_map, &sd->groups);
 			p = sd;
+			sd_allnodes = 1;
 		} else
 			p = NULL;
 
@@ -6398,36 +6401,33 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i, cpu_map);
 		*sd = SD_CPU_INIT;
 		sd->span = nodemask;
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		sd->groups = &sched_group_phys[group];
+		cpu_to_phys_group(i, cpu_map, &sd->groups);
 
 #ifdef CONFIG_SCHED_MC
 		p = sd;
 		sd = &per_cpu(core_domains, i);
-		group = cpu_to_core_group(i, cpu_map);
 		*sd = SD_MC_INIT;
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
-		sd->groups = &sched_group_core[group];
+		cpu_to_core_group(i, cpu_map, &sd->groups);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i, cpu_map);
 		*sd = SD_SIBLING_INIT;
 		sd->span = cpu_sibling_map[i];
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
-		sd->groups = &sched_group_cpus[group];
+		cpu_to_cpu_group(i, cpu_map, &sd->groups);
 #endif
 	}
 
@@ -6439,8 +6439,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		if (i != first_cpu(this_sibling_map))
 			continue;
 
-		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-					cpu_map, &cpu_to_cpu_group);
+		init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group);
 	}
 #endif
 
@@ -6451,8 +6450,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		cpus_and(this_core_map, this_core_map, *cpu_map);
 		if (i != first_cpu(this_core_map))
 			continue;
-		init_sched_build_groups(sched_group_core, this_core_map,
-					cpu_map, &cpu_to_core_group);
+		init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group);
 	}
 #endif
 
@@ -6465,15 +6463,13 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		if (cpus_empty(nodemask))
 			continue;
 
-		init_sched_build_groups(sched_group_phys, nodemask,
-					cpu_map, &cpu_to_phys_group);
+		init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group);
 	}
 
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
-	if (sched_group_allnodes)
-		init_sched_build_groups(sched_group_allnodes, *cpu_map,
-					cpu_map, &cpu_to_allnodes_group);
+	if (sd_allnodes)
+		init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group);
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		/* Set up node groups */
@@ -6565,10 +6561,10 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	for (i = 0; i < MAX_NUMNODES; i++)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-	if (sched_group_allnodes) {
-		int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map);
-		struct sched_group *sg = &sched_group_allnodes[group];
+	if (sd_allnodes) {
+		struct sched_group *sg;
 
+		cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg);
 		init_numa_sched_groups_power(sg);
 	}
 #endif
-- 
cgit v1.2.3


From 054b9108e01ef27e2e6b32b4226abb6024626f06 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Sun, 10 Dec 2006 02:20:11 -0800
Subject: [PATCH] move_task_off_dead_cpu() should be called with disabled ints

move_task_off_dead_cpu() requires interrupts to be disabled, while
migrate_dead() calls it with enabled interrupts.  Added appropriate
comments to functions and added BUG_ON(!irqs_disabled()) into
double_rq_lock() and double_lock_balance() which are the origin sources of
such bugs.

Signed-off-by: Kirill Korotaev <dev@openvz.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index a08387b5f7fa..f04add905bdf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1952,6 +1952,7 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	__acquires(rq1->lock)
 	__acquires(rq2->lock)
 {
+	BUG_ON(!irqs_disabled());
 	if (rq1 == rq2) {
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
@@ -1991,6 +1992,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -5067,7 +5073,10 @@ wait_to_die:
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-/* Figure out where task on dead CPU should go, use force if neccessary. */
+/*
+ * Figure out where task on dead CPU should go, use force if neccessary.
+ * NOTE: interrupts should be disabled by the caller
+ */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	unsigned long flags;
@@ -5187,6 +5196,7 @@ void idle_task_exit(void)
 	mmdrop(mm);
 }
 
+/* called under rq->lock with disabled interrupts */
 static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
@@ -5203,10 +5213,11 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	 * Drop lock around migration; if someone else moves it,
 	 * that's OK.  No task can be added to this CPU, so iteration is
 	 * fine.
+	 * NOTE: interrupts should be left disabled  --dev@
 	 */
-	spin_unlock_irq(&rq->lock);
+	spin_unlock(&rq->lock);
 	move_task_off_dead_cpu(dead_cpu, p);
-	spin_lock_irq(&rq->lock);
+	spin_lock(&rq->lock);
 
 	put_task_struct(p);
 }
-- 
cgit v1.2.3


From 571f6d2fb0b1c04798df783db2ba85e96bcce43d Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:13 -0800
Subject: [PATCH] sched: avoid taking rq lock in wake_priority_sleeper

Avoid taking the request queue lock in wake_priority_sleeper if there are no
running processes.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index f04add905bdf..fdd26fffaa20 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2915,6 +2915,9 @@ static inline int wake_priority_sleeper(struct rq *rq)
 	int ret = 0;
 
 #ifdef CONFIG_SCHED_SMT
+	if (!rq->nr_running)
+		return 0;
+
 	spin_lock(&rq->lock);
 	/*
 	 * If an SMT sibling task has been put to sleep for priority
-- 
cgit v1.2.3


From 4211a9a2e94a34df8c02bc39b7ec10678ad5c2ab Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:19 -0800
Subject: [PATCH] sched: remove staggering of load balancing

Timer interrupts already are staggered.  We do not need an additional layer of
time staggering for short load balancing actions that take a reasonably small
portion of the time slice.

For load balancing on large sched_domains we will add a serialization later
that avoids concurrent load balance operations and thus has the same effect as
load staggering.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index fdd26fffaa20..b5b350135002 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2841,16 +2841,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
  * Balancing parameters are set up in arch_init_sched_domains.
  */
 
-/* Don't have all balancing operations going off at once: */
-static inline unsigned long cpu_offset(int cpu)
-{
-	return jiffies + cpu * HZ / NR_CPUS;
-}
-
 static void
 rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 {
-	unsigned long this_load, interval, j = cpu_offset(this_cpu);
+	unsigned long this_load, interval;
 	struct sched_domain *sd;
 	int i, scale;
 
@@ -2885,7 +2879,7 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 		if (unlikely(!interval))
 			interval = 1;
 
-		if (j - sd->last_balance >= interval) {
+		if (jiffies - sd->last_balance >= interval) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
 				 * We've pulled tasks over so either we're no
-- 
cgit v1.2.3


From fe2eea3fafb3df2f5b8a55a48bcbb0d23b3b5618 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:21 -0800
Subject: [PATCH] sched: disable interrupts for locking in load_balance()

Interrupts must be disabled for request queue locks if we want to run
load_balance() with interrupts enabled.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index b5b350135002..d327511d268e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2546,8 +2546,6 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
- *
- * Called with this_rq unlocked.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
@@ -2557,6 +2555,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	unsigned long imbalance;
 	struct rq *busiest;
 	cpumask_t cpus = CPU_MASK_ALL;
+	unsigned long flags;
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -2596,11 +2595,13 @@ redo:
 		 * still unbalanced. nr_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
+		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
 		nr_moved = move_tasks(this_rq, this_cpu, busiest,
 				      minus_1_or_zero(busiest->nr_running),
 				      imbalance, sd, idle, &all_pinned);
 		double_rq_unlock(this_rq, busiest);
+		local_irq_restore(flags);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
@@ -2617,13 +2618,13 @@ redo:
 
 		if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
 
-			spin_lock(&busiest->lock);
+			spin_lock_irqsave(&busiest->lock, flags);
 
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
 			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
-				spin_unlock(&busiest->lock);
+				spin_unlock_irqrestore(&busiest->lock, flags);
 				all_pinned = 1;
 				goto out_one_pinned;
 			}
@@ -2633,7 +2634,7 @@ redo:
 				busiest->push_cpu = this_cpu;
 				active_balance = 1;
 			}
-			spin_unlock(&busiest->lock);
+			spin_unlock_irqrestore(&busiest->lock, flags);
 			if (active_balance)
 				wake_up_process(busiest->migration_thread);
 
-- 
cgit v1.2.3


From 7835b98bc6de2ca10afa45572d272304b000b048 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:22 -0800
Subject: [PATCH] sched: extract load calculation from rebalance_tick

A load calculation is always done in rebalance_tick() in addition to the real
load balancing activities that only take place when certain jiffie counts have
been reached.  Move that processing into a separate function and call it
directly from scheduler_tick().

Also extract the time slice handling from scheduler_tick and put it into a
separate function.  Then we can clean up scheduler_tick significantly.  It
will no longer have any gotos.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 96 +++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 42 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index d327511d268e..3c1b74aa1b07 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2833,20 +2833,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	spin_unlock(&target_rq->lock);
 }
 
-/*
- * rebalance_tick will get called every timer tick, on every CPU.
- *
- * It checks each scheduling domain to see if it is due to be balanced,
- * and initiates a balancing operation if so.
- *
- * Balancing parameters are set up in arch_init_sched_domains.
- */
-
-static void
-rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
+static void update_load(struct rq *this_rq)
 {
-	unsigned long this_load, interval;
-	struct sched_domain *sd;
+	unsigned long this_load;
 	int i, scale;
 
 	this_load = this_rq->raw_weighted_load;
@@ -2866,6 +2855,22 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
 	}
+}
+
+/*
+ * rebalance_tick will get called every timer tick, on every CPU.
+ *
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+ *
+ * Balancing parameters are set up in arch_init_sched_domains.
+ */
+
+static void
+rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
+{
+	unsigned long interval;
+	struct sched_domain *sd;
 
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2897,12 +2902,15 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
+static inline void rebalance_tick(int cpu, struct rq *rq)
 {
 }
 static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
+static inline void update_load(struct rq *this_rq)
+{
+}
 #endif
 
 static inline int wake_priority_sleeper(struct rq *rq)
@@ -3052,35 +3060,12 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
 }
 
-/*
- * This function gets called by the timer code, with HZ frequency.
- * We call it with interrupts disabled.
- *
- * It also gets called by the fork code, when changing the parent's
- * timeslices.
- */
-void scheduler_tick(void)
+static void task_running_tick(struct rq *rq, struct task_struct *p)
 {
-	unsigned long long now = sched_clock();
-	struct task_struct *p = current;
-	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
-
-	update_cpu_clock(p, rq, now);
-
-	rq->timestamp_last_tick = now;
-
-	if (p == rq->idle) {
-		if (wake_priority_sleeper(rq))
-			goto out;
-		rebalance_tick(cpu, rq, SCHED_IDLE);
-		return;
-	}
-
-	/* Task might have expired already, but not scheduled off yet */
 	if (p->array != rq->active) {
+		/* Task has expired but was not scheduled yet */
 		set_tsk_need_resched(p);
-		goto out;
+		return;
 	}
 	spin_lock(&rq->lock);
 	/*
@@ -3148,8 +3133,35 @@ void scheduler_tick(void)
 	}
 out_unlock:
 	spin_unlock(&rq->lock);
-out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+}
+
+/*
+ * This function gets called by the timer code, with HZ frequency.
+ * We call it with interrupts disabled.
+ *
+ * It also gets called by the fork code, when changing the parent's
+ * timeslices.
+ */
+void scheduler_tick(void)
+{
+	unsigned long long now = sched_clock();
+	struct task_struct *p = current;
+	int cpu = smp_processor_id();
+	struct rq *rq = cpu_rq(cpu);
+	enum idle_type idle = NOT_IDLE;
+
+	update_cpu_clock(p, rq, now);
+
+	rq->timestamp_last_tick = now;
+
+	if (p == rq->idle) {
+		/* Task on the idle queue */
+		if (!wake_priority_sleeper(rq))
+			idle = SCHED_IDLE;
+	} else
+		task_running_tick(rq, p);
+	update_load(rq);
+	rebalance_tick(cpu, rq, idle);
 }
 
 #ifdef CONFIG_SCHED_SMT
-- 
cgit v1.2.3


From e418e1c2bf1a253916b569370653414eb28597b6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:23 -0800
Subject: [PATCH] sched: move idle status calculation into rebalance_tick()

Perform the idle state determination in rebalance_tick.

If we separate balancing from sched_tick then we also need to determine the
idle state in rebalance_tick.

V2->V3
	Remove useless idlle != 0 check. Checking nr_running seems
	to be sufficient. Thanks Suresh.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 3c1b74aa1b07..14a8d9050cd4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2867,10 +2867,16 @@ static void update_load(struct rq *this_rq)
  */
 
 static void
-rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
+rebalance_tick(int this_cpu, struct rq *this_rq)
 {
 	unsigned long interval;
 	struct sched_domain *sd;
+	/*
+	 * We are idle if there are no processes running. This
+	 * is valid even if we are the idle process (SMT).
+	 */
+	enum idle_type idle = !this_rq->nr_running ?
+				SCHED_IDLE : NOT_IDLE;
 
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2902,37 +2908,26 @@ rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, struct rq *rq)
-{
-}
 static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
-static inline void update_load(struct rq *this_rq)
-{
-}
 #endif
 
-static inline int wake_priority_sleeper(struct rq *rq)
+static inline void wake_priority_sleeper(struct rq *rq)
 {
-	int ret = 0;
-
 #ifdef CONFIG_SCHED_SMT
 	if (!rq->nr_running)
-		return 0;
+		return;
 
 	spin_lock(&rq->lock);
 	/*
 	 * If an SMT sibling task has been put to sleep for priority
 	 * reasons reschedule the idle task to see if it can now run.
 	 */
-	if (rq->nr_running) {
+	if (rq->nr_running)
 		resched_task(rq->idle);
-		ret = 1;
-	}
 	spin_unlock(&rq->lock);
 #endif
-	return ret;
 }
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
@@ -3148,20 +3143,20 @@ void scheduler_tick(void)
 	struct task_struct *p = current;
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
-	enum idle_type idle = NOT_IDLE;
 
 	update_cpu_clock(p, rq, now);
 
 	rq->timestamp_last_tick = now;
 
-	if (p == rq->idle) {
+	if (p == rq->idle)
 		/* Task on the idle queue */
-		if (!wake_priority_sleeper(rq))
-			idle = SCHED_IDLE;
-	} else
+		wake_priority_sleeper(rq);
+	else
 		task_running_tick(rq, p);
+#ifdef CONFIG_SMP
 	update_load(rq);
-	rebalance_tick(cpu, rq, idle);
+	rebalance_tick(cpu, rq);
+#endif
 }
 
 #ifdef CONFIG_SCHED_SMT
-- 
cgit v1.2.3


From c9819f4593e8d052b41a89f47140f5c5e7e30582 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:25 -0800
Subject: [PATCH] sched: use softirq for load balancing

Call rebalance_tick (renamed to run_rebalance_domains) from a newly introduced
softirq.

We calculate the earliest time for each layer of sched domains to be rescanned
(this is the rescan time for idle) and use the earliest of those to schedule
the softirq via a new field "next_balance" added to struct rq.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/interrupt.h |  3 ++-
 kernel/sched.c            | 22 +++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index de7593f4e895..e36e86c869fb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -231,7 +231,8 @@ enum
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
 	BLOCK_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+	SCHED_SOFTIRQ,
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
diff --git a/kernel/sched.c b/kernel/sched.c
index 14a8d9050cd4..0a3e748d737d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -227,6 +227,7 @@ struct rq {
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
 	struct task_struct *curr, *idle;
+	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 	struct prio_array *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -2858,7 +2859,7 @@ static void update_load(struct rq *this_rq)
 }
 
 /*
- * rebalance_tick will get called every timer tick, on every CPU.
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
  *
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -2866,9 +2867,10 @@ static void update_load(struct rq *this_rq)
  * Balancing parameters are set up in arch_init_sched_domains.
  */
 
-static void
-rebalance_tick(int this_cpu, struct rq *this_rq)
+static void run_rebalance_domains(struct softirq_action *h)
 {
+	int this_cpu = smp_processor_id();
+	struct rq *this_rq = cpu_rq(this_cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
 	/*
@@ -2877,6 +2879,8 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 	 */
 	enum idle_type idle = !this_rq->nr_running ?
 				SCHED_IDLE : NOT_IDLE;
+	/* Earliest time when we have to call run_rebalance_domains again */
+	unsigned long next_balance = jiffies + 60*HZ;
 
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2891,7 +2895,7 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 		if (unlikely(!interval))
 			interval = 1;
 
-		if (jiffies - sd->last_balance >= interval) {
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
 				 * We've pulled tasks over so either we're no
@@ -2902,7 +2906,10 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 			}
 			sd->last_balance += interval;
 		}
+		if (time_after(next_balance, sd->last_balance + interval))
+			next_balance = sd->last_balance + interval;
 	}
+	this_rq->next_balance = next_balance;
 }
 #else
 /*
@@ -3155,7 +3162,8 @@ void scheduler_tick(void)
 		task_running_tick(rq, p);
 #ifdef CONFIG_SMP
 	update_load(rq);
-	rebalance_tick(cpu, rq);
+	if (time_after_eq(jiffies, rq->next_balance))
+		raise_softirq(SCHED_SOFTIRQ);
 #endif
 }
 
@@ -6859,6 +6867,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_SMP
+	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
 #endif
-- 
cgit v1.2.3


From 1bd77f2da58e9cdd1f159217887343dadd9af417 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:27 -0800
Subject: [PATCH] sched: call tasklet less frequently

Trigger softirq less frequently

We trigger the softirq before this patch using offset of sd->interval.
However, if the queue is busy then it is sufficient to schedule the softirq
with sd->interval * busy_factor.

So we modify the calculation of the next time to balance by taking
the interval added to last_balance again. This is only the
right value if the idle/busy situation continues as is.

There are two potential trouble spots:
- If the queue was idle and now gets busy then we call rebalance
  early. However, that is not a problem because we will then use
  the longer interval for the next period.

- If the queue was busy and becomes idle then we potentially
  wait too long before rebalancing. However, when the task
  goes idle then idle_balance is called. We add another calculation
  of the next balance time based on sd->interval in idle_balance
  so that we will rebalance soon.

V2->V3:
- Calculate rebalance time based on current jiffies and not
  based on the jiffies at the last time we load balanced.
  We no longer rely on staggering and therefore we can
  affort to do this now.

V3->V4:
- Use functions to do jiffy comparisons.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 0a3e748d737d..0a4a26b21f69 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2774,14 +2774,28 @@ out_balanced:
 static void idle_balance(int this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
+	int pulled_task = 0;
+	unsigned long next_balance = jiffies + 60 *  HZ;
 
 	for_each_domain(this_cpu, sd) {
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			/* If we've pulled tasks over stop searching: */
-			if (load_balance_newidle(this_cpu, this_rq, sd))
+			pulled_task = load_balance_newidle(this_cpu,
+							this_rq, sd);
+			if (time_after(next_balance,
+				  sd->last_balance + sd->balance_interval))
+				next_balance = sd->last_balance
+						+ sd->balance_interval;
+			if (pulled_task)
 				break;
 		}
 	}
+	if (!pulled_task)
+		/*
+		 * We are going idle. next_balance may be set based on
+		 * a busy processor. So reset next_balance.
+		 */
+		this_rq->next_balance = next_balance;
 }
 
 /*
@@ -2904,7 +2918,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 				 */
 				idle = NOT_IDLE;
 			}
-			sd->last_balance += interval;
+			sd->last_balance = jiffies;
 		}
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
-- 
cgit v1.2.3


From 08c183f31bdbb709f177f6d3110d5f288ea33933 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 10 Dec 2006 02:20:29 -0800
Subject: [PATCH] sched: add option to serialize load balancing

Large sched domains can be very expensive to scan.  Add an option SD_SERIALIZE
to the sched domain flags.  If that flag is set then we make sure that no
other such domain is being balanced.

[akpm@osdl.org: build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/topology.h    | 1 +
 include/asm-ia64/topology.h    | 1 +
 include/asm-powerpc/topology.h | 1 +
 include/asm-x86_64/topology.h  | 1 +
 include/linux/sched.h          | 1 +
 include/linux/topology.h       | 3 ++-
 kernel/sched.c                 | 9 +++++++++
 7 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index 978d09596130..ac58580ad664 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -89,6 +89,7 @@ static inline int node_to_first_cpu(int node)
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_FORK	\
+				| SD_SERIALIZE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index a6e38565ab4c..22ed6749557e 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -101,6 +101,7 @@ void build_cpu_to_node_map(void);
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_FORK	\
+				| SD_SERIALIZE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 64,			\
diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h
index 50c014007de7..6610495f5f16 100644
--- a/include/asm-powerpc/topology.h
+++ b/include/asm-powerpc/topology.h
@@ -66,6 +66,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_WAKE_IDLE		\
+				| SD_SERIALIZE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 5c8f49280dbc..2facec5914d2 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -47,6 +47,7 @@ extern int __node_distance(int, int);
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
+				| SD_SERIALIZE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1208feab46e0..ea92e5c89089 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -648,6 +648,7 @@ enum idle_type
 #define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 #define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	512	/* Domain members share cpu pkg resources */
+#define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 
 #define BALANCE_FOR_MC_POWER	\
 	(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b93bb6cc6cc2..6c5a6e6e813b 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -194,7 +194,8 @@
 	.wake_idx		= 0, /* unused */	\
 	.forkexec_idx		= 0, /* unused */	\
 	.per_cpu_gain		= 100,			\
-	.flags			= SD_LOAD_BALANCE,	\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_SERIALIZE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 64,			\
 	.nr_balance_failed	= 0,			\
diff --git a/kernel/sched.c b/kernel/sched.c
index 0a4a26b21f69..2b2b780939c9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2880,6 +2880,7 @@ static void update_load(struct rq *this_rq)
  *
  * Balancing parameters are set up in arch_init_sched_domains.
  */
+static DEFINE_SPINLOCK(balancing);
 
 static void run_rebalance_domains(struct softirq_action *h)
 {
@@ -2909,6 +2910,11 @@ static void run_rebalance_domains(struct softirq_action *h)
 		if (unlikely(!interval))
 			interval = 1;
 
+		if (sd->flags & SD_SERIALIZE) {
+			if (!spin_trylock(&balancing))
+				goto out;
+		}
+
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
@@ -2920,6 +2926,9 @@ static void run_rebalance_domains(struct softirq_action *h)
 			}
 			sd->last_balance = jiffies;
 		}
+		if (sd->flags & SD_SERIALIZE)
+			spin_unlock(&balancing);
+out:
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
 	}
-- 
cgit v1.2.3


From b18ec80396834497933d77b81ec0918519f4e2a7 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Sun, 10 Dec 2006 02:20:31 -0800
Subject: [PATCH] sched: improve migration accuracy

Co-opt rq->timestamp_last_tick to maintain a cache_hot_time evaluation
reference timestamp at both tick and sched times to prevent said reference,
formerly rq->timestamp_last_tick, from being behind task->last_ran at
evaluation time, and to move said reference closer to current time on the
remote processor, intent being to improve cache hot evaluation and
timestamp adjustment accuracy for task migration.

Fix minor sched_time double accounting error which occurs when a task
passing through schedule() does not schedule off, and takes the next timer
tick.

[kenneth.w.chen@intel.com: cleanup]
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Don Mullis <dwm@meer.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 2b2b780939c9..15ce772a471a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -225,7 +225,8 @@ struct rq {
 	unsigned long nr_uninterruptible;
 
 	unsigned long expired_timestamp;
-	unsigned long long timestamp_last_tick;
+	/* Cached timestamp set by update_cpu_clock() */
+	unsigned long long most_recent_timestamp;
 	struct task_struct *curr, *idle;
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
@@ -944,8 +945,8 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 	if (!local) {
 		/* Compensate for drifting sched_clock */
 		struct rq *this_rq = this_rq();
-		now = (now - this_rq->timestamp_last_tick)
-			+ rq->timestamp_last_tick;
+		now = (now - this_rq->most_recent_timestamp)
+			+ rq->most_recent_timestamp;
 	}
 #endif
 
@@ -1689,8 +1690,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * Not the local CPU - must adjust timestamp. This should
 		 * get optimised away in the !CONFIG_SMP case.
 		 */
-		p->timestamp = (p->timestamp - this_rq->timestamp_last_tick)
-					+ rq->timestamp_last_tick;
+		p->timestamp = (p->timestamp - this_rq->most_recent_timestamp)
+					+ rq->most_recent_timestamp;
 		__activate_task(p, rq);
 		if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
@@ -2068,8 +2069,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array,
 	set_task_cpu(p, this_cpu);
 	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
-	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
-				+ this_rq->timestamp_last_tick;
+	p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
+				+ this_rq->most_recent_timestamp;
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
@@ -2105,10 +2106,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) too many balance attempts have failed.
 	 */
 
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
+	if (sd->nr_balance_failed > sd->cache_nice_tries) {
+#ifdef CONFIG_SCHEDSTATS
+		if (task_hot(p, rq->most_recent_timestamp, sd))
+			schedstat_inc(sd, lb_hot_gained[idle]);
+#endif
 		return 1;
+	}
 
-	if (task_hot(p, rq->timestamp_last_tick, sd))
+	if (task_hot(p, rq->most_recent_timestamp, sd))
 		return 0;
 	return 1;
 }
@@ -2206,11 +2212,6 @@ skip_queue:
 		goto skip_bitmap;
 	}
 
-#ifdef CONFIG_SCHEDSTATS
-	if (task_hot(tmp, busiest->timestamp_last_tick, sd))
-		schedstat_inc(sd, lb_hot_gained[idle]);
-#endif
-
 	pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
 	pulled++;
 	rem_load_move -= tmp->load_weight;
@@ -2971,7 +2972,8 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void
 update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
 {
-	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
+	p->sched_time += now - p->last_ran;
+	p->last_ran = rq->most_recent_timestamp = now;
 }
 
 /*
@@ -2984,8 +2986,7 @@ unsigned long long current_sched_time(const struct task_struct *p)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
-	ns = p->sched_time + sched_clock() - ns;
+	ns = p->sched_time + sched_clock() - p->last_ran;
 	local_irq_restore(flags);
 
 	return ns;
@@ -3176,8 +3177,6 @@ void scheduler_tick(void)
 
 	update_cpu_clock(p, rq, now);
 
-	rq->timestamp_last_tick = now;
-
 	if (p == rq->idle)
 		/* Task on the idle queue */
 		wake_priority_sleeper(rq);
@@ -5032,8 +5031,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 		 * afterwards, and pretending it was a local activate.
 		 * This way is cleaner and logically correct.
 		 */
-		p->timestamp = p->timestamp - rq_src->timestamp_last_tick
-				+ rq_dest->timestamp_last_tick;
+		p->timestamp = p->timestamp - rq_src->most_recent_timestamp
+				+ rq_dest->most_recent_timestamp;
 		deactivate_task(p, rq_src);
 		__activate_task(p, rq_dest);
 		if (TASK_PREEMPTS_CURR(p, rq_dest))
-- 
cgit v1.2.3


From 783609c6cb4eaa23f2ac5c968a44483584ec133f Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Sun, 10 Dec 2006 02:20:33 -0800
Subject: [PATCH] sched: decrease number of load balances

Currently at a particular domain, each cpu in the sched group will do a
load balance at the frequency of balance_interval.  More the cores and
threads, more the cpus will be in each sched group at SMP and NUMA domain.
And we endup spending quite a bit of time doing load balancing in those
domains.

Fix this by making only one cpu(first idle cpu or first cpu in the group if
all the cpus are busy) in the sched group do the load balance at that
particular sched domain and this load will slowly percolate down to the
other cpus with in that group(when they do load balancing at lower
domains).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  1 +
 kernel/sched.c        | 59 ++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea92e5c89089..72d6927d29ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -707,6 +707,7 @@ struct sched_domain {
 	unsigned long lb_hot_gained[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
+	unsigned long lb_stopbalance[MAX_IDLE_TYPES];
 
 	/* Active load balancing */
 	unsigned long alb_cnt;
diff --git a/kernel/sched.c b/kernel/sched.c
index 15ce772a471a..4e453431c61a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 12
+#define SCHEDSTAT_VERSION 13
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
 					itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu",
 				    sd->lb_cnt[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
@@ -474,7 +474,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
+				    sd->lb_nobusyg[itype],
+				    sd->lb_stopbalance[itype]);
 			}
 			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
 			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
@@ -2249,7 +2250,7 @@ out:
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
-		   cpumask_t *cpus)
+		   cpumask_t *cpus, int *balance)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2278,10 +2279,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		unsigned long load, group_capacity;
 		int local_group;
 		int i;
+		unsigned int balance_cpu = -1, first_idle_cpu = 0;
 		unsigned long sum_nr_running, sum_weighted_load;
 
 		local_group = cpu_isset(this_cpu, group->cpumask);
 
+		if (local_group)
+			balance_cpu = first_cpu(group->cpumask);
+
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
 
@@ -2297,9 +2302,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 				*sd_idle = 0;
 
 			/* Bias balancing toward cpus of our domain */
-			if (local_group)
+			if (local_group) {
+				if (idle_cpu(i) && !first_idle_cpu) {
+					first_idle_cpu = 1;
+					balance_cpu = i;
+				}
+
 				load = target_load(i, load_idx);
-			else
+			} else
 				load = source_load(i, load_idx);
 
 			avg_load += load;
@@ -2307,6 +2317,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 			sum_weighted_load += rq->raw_weighted_load;
 		}
 
+		/*
+		 * First idle cpu or the first cpu(busiest) in this sched group
+		 * is eligible for doing load balancing at this and above
+		 * domains.
+		 */
+		if (local_group && balance_cpu != this_cpu && balance) {
+			*balance = 0;
+			goto ret;
+		}
+
 		total_load += avg_load;
 		total_pwr += group->cpu_power;
 
@@ -2498,8 +2518,8 @@ out_balanced:
 		*imbalance = min_load_per_task;
 		return group_min;
 	}
-ret:
 #endif
+ret:
 	*imbalance = 0;
 	return NULL;
 }
@@ -2550,7 +2570,8 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
-			struct sched_domain *sd, enum idle_type idle)
+			struct sched_domain *sd, enum idle_type idle,
+			int *balance)
 {
 	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
@@ -2573,7 +2594,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 redo:
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
-							&cpus);
+				   &cpus, balance);
+
+	if (*balance == 0) {
+		schedstat_inc(sd, lb_stopbalance[idle]);
+		goto out_balanced;
+	}
+
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
@@ -2715,7 +2742,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 redo:
 	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
-				&sd_idle, &cpus);
+				   &sd_idle, &cpus, NULL);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2885,7 +2912,7 @@ static DEFINE_SPINLOCK(balancing);
 
 static void run_rebalance_domains(struct softirq_action *h)
 {
-	int this_cpu = smp_processor_id();
+	int this_cpu = smp_processor_id(), balance = 1;
 	struct rq *this_rq = cpu_rq(this_cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
@@ -2917,7 +2944,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(this_cpu, this_rq, sd, idle)) {
+			if (load_balance(this_cpu, this_rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -2932,6 +2959,14 @@ static void run_rebalance_domains(struct softirq_action *h)
 out:
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
+
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!balance)
+			break;
 	}
 	this_rq->next_balance = next_balance;
 }
-- 
cgit v1.2.3


From 06066714f6016cffcb249f6ab21b7919de1bc859 Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Sun, 10 Dec 2006 02:20:35 -0800
Subject: [PATCH] sched: remove lb_stopbalance counter

Remove scheduler stats lb_stopbalance counter.  This counter can be
calculated by: lb_balanced - lb_nobusyg - lb_nobusyq.  There is no need to
create gazillion counters while we can derive the value.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  1 -
 kernel/sched.c        | 11 ++++-------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 72d6927d29ed..ea92e5c89089 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -707,7 +707,6 @@ struct sched_domain {
 	unsigned long lb_hot_gained[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
 	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
-	unsigned long lb_stopbalance[MAX_IDLE_TYPES];
 
 	/* Active load balancing */
 	unsigned long alb_cnt;
diff --git a/kernel/sched.c b/kernel/sched.c
index 4e453431c61a..66e44b5b53d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 13
+#define SCHEDSTAT_VERSION 14
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
 					itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
 				    sd->lb_cnt[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
@@ -474,8 +474,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype],
-				    sd->lb_stopbalance[itype]);
+				    sd->lb_nobusyg[itype]);
 			}
 			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
 			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
@@ -2596,10 +2595,8 @@ redo:
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
 				   &cpus, balance);
 
-	if (*balance == 0) {
-		schedstat_inc(sd, lb_stopbalance[idle]);
+	if (*balance == 0)
 		goto out_balanced;
-	}
 
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
-- 
cgit v1.2.3


From 62ab616d54371a65f595c199aad1e1755b837d25 Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Sun, 10 Dec 2006 02:20:36 -0800
Subject: [PATCH] sched: optimize activate_task for RT task

RT task does not participate in interactiveness priority and thus shouldn't
be bothered with timestamp and p->sleep_type manipulation when task is
being put on run queue.  Bypass all of the them with a single if (rt_task)
test.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 66e44b5b53d2..48e35c916326 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -940,6 +940,9 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 {
 	unsigned long long now;
 
+	if (rt_task(p))
+		goto out;
+
 	now = sched_clock();
 #ifdef CONFIG_SMP
 	if (!local) {
@@ -961,8 +964,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 				     (now - p->timestamp) >> 20);
 	}
 
-	if (!rt_task(p))
-		p->prio = recalc_task_prio(p, now);
+	p->prio = recalc_task_prio(p, now);
 
 	/*
 	 * This checks to make sure it's not an uninterruptible task
@@ -987,7 +989,7 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 		}
 	}
 	p->timestamp = now;
-
+out:
 	__activate_task(p, rq);
 }
 
-- 
cgit v1.2.3


From 33859f7f9788da2ac9aa23be4dc8e948112809ca Mon Sep 17 00:00:00 2001
From: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Date: Sun, 10 Dec 2006 02:20:38 -0800
Subject: [PATCH] kernel/sched.c: whitespace cleanups

[akpm@osdl.org: additional cleanups]
Signed-off-by: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 95 +++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 55 insertions(+), 40 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 48e35c916326..8a0afb97af71 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -466,7 +466,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %s", dcnt++, mask_str);
 			for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
 					itype++) {
-				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
+				seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
+						"%lu",
 				    sd->lb_cnt[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
@@ -476,11 +477,13 @@ static int show_schedstat(struct seq_file *seq, void *v)
 				    sd->lb_nobusyq[itype],
 				    sd->lb_nobusyg[itype]);
 			}
-			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+			seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
+			    " %lu %lu %lu\n",
 			    sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
 			    sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
 			    sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance);
+			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
+			    sd->ttwu_move_balance);
 		}
 		preempt_enable();
 #endif
@@ -1454,7 +1457,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 
 		if (this_sd->flags & SD_WAKE_AFFINE) {
 			unsigned long tl = this_load;
-			unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu);
+			unsigned long tl_per_task;
+
+			tl_per_task = cpu_avg_load_per_task(this_cpu);
 
 			/*
 			 * If sync wakeup then subtract the (maximum possible)
@@ -2487,18 +2492,21 @@ small_imbalance:
 		pwr_now /= SCHED_LOAD_SCALE;
 
 		/* Amount of load we'd subtract */
-		tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power;
+		tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
+			busiest->cpu_power;
 		if (max_load > tmp)
 			pwr_move += busiest->cpu_power *
 				min(busiest_load_per_task, max_load - tmp);
 
 		/* Amount of load we'd add */
-		if (max_load*busiest->cpu_power <
-				busiest_load_per_task*SCHED_LOAD_SCALE)
-			tmp = max_load*busiest->cpu_power/this->cpu_power;
+		if (max_load * busiest->cpu_power <
+				busiest_load_per_task * SCHED_LOAD_SCALE)
+			tmp = max_load * busiest->cpu_power / this->cpu_power;
 		else
-			tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power;
-		pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp);
+			tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
+				this->cpu_power;
+		pwr_move += this->cpu_power *
+			min(this_load_per_task, this_load + tmp);
 		pwr_move /= SCHED_LOAD_SCALE;
 
 		/* Move if we gain throughput */
@@ -3366,7 +3374,8 @@ void fastcall add_preempt_count(int val)
 	/*
 	 * Spinlock count overflowing soon?
 	 */
-	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
+				PREEMPT_MASK - 10);
 }
 EXPORT_SYMBOL(add_preempt_count);
 
@@ -5439,16 +5448,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 		if (!(sd->flags & SD_LOAD_BALANCE)) {
 			printk("does not load-balance\n");
 			if (sd->parent)
-				printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
+				printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
+						" has parent");
 			break;
 		}
 
 		printk("span %s\n", str);
 
 		if (!cpu_isset(cpu, sd->span))
-			printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
+			printk(KERN_ERR "ERROR: domain->span does not contain "
+					"CPU%d\n", cpu);
 		if (!cpu_isset(cpu, group->cpumask))
-			printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
+			printk(KERN_ERR "ERROR: domain->groups does not contain"
+					" CPU%d\n", cpu);
 
 		printk(KERN_DEBUG);
 		for (i = 0; i < level + 2; i++)
@@ -5463,7 +5475,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 
 			if (!group->cpu_power) {
 				printk("\n");
-				printk(KERN_ERR "ERROR: domain->cpu_power not set\n");
+				printk(KERN_ERR "ERROR: domain->cpu_power not "
+						"set\n");
 			}
 
 			if (!cpus_weight(group->cpumask)) {
@@ -5486,15 +5499,17 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 		printk("\n");
 
 		if (!cpus_equal(sd->span, groupmask))
-			printk(KERN_ERR "ERROR: groups don't span domain->span\n");
+			printk(KERN_ERR "ERROR: groups don't span "
+					"domain->span\n");
 
 		level++;
 		sd = sd->parent;
+		if (!sd)
+			continue;
 
-		if (sd) {
-			if (!cpus_subset(groupmask, sd->span))
-				printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
-		}
+		if (!cpus_subset(groupmask, sd->span))
+			printk(KERN_ERR "ERROR: parent span is not a superset "
+				"of domain->span\n");
 
 	} while (sd);
 }
@@ -5812,8 +5827,9 @@ __setup("max_cache_size=", setup_max_cache_size);
  */
 static void touch_cache(void *__cache, unsigned long __size)
 {
-	unsigned long size = __size/sizeof(long), chunk1 = size/3,
-			chunk2 = 2*size/3;
+	unsigned long size = __size / sizeof(long);
+	unsigned long chunk1 = size / 3;
+	unsigned long chunk2 = 2 * size / 3;
 	unsigned long *cache = __cache;
 	int i;
 
@@ -5922,11 +5938,11 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size)
 	 */
 	measure_one(cache, size, cpu1, cpu2);
 	for (i = 0; i < ITERATIONS; i++)
-		cost1 += measure_one(cache, size - i*1024, cpu1, cpu2);
+		cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2);
 
 	measure_one(cache, size, cpu2, cpu1);
 	for (i = 0; i < ITERATIONS; i++)
-		cost1 += measure_one(cache, size - i*1024, cpu2, cpu1);
+		cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1);
 
 	/*
 	 * (We measure the non-migrating [cached] cost on both
@@ -5936,17 +5952,17 @@ measure_cost(int cpu1, int cpu2, void *cache, unsigned int size)
 
 	measure_one(cache, size, cpu1, cpu1);
 	for (i = 0; i < ITERATIONS; i++)
-		cost2 += measure_one(cache, size - i*1024, cpu1, cpu1);
+		cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1);
 
 	measure_one(cache, size, cpu2, cpu2);
 	for (i = 0; i < ITERATIONS; i++)
-		cost2 += measure_one(cache, size - i*1024, cpu2, cpu2);
+		cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2);
 
 	/*
 	 * Get the per-iteration migration cost:
 	 */
-	do_div(cost1, 2*ITERATIONS);
-	do_div(cost2, 2*ITERATIONS);
+	do_div(cost1, 2 * ITERATIONS);
+	do_div(cost2, 2 * ITERATIONS);
 
 	return cost1 - cost2;
 }
@@ -5984,7 +6000,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
 	 */
 	cache = vmalloc(max_size);
 	if (!cache) {
-		printk("could not vmalloc %d bytes for cache!\n", 2*max_size);
+		printk("could not vmalloc %d bytes for cache!\n", 2 * max_size);
 		return 1000000; /* return 1 msec on very small boxen */
 	}
 
@@ -6009,7 +6025,8 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
 		avg_fluct = (avg_fluct + fluct)/2;
 
 		if (migration_debug)
-			printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n",
+			printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): "
+				"(%8Ld %8Ld)\n",
 				cpu1, cpu2, size,
 				(long)cost / 1000000,
 				((long)cost / 100000) % 10,
@@ -6104,20 +6121,18 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
 			-1
 #endif
 		);
-	if (system_state == SYSTEM_BOOTING) {
-		if (num_online_cpus() > 1) {
-			printk("migration_cost=");
-			for (distance = 0; distance <= max_distance; distance++) {
-				if (distance)
-					printk(",");
-				printk("%ld", (long)migration_cost[distance] / 1000);
-			}
-			printk("\n");
+	if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) {
+		printk("migration_cost=");
+		for (distance = 0; distance <= max_distance; distance++) {
+			if (distance)
+				printk(",");
+			printk("%ld", (long)migration_cost[distance] / 1000);
 		}
+		printk("\n");
 	}
 	j1 = jiffies;
 	if (migration_debug)
-		printk("migration: %ld seconds\n", (j1-j0)/HZ);
+		printk("migration: %ld seconds\n", (j1-j0) / HZ);
 
 	/*
 	 * Move back to the original CPU. NUMA-Q gets confused
-- 
cgit v1.2.3


From f3d19c90fb117a5f080310a4592929aa8e1ad8e9 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:09 -0800
Subject: [PATCH] fdtable: Delete pointless code in dup_fd()

The dup_fd() function creates a new files_struct and fdtable embedded inside
that files_struct, and then possibly expands the fdtable using expand_files().

The out_release error path is invoked when expand_files() returns an error
code.  However, when this attempt to expand fails, the fdtable is left in its
original embedded form, so it is pointless to try to free the associated
fdarray and fdsets.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 086e172d0d3d..30eab4f063cd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -711,8 +711,10 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	old_fds = old_fdt->fd;
 	new_fds = new_fdt->fd;
 
-	memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
-	memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
+	memcpy(new_fdt->open_fds->fds_bits,
+		old_fdt->open_fds->fds_bits, open_files/8);
+	memcpy(new_fdt->close_on_exec->fds_bits,
+		old_fdt->close_on_exec->fds_bits, open_files/8);
 
 	for (i = open_files; i != 0; i--) {
 		struct file *f = *old_fds++;
@@ -745,14 +747,11 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
 	}
 
-out:
 	return newf;
 
 out_release:
-	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
-	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
-	free_fd_array(new_fdt->fd, new_fdt->max_fds);
 	kmem_cache_free(files_cachep, newf);
+out:
 	return NULL;
 }
 
-- 
cgit v1.2.3


From bbea9f69668a3d0cf9feba15a724cd02896f8675 Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:12 -0800
Subject: [PATCH] fdtable: Make fdarray and fdsets equal in size

Currently, each fdtable supports three dynamically-sized arrays of data: the
fdarray and two fdsets.  The code allows the number of fds supported by the
fdarray (fdtable->max_fds) to differ from the number of fds supported by each
of the fdsets (fdtable->max_fdset).

In practice, it is wasteful for these two sizes to differ: whenever we hit a
limit on the smaller-capacity structure, we will reallocate the entire fdtable
and all the dynamic arrays within it, so any delta in the memory used by the
larger-capacity structure will never be touched at all.

Rather than hogging this excess, we shouldn't even allocate it in the first
place, and keep the capacities of the fdarray and the fdsets equal.  This
patch removes fdtable->max_fdset.  As an added bonus, most of the supporting
code becomes simpler.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/alpha/kernel/osf_sys.c |  6 ++---
 arch/mips/kernel/kspd.c     |  2 +-
 fs/compat.c                 | 10 ++++----
 fs/exec.c                   |  2 +-
 fs/fcntl.c                  |  5 ++--
 fs/file.c                   | 56 ++++++++++++++++++---------------------------
 fs/open.c                   |  3 +--
 fs/select.c                 | 10 ++++----
 include/linux/file.h        |  6 -----
 include/linux/init_task.h   |  1 -
 kernel/exit.c               |  2 +-
 kernel/fork.c               | 24 +++++--------------
 security/selinux/hooks.c    |  2 +-
 13 files changed, 48 insertions(+), 81 deletions(-)

(limited to 'kernel')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fb804043b320..be133f1f75a4 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -979,7 +979,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
 	long timeout;
 	int ret = -EINVAL;
 	struct fdtable *fdt;
-	int max_fdset;
+	int max_fds;
 
 	timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
@@ -1003,9 +1003,9 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
 
 	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	max_fdset = fdt->max_fdset;
+	max_fds = fdt->max_fds;
 	rcu_read_unlock();
-	if (n < 0 || n > max_fdset)
+	if (n < 0 || n > max_fds)
 		goto out_nofds;
 
 	/*
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
index 2c82412b9efe..5929f883e46b 100644
--- a/arch/mips/kernel/kspd.c
+++ b/arch/mips/kernel/kspd.c
@@ -301,7 +301,7 @@ static void sp_cleanup(void)
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
-		if (i >= fdt->max_fdset || i >= fdt->max_fds)
+		if (i >= fdt->max_fds)
 			break;
 		set = fdt->open_fds->fds_bits[j++];
 		while (set) {
diff --git a/fs/compat.c b/fs/compat.c
index b766964a625c..0ec70e3cee0a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1679,19 +1679,19 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 {
 	fd_set_bits fds;
 	char *bits;
-	int size, max_fdset, ret = -EINVAL;
+	int size, max_fds, ret = -EINVAL;
 	struct fdtable *fdt;
 
 	if (n < 0)
 		goto out_nofds;
 
-	/* max_fdset can increase, so grab it once to avoid race */
+	/* max_fds can increase, so grab it once to avoid race */
 	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	max_fdset = fdt->max_fdset;
+	max_fds = fdt->max_fds;
 	rcu_read_unlock();
-	if (n > max_fdset)
-		n = max_fdset;
+	if (n > max_fds)
+		n = max_fds;
 
 	/*
 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
diff --git a/fs/exec.c b/fs/exec.c
index 12d8cd461b41..11fe93f7363c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -783,7 +783,7 @@ static void flush_old_files(struct files_struct * files)
 		j++;
 		i = j * __NFDBITS;
 		fdt = files_fdtable(files);
-		if (i >= fdt->max_fds || i >= fdt->max_fdset)
+		if (i >= fdt->max_fds)
 			break;
 		set = fdt->close_on_exec->fds_bits[j];
 		if (!set)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2bdaef35da54..8e382a5d51bd 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -77,10 +77,9 @@ repeat:
 		start = files->next_fd;
 
 	newfd = start;
-	if (start < fdt->max_fdset) {
+	if (start < fdt->max_fds)
 		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-			fdt->max_fdset, start);
-	}
+					   fdt->max_fds, start);
 	
 	error = -EMFILE;
 	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
diff --git a/fs/file.c b/fs/file.c
index 51aef675470f..fb3d2038dc21 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -68,8 +68,8 @@ void free_fd_array(struct file **array, int num)
 
 static void __free_fdtable(struct fdtable *fdt)
 {
-	free_fdset(fdt->open_fds, fdt->max_fdset);
-	free_fdset(fdt->close_on_exec, fdt->max_fdset);
+	free_fdset(fdt->open_fds, fdt->max_fds);
+	free_fdset(fdt->close_on_exec, fdt->max_fds);
 	free_fd_array(fdt->fd, fdt->max_fds);
 	kfree(fdt);
 }
@@ -98,7 +98,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	struct fdtable_defer *fddef;
 
 	BUG_ON(!fdt);
-	fdset_size = fdt->max_fdset / 8;
+	fdset_size = fdt->max_fds / 8;
 	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
 	if (fdt->free_files) {
@@ -110,13 +110,11 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 		kmem_cache_free(files_cachep, fdt->free_files);
 		return;
 	}
-	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
-		fdt->max_fds <= NR_OPEN_DEFAULT) {
+	if (fdt->max_fds <= NR_OPEN_DEFAULT)
 		/*
 		 * The fdtable was embedded
 		 */
 		return;
-	}
 	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
 		kfree(fdt->open_fds);
 		kfree(fdt->close_on_exec);
@@ -136,9 +134,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 
 void free_fdtable(struct fdtable *fdt)
 {
-	if (fdt->free_files ||
-		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
-		fdt->max_fds > NR_OPEN_DEFAULT)
+	if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT)
 		call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
 
@@ -151,12 +147,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
 	int i;
 	int count;
 
-	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
 	BUG_ON(nfdt->max_fds < fdt->max_fds);
 	/* Copy the existing tables and install the new pointers */
 
-	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
-	count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+	i = fdt->max_fds / (sizeof(unsigned long) * 8);
+	count = (nfdt->max_fds - fdt->max_fds) / 8;
 
 	/*
 	 * Don't copy the entire array if the current fdset is
@@ -164,9 +159,9 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
 	 */
 	if (i) {
 		memcpy (nfdt->open_fds, fdt->open_fds,
-						fdt->max_fdset/8);
+						fdt->max_fds/8);
 		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
-						fdt->max_fdset/8);
+						fdt->max_fds/8);
 		memset (&nfdt->open_fds->fds_bits[i], 0, count);
 		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
 	}
@@ -201,7 +196,7 @@ fd_set * alloc_fdset(int num)
 
 void free_fdset(fd_set *array, int num)
 {
-	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
+	if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */
 		return;
 	else if (num <= 8 * PAGE_SIZE)
 		kfree(array);
@@ -220,18 +215,6 @@ static struct fdtable *alloc_fdtable(int nr)
 	if (!fdt)
   		goto out;
 
-	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
-	if (nfds > NR_OPEN)
-		nfds = NR_OPEN;
-
-  	new_openset = alloc_fdset(nfds);
-  	new_execset = alloc_fdset(nfds);
-  	if (!new_openset || !new_execset)
-  		goto out;
-	fdt->open_fds = new_openset;
-	fdt->close_on_exec = new_execset;
-	fdt->max_fdset = nfds;
-
 	nfds = NR_OPEN_DEFAULT;
 	/*
 	 * Expand to the max in easy steps, and keep expanding it until
@@ -251,15 +234,21 @@ static struct fdtable *alloc_fdtable(int nr)
 				nfds = NR_OPEN;
   		}
 	} while (nfds <= nr);
+
+  	new_openset = alloc_fdset(nfds);
+  	new_execset = alloc_fdset(nfds);
+  	if (!new_openset || !new_execset)
+  		goto out;
+	fdt->open_fds = new_openset;
+	fdt->close_on_exec = new_execset;
+
 	new_fds = alloc_fd_array(nfds);
 	if (!new_fds)
-		goto out2;
+		goto out;
 	fdt->fd = new_fds;
 	fdt->max_fds = nfds;
 	fdt->free_files = NULL;
 	return fdt;
-out2:
-	nfds = fdt->max_fdset;
 out:
 	free_fdset(new_openset, nfds);
 	free_fdset(new_execset, nfds);
@@ -290,7 +279,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 	 * we dropped the lock
 	 */
 	cur_fdt = files_fdtable(files);
-	if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
+	if (nr >= cur_fdt->max_fds) {
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
@@ -316,11 +305,10 @@ int expand_files(struct files_struct *files, int nr)
 
 	fdt = files_fdtable(files);
 	/* Do we need to expand? */
-	if (nr < fdt->max_fdset && nr < fdt->max_fds)
+	if (nr < fdt->max_fds)
 		return 0;
 	/* Can we expand? */
-	if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
-	    nr >= NR_OPEN)
+	if (nr >= NR_OPEN)
 		return -EMFILE;
 
 	/* All good, so we try */
diff --git a/fs/open.c b/fs/open.c
index 0d94319e8681..c989fb4cf7b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -864,8 +864,7 @@ int get_unused_fd(void)
 
 repeat:
 	fdt = files_fdtable(files);
- 	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
-				fdt->max_fdset,
+	fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
 				files->next_fd);
 
 	/*
diff --git a/fs/select.c b/fs/select.c
index dcbc1112b7ec..fe0893afd931 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -311,7 +311,7 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 {
 	fd_set_bits fds;
 	void *bits;
-	int ret, max_fdset;
+	int ret, max_fds;
 	unsigned int size;
 	struct fdtable *fdt;
 	/* Allocate small arguments on the stack to save memory and be faster */
@@ -321,13 +321,13 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	if (n < 0)
 		goto out_nofds;
 
-	/* max_fdset can increase, so grab it once to avoid race */
+	/* max_fds can increase, so grab it once to avoid race */
 	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	max_fdset = fdt->max_fdset;
+	max_fds = fdt->max_fds;
 	rcu_read_unlock();
-	if (n > max_fdset)
-		n = max_fdset;
+	if (n > max_fds)
+		n = max_fds;
 
 	/*
 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
diff --git a/include/linux/file.h b/include/linux/file.h
index 6e77b9177f9e..02be4012225b 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -26,14 +26,8 @@ struct embedded_fd_set {
 	unsigned long fds_bits[1];
 };
 
-/*
- * More than this number of fds: we use a separately allocated fd_set
- */
-#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set))
-
 struct fdtable {
 	unsigned int max_fds;
-	int max_fdset;
 	struct file ** fd;      /* current fd array */
 	fd_set *close_on_exec;
 	fd_set *open_fds;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 7272ff9ee77c..58c18daab65d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,7 +12,6 @@
 #define INIT_FDTABLE \
 {							\
 	.max_fds	= NR_OPEN_DEFAULT, 		\
-	.max_fdset	= EMBEDDED_FD_SET_SIZE,		\
 	.fd		= &init_files.fd_array[0], 	\
 	.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, \
 	.open_fds	= (fd_set *)&init_files.open_fds_init, 	\
diff --git a/kernel/exit.c b/kernel/exit.c
index 03e64fe4a14a..5f77e76b4f97 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -425,7 +425,7 @@ static void close_files(struct files_struct * files)
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
-		if (i >= fdt->max_fdset || i >= fdt->max_fds)
+		if (i >= fdt->max_fds)
 			break;
 		set = fdt->open_fds->fds_bits[j++];
 		while (set) {
diff --git a/kernel/fork.c b/kernel/fork.c
index 30eab4f063cd..aba595424f78 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -614,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 
 static int count_open_files(struct fdtable *fdt)
 {
-	int size = fdt->max_fdset;
+	int size = fdt->max_fds;
 	int i;
 
 	/* Find the last open fd */
@@ -641,7 +641,6 @@ static struct files_struct *alloc_files(void)
 	newf->next_fd = 0;
 	fdt = &newf->fdtab;
 	fdt->max_fds = NR_OPEN_DEFAULT;
-	fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
 	fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
 	fdt->open_fds = (fd_set *)&newf->open_fds_init;
 	fdt->fd = &newf->fd_array[0];
@@ -662,7 +661,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
 	struct files_struct *newf;
 	struct file **old_fds, **new_fds;
-	int open_files, size, i, expand;
+	int open_files, size, i;
 	struct fdtable *old_fdt, *new_fdt;
 
 	*errorp = -ENOMEM;
@@ -673,25 +672,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
 	new_fdt = files_fdtable(newf);
-	size = old_fdt->max_fdset;
 	open_files = count_open_files(old_fdt);
-	expand = 0;
 
 	/*
-	 * Check whether we need to allocate a larger fd array or fd set.
-	 * Note: we're not a clone task, so the open count won't  change.
+	 * Check whether we need to allocate a larger fd array and fd set.
+	 * Note: we're not a clone task, so the open count won't change.
 	 */
-	if (open_files > new_fdt->max_fdset) {
-		new_fdt->max_fdset = 0;
-		expand = 1;
-	}
 	if (open_files > new_fdt->max_fds) {
 		new_fdt->max_fds = 0;
-		expand = 1;
-	}
-
-	/* if the old fdset gets grown now, we'll only copy up to "size" fds */
-	if (expand) {
 		spin_unlock(&oldf->file_lock);
 		spin_lock(&newf->file_lock);
 		*errorp = expand_files(newf, open_files-1);
@@ -739,8 +727,8 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	/* This is long word aligned thus could use a optimized version */ 
 	memset(new_fds, 0, size); 
 
-	if (new_fdt->max_fdset > open_files) {
-		int left = (new_fdt->max_fdset-open_files)/8;
+	if (new_fdt->max_fds > open_files) {
+		int left = (new_fdt->max_fds-open_files)/8;
 		int start = open_files / (8 * sizeof(unsigned long));
 
 		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3753416eb9b9..65fb5e8ea941 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1734,7 +1734,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 		j++;
 		i = j * __NFDBITS;
 		fdt = files_fdtable(files);
-		if (i >= fdt->max_fds || i >= fdt->max_fdset)
+		if (i >= fdt->max_fds)
 			break;
 		set = fdt->open_fds->fds_bits[j];
 		if (!set)
-- 
cgit v1.2.3


From 4fd45812cbe875a620c86a096a5d46c742694b7e Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Sun, 10 Dec 2006 02:21:17 -0800
Subject: [PATCH] fdtable: Remove the free_files field

An fdtable can either be embedded inside a files_struct or standalone (after
being expanded).  When an fdtable is being discarded after all RCU references
to it have expired, we must either free it directly, in the standalone case,
or free the files_struct it is contained within, in the embedded case.

Currently the free_files field controls this behavior, but we can get rid of
it entirely, as all the necessary information is already recorded.  We can
distinguish embedded and standalone fdtables using max_fds, and if it is
embedded we can divine the relevant files_struct using container_of().

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c                 | 27 ++++++++-------------------
 include/linux/file.h      |  3 +--
 include/linux/init_task.h |  1 -
 kernel/exit.c             |  6 ++----
 kernel/fork.c             |  1 -
 5 files changed, 11 insertions(+), 27 deletions(-)

(limited to 'kernel')

diff --git a/fs/file.c b/fs/file.c
index fb3d2038dc21..17e6a55521e2 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -91,7 +91,7 @@ static void free_fdtable_work(struct work_struct *work)
 	}
 }
 
-static void free_fdtable_rcu(struct rcu_head *rcu)
+void free_fdtable_rcu(struct rcu_head *rcu)
 {
 	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
 	int fdset_size, fdarray_size;
@@ -101,20 +101,15 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	fdset_size = fdt->max_fds / 8;
 	fdarray_size = fdt->max_fds * sizeof(struct file *);
 
-	if (fdt->free_files) {
+	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
-		 * The this fdtable was embedded in the files structure
-		 * and the files structure itself was getting destroyed.
-		 * It is now safe to free the files structure.
+		 * This fdtable is embedded in the files structure and that
+		 * structure itself is getting destroyed.
 		 */
-		kmem_cache_free(files_cachep, fdt->free_files);
+		kmem_cache_free(files_cachep,
+				container_of(fdt, struct files_struct, fdtab));
 		return;
 	}
-	if (fdt->max_fds <= NR_OPEN_DEFAULT)
-		/*
-		 * The fdtable was embedded
-		 */
-		return;
 	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
 		kfree(fdt->open_fds);
 		kfree(fdt->close_on_exec);
@@ -132,12 +127,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	}
 }
 
-void free_fdtable(struct fdtable *fdt)
-{
-	if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT)
-		call_rcu(&fdt->rcu, free_fdtable_rcu);
-}
-
 /*
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
@@ -247,7 +236,6 @@ static struct fdtable *alloc_fdtable(int nr)
 		goto out;
 	fdt->fd = new_fds;
 	fdt->max_fds = nfds;
-	fdt->free_files = NULL;
 	return fdt;
 out:
 	free_fdset(new_openset, nfds);
@@ -283,7 +271,8 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
-		free_fdtable(cur_fdt);
+		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
+			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		__free_fdtable(new_fdt);
diff --git a/include/linux/file.h b/include/linux/file.h
index 02be4012225b..319118f275b0 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -32,7 +32,6 @@ struct fdtable {
 	fd_set *close_on_exec;
 	fd_set *open_fds;
 	struct rcu_head rcu;
-	struct files_struct *free_files;
 	struct fdtable *next;
 };
 
@@ -84,7 +83,7 @@ extern fd_set *alloc_fdset(int);
 extern void free_fdset(fd_set *, int);
 
 extern int expand_files(struct files_struct *, int nr);
-extern void free_fdtable(struct fdtable *fdt);
+extern void free_fdtable_rcu(struct rcu_head *rcu);
 extern void __init files_defer_init(void);
 
 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 58c18daab65d..b5315150199e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -16,7 +16,6 @@
 	.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, \
 	.open_fds	= (fd_set *)&init_files.open_fds_init, 	\
 	.rcu		= RCU_HEAD_INIT, 		\
-	.free_files	= NULL,		 		\
 	.next		= NULL,		 		\
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 5f77e76b4f97..122fadb972fc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -466,11 +466,9 @@ void fastcall put_files_struct(struct files_struct *files)
 		 * you can free files immediately.
 		 */
 		fdt = files_fdtable(files);
-		if (fdt == &files->fdtab)
-			fdt->free_files = files;
-		else
+		if (fdt != &files->fdtab)
 			kmem_cache_free(files_cachep, files);
-		free_fdtable(fdt);
+		call_rcu(&fdt->rcu, free_fdtable_rcu);
 	}
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index aba595424f78..d16c566eb645 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,7 +645,6 @@ static struct files_struct *alloc_files(void)
 	fdt->open_fds = (fd_set *)&newf->open_fds_init;
 	fdt->fd = &newf->fd_array[0];
 	INIT_RCU_HEAD(&fdt->rcu);
-	fdt->free_files = NULL;
 	fdt->next = NULL;
 	rcu_assign_pointer(newf->fdt, fdt);
 out:
-- 
cgit v1.2.3


From 4c36a5dec25fb344ad76b11860da3a8b50bd1248 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 10 Dec 2006 02:21:24 -0800
Subject: [PATCH] round_jiffies infrastructure

Introduce a round_jiffies() function as well as a round_jiffies_relative()
function.  These functions round a jiffies value to the next whole second.
The primary purpose of this rounding is to cause all "we don't care exactly
when" timers to happen at the same jiffy.

This avoids multiple timers firing within the second for no real reason;
with dynamic ticks these extra timers cause wakeups from deep sleep CPU
sleep states and thus waste power.

The exact wakeup moment is skewed by the cpu number, to avoid all cpus from
waking up at the exact same time (and hitting the same lock/cachelines
there)

[akpm@osdl.org: fix variable type]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timer.h |   6 +++
 kernel/timer.c        | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+)

(limited to 'kernel')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index c982304dbafd..eeef6643d4c6 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -98,4 +98,10 @@ extern void run_local_timers(void);
 struct hrtimer;
 extern int it_real_fn(struct hrtimer *);
 
+unsigned long __round_jiffies(unsigned long j, int cpu);
+unsigned long __round_jiffies_relative(unsigned long j, int cpu);
+unsigned long round_jiffies(unsigned long j);
+unsigned long round_jiffies_relative(unsigned long j);
+
+
 #endif
diff --git a/kernel/timer.c b/kernel/timer.c
index c1c7fbcffec1..b1f40f256eb0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -80,6 +80,138 @@ tvec_base_t boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
 static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 
+/**
+ * __round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the "j" parameter.
+ */
+unsigned long __round_jiffies(unsigned long j, int cpu)
+{
+	int rem;
+	unsigned long original = j;
+
+	/*
+	 * We don't want all cpus firing their timers at once hitting the
+	 * same lock or cachelines, so we skew each extra cpu with an extra
+	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
+	 * already did this.
+	 * The skew is done by adding 3*cpunr, then round, then subtract this
+	 * extra offset again.
+	 */
+	j += cpu * 3;
+
+	rem = j % HZ;
+
+	/*
+	 * If the target jiffie is just after a whole second (which can happen
+	 * due to delays of the timer irq, long irq off times etc etc) then
+	 * we should round down to the whole second, not up. Use 1/4th second
+	 * as cutoff for this rounding as an extreme upper bound for this.
+	 */
+	if (rem < HZ/4) /* round down */
+		j = j - rem;
+	else /* round up */
+		j = j - rem + HZ;
+
+	/* now that we have rounded, subtract the extra skew again */
+	j -= cpu * 3;
+
+	if (j <= jiffies) /* rounding ate our timeout entirely; */
+		return original;
+	return j;
+}
+EXPORT_SYMBOL_GPL(__round_jiffies);
+
+/**
+ * __round_jiffies_relative - function to round jiffies to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies_relative rounds a time delta  in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the "j" parameter.
+ */
+unsigned long __round_jiffies_relative(unsigned long j, int cpu)
+{
+	/*
+	 * In theory the following code can skip a jiffy in case jiffies
+	 * increments right between the addition and the later subtraction.
+	 * However since the entire point of this function is to use approximate
+	 * timeouts, it's entirely ok to not handle that.
+	 */
+	return  __round_jiffies(j + jiffies, cpu) - jiffies;
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_relative);
+
+/**
+ * round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * round_jiffies rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The return value is the rounded version of the "j" parameter.
+ */
+unsigned long round_jiffies(unsigned long j)
+{
+	return __round_jiffies(j, raw_smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(round_jiffies);
+
+/**
+ * round_jiffies_relative - function to round jiffies to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ *
+ * round_jiffies_relative rounds a time delta  in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The return value is the rounded version of the "j" parameter.
+ */
+unsigned long round_jiffies_relative(unsigned long j)
+{
+	return __round_jiffies_relative(j, raw_smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(round_jiffies_relative);
+
+
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
 {
-- 
cgit v1.2.3


From 2b0137001de68153203dd3bc20e6d27eb7c9719c Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Sun, 10 Dec 2006 02:21:30 -0800
Subject: [PATCH] clocksource: add usage of CONFIG_SYSFS

Simply adds some ifdefs to remove clocksoure sysfs code when CONFIG_SYSFS
isn't turn on.

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/time/clocksource.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 74eca5939bd9..3651b34cc355 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -186,6 +186,7 @@ void clocksource_reselect(void)
 }
 EXPORT_SYMBOL(clocksource_reselect);
 
+#ifdef CONFIG_SYSFS
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource
  * @dev:	unused
@@ -307,6 +308,7 @@ static int __init init_clocksource_sysfs(void)
 }
 
 device_initcall(init_clocksource_sysfs);
+#endif /* CONFIG_SYSFS */
 
 /**
  * boot_override_clocksource - boot clock override
-- 
cgit v1.2.3


From f5f1a24a2caa299bb7d294aee92d7dd3410d9ed7 Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Sun, 10 Dec 2006 02:21:33 -0800
Subject: [PATCH] clocksource: small cleanup

Mostly changing alignment.  Just some general cleanup.

[akpm@osdl.org: build fix]
Signed-off-by: Daniel Walker <dwalker@mvista.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/clocksource/acpi_pm.c |  6 +++---
 include/linux/clocksource.h   |  2 +-
 kernel/time/clocksource.c     |  6 +++---
 kernel/timer.c                | 16 +++++++++++-----
 4 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 8ab61ef97b4c..b6bcdbbf57b3 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -77,11 +77,11 @@ static struct clocksource clocksource_acpi_pm = {
 
 
 #ifdef CONFIG_PCI
-static int acpi_pm_good;
+static int __devinitdata acpi_pm_good;
 static int __init acpi_pm_good_setup(char *__str)
 {
-       acpi_pm_good = 1;
-       return 1;
+	acpi_pm_good = 1;
+	return 1;
 }
 __setup("acpi_pm_good", acpi_pm_good_setup);
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d852024ed095..1622d23a8dc3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -159,7 +159,7 @@ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles)
  * Unless you're the timekeeping code, you should not be using this!
  */
 static inline void clocksource_calculate_interval(struct clocksource *c,
-						unsigned long length_nsec)
+					  	  unsigned long length_nsec)
 {
 	u64 tmp;
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3651b34cc355..22504afc0d34 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -156,7 +156,7 @@ int clocksource_register(struct clocksource *c)
 	/* check if clocksource is already registered */
 	if (is_registered_source(c)) {
 		printk("register_clocksource: Cannot register %s. "
-			"Already registered!", c->name);
+		       "Already registered!", c->name);
 		ret = -EBUSY;
 	} else {
 		/* register it */
@@ -276,10 +276,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
  * Sysfs setup bits:
  */
 static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
-			sysfs_override_clocksource);
+		   sysfs_override_clocksource);
 
 static SYSDEV_ATTR(available_clocksource, 0600,
-			sysfs_show_available_clocksources, NULL);
+		   sysfs_show_available_clocksources, NULL);
 
 static struct sysdev_class clocksource_sysclass = {
 	set_kset_name("clocksource"),
diff --git a/kernel/timer.c b/kernel/timer.c
index b1f40f256eb0..0256ab443d8a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -846,7 +846,7 @@ static int change_clocksource(void)
 		clock = new;
 		clock->cycle_last = now;
 		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-					clock->name);
+		       clock->name);
 		return 1;
 	} else if (clock->update_callback) {
 		return clock->update_callback();
@@ -854,7 +854,10 @@ static int change_clocksource(void)
 	return 0;
 }
 #else
-#define change_clocksource() (0)
+static inline int change_clocksource(void)
+{
+	return 0;
+}
 #endif
 
 /**
@@ -952,7 +955,8 @@ device_initcall(timekeeping_init_device);
  * If the error is already larger, we look ahead even further
  * to compensate for late or lost adjustments.
  */
-static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset)
+static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
+						 s64 *offset)
 {
 	s64 tick_error, i;
 	u32 look_ahead, adj;
@@ -976,7 +980,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *
 	 * Now calculate the error in (1 << look_ahead) ticks, but first
 	 * remove the single look ahead already included in the error.
 	 */
-	tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
+	tick_error = current_tick_length() >>
+		(TICK_LENGTH_SHIFT - clock->shift + 1);
 	tick_error -= clock->xtime_interval >> 1;
 	error = ((error - tick_error) >> look_ahead) + tick_error;
 
@@ -1028,7 +1033,8 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset)
 	clock->mult += adj;
 	clock->xtime_interval += interval;
 	clock->xtime_nsec -= offset;
-	clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
+	clock->error -= (interval - offset) <<
+			(TICK_LENGTH_SHIFT - clock->shift);
 }
 
 /**
-- 
cgit v1.2.3


From 5f8442edfb214908e9c6ca1142bf882c9bc364e5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 Dec 2006 00:34:04 -0800
Subject: [PATCH] Revert "[PATCH] identifier to nsproxy"

This reverts commit 373beb35cd6b625e0ba4ad98baace12310a26aa8.

No one is using this identifier yet.  The purpose of this identifier is to
export nsproxy to user space which is wrong.  nsproxy is an internal
implementation optimization, which should keep our fork times from getting
slower as we increase the number of global namespaces you don't have to
share.

Adding a global identifier like this is inappropriate because it makes
namespaces inherently non-recursive, greatly limiting what we can do with
them in the future.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init_task.h | 1 -
 include/linux/nsproxy.h   | 1 -
 kernel/nsproxy.c          | 4 +---
 3 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index b5315150199e..6383d2d83bb0 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -75,7 +75,6 @@ extern struct nsproxy init_nsproxy;
 	.pid_ns		= &init_pid_ns,					\
 	.count		= ATOMIC_INIT(1),				\
 	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
-	.id		= 0,						\
 	.uts_ns		= &init_uts_ns,					\
 	.mnt_ns		= NULL,						\
 	INIT_IPC_NS(ipc_ns)						\
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index fdfb0e44912f..0b9f0dc30d61 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -24,7 +24,6 @@ struct pid_namespace;
 struct nsproxy {
 	atomic_t count;
 	spinlock_t nslock;
-	unsigned long id;
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index e2ce748e96af..f5b9ee6f6bbb 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -46,10 +46,8 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
 	struct nsproxy *ns;
 
 	ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
-	if (ns) {
+	if (ns)
 		atomic_set(&ns->count, 1);
-		ns->id = -1;
-	}
 	return ns;
 }
 
-- 
cgit v1.2.3


From 02a0e53d8227aff5e62e0433f82c12c1c2805fd6 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 13 Dec 2006 00:34:25 -0800
Subject: [PATCH] cpuset: rework cpuset_zone_allowed api

Elaborate the API for calling cpuset_zone_allowed(), so that users have to
explicitly choose between the two variants:

  cpuset_zone_allowed_hardwall()
  cpuset_zone_allowed_softwall()

Until now, whether or not you got the hardwall flavor depended solely on
whether or not you or'd in the __GFP_HARDWALL gfp flag to the gfp_mask
argument.

If you didn't specify __GFP_HARDWALL, you implicitly got the softwall
version.

Unfortunately, this meant that users would end up with the softwall version
without thinking about it.  Since only the softwall version might sleep,
this led to bugs with possible sleeping in interrupt context on more than
one occassion.

The hardwall version requires that the current tasks mems_allowed allows
the node of the specified zone (or that you're in interrupt or that
__GFP_THISNODE is set or that you're on a one cpuset system.)

The softwall version, depending on the gfp_mask, might allow a node if it
was allowed in the nearest enclusing cpuset marked mem_exclusive (which
requires taking the cpuset lock 'callback_mutex' to evaluate.)

This patch removes the cpuset_zone_allowed() call, and forces the caller to
explicitly choose between the hardwall and the softwall case.

If the caller wants the gfp_mask to determine this choice, they should (1)
be sure they can sleep or that __GFP_HARDWALL is set, and (2) invoke the
cpuset_zone_allowed_softwall() routine.

This adds another 100 or 200 bytes to the kernel text space, due to the few
lines of nearly duplicate code at the top of both cpuset_zone_allowed_*
routines.  It should save a few instructions executed for the calls that
turned into calls of cpuset_zone_allowed_hardwall, thanks to not having to
set (before the call) then check (within the call) the __GFP_HARDWALL flag.

For the most critical call, from get_page_from_freelist(), the same
instructions are executed as before -- the old cpuset_zone_allowed()
routine it used to call is the same code as the
cpuset_zone_allowed_softwall() routine that it calls now.

Not a perfect win, but seems worth it, to reduce this chance of hitting a
sleeping with irq off complaint again.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cpuset.h | 22 +++++++++++---
 kernel/cpuset.c        | 82 ++++++++++++++++++++++++++++++++++++++++----------
 mm/hugetlb.c           |  2 +-
 mm/oom_kill.c          |  2 +-
 mm/page_alloc.c        |  2 +-
 mm/slab.c              |  2 +-
 mm/vmscan.c            |  8 ++---
 7 files changed, 92 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8821e1f75b44..826b15e914e2 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -30,10 +30,19 @@ void cpuset_update_task_memory_state(void);
 		nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
-extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
-static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
+extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
+
+static int inline cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_softwall(z, gfp_mask);
+}
+
+static int inline cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
-	return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_hardwall(z, gfp_mask);
 }
 
 extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
@@ -94,7 +103,12 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 1;
 }
 
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return 1;
+}
+
+static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
 	return 1;
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2c3b4431472b..232aed2b10f9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate.  If zone
+ * If we're in interrupt, yes, we can always allocate.  If
+ * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall().  Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex.  The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex.  The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate.  If zone
+ * z's node is in our tasks mems_allowed, yes.   Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt.  It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f230252..089092d152ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
 	for (z = zonelist->zones; *z; z++) {
 		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
 	}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 223d9ccb7d64..64cf3c214634 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,7 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 	nodemask_t nodes = node_online_map;
 
 	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed(*z, gfp_mask))
+		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
 			node_clear(zone_to_nid(*z), nodes);
 		else
 			return CONSTRAINT_CPUSET;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6b17b2989e0..8c1a116875bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,7 +1162,7 @@ zonelist_scan:
 			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
 				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(zone, gfp_mask))
+			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
 
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
diff --git a/mm/slab.c b/mm/slab.c
index 9d3550086c93..b856786a3a30 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3262,7 +3262,7 @@ retry:
 	for (z = zonelist->zones; *z && !obj; z++) {
 		nid = zone_to_nid(*z);
 
-		if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+		if (cpuset_zone_allowed_hardwall(*z, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 093f5fe6dd77..e9813b06c7a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -984,7 +984,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (!populated_zone(zone))
 			continue;
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		note_zone_scanning_priority(zone, priority);
@@ -1034,7 +1034,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		lru_pages += zone->nr_active + zone->nr_inactive;
@@ -1089,7 +1089,7 @@ out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		zone->prev_priority = priority;
@@ -1354,7 +1354,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
-- 
cgit v1.2.3


From 3df494a32b936aef76d893f5065f962ebd9b9437 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 13 Dec 2006 00:34:28 -0800
Subject: [PATCH] PM: Fix freezing of stopped tasks

Currently, if a task is stopped (ie.  it's in the TASK_STOPPED state), it
is considered by the freezer as unfreezeable.  However, there may be a race
between the freezer and the delivery of the continuation signal to the task
resulting in the task running after we have finished freezing the other
tasks.  This, in turn, may lead to undesirable effects up to and including
data corruption.

To prevent this from happening we first need to make the freezer consider
stopped tasks as freezeable.  For this purpose we need to make freezeable()
stop returning 0 for these tasks and we need to force them to enter the
refrigerator.  However, if there's no continuation signal in the meantime,
the stopped tasks should remain stopped after all processes have been
thawed, so we need to send an additional SIGSTOP to each of them before
waking it up.

Also, a stopped task that has just been woken up should first check if
there's a freezing request for it and go to the refrigerator if that's the
case.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 12 ++++++------
 kernel/signal.c        |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 99eeb119b06d..b9a32860bef3 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,8 +28,7 @@ static inline int freezeable(struct task_struct * p)
 	if ((p == current) || 
 	    (p->flags & PF_NOFREEZE) ||
 	    (p->exit_state == EXIT_ZOMBIE) ||
-	    (p->exit_state == EXIT_DEAD) ||
-	    (p->state == TASK_STOPPED))
+	    (p->exit_state == EXIT_DEAD))
 		return 0;
 	return 1;
 }
@@ -61,9 +60,12 @@ static inline void freeze_process(struct task_struct *p)
 	unsigned long flags;
 
 	if (!freezing(p)) {
+		if (p->state == TASK_STOPPED)
+			force_sig_specific(SIGSTOP, p);
+
 		freeze(p);
 		spin_lock_irqsave(&p->sighand->siglock, flags);
-		signal_wake_up(p, 0);
+		signal_wake_up(p, p->state == TASK_STOPPED);
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 }
@@ -103,9 +105,7 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
 			if (frozen(p))
 				continue;
 
-			if (p->state == TASK_TRACED &&
-			    (frozen(p->parent) ||
-			     p->parent->state == TASK_STOPPED)) {
+			if (p->state == TASK_TRACED && frozen(p->parent)) {
 				cancel_freezing(p);
 				continue;
 			}
diff --git a/kernel/signal.c b/kernel/signal.c
index 1921ffdc5e77..5630255d2e2a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1705,7 +1705,9 @@ finish_stop(int stop_count)
 		read_unlock(&tasklist_lock);
 	}
 
-	schedule();
+	do {
+		schedule();
+	} while (try_to_freeze());
 	/*
 	 * Now we don't run again until continued.
 	 */
-- 
cgit v1.2.3


From 8a102eed9c4e1d21bad07a8fd97bd4fbf125d966 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 13 Dec 2006 00:34:30 -0800
Subject: [PATCH] PM: Fix SMP races in the freezer

Currently, to tell a task that it should go to the refrigerator, we set the
PF_FREEZE flag for it and send a fake signal to it.  Unfortunately there
are two SMP-related problems with this approach.  First, a task running on
another CPU may be updating its flags while the freezer attempts to set
PF_FREEZE for it and this may leave the task's flags in an inconsistent
state.  Second, there is a potential race between freeze_process() and
refrigerator() in which freeze_process() running on one CPU is reading a
task's PF_FREEZE flag while refrigerator() running on another CPU has just
set PF_FROZEN for the same task and attempts to reset PF_FREEZE for it.  If
the refrigerator wins the race, freeze_process() will state that PF_FREEZE
hasn't been set for the task and will set it unnecessarily, so the task
will go to the refrigerator once again after it's been thawed.

To solve first of these problems we need to stop using PF_FREEZE to tell
tasks that they should go to the refrigerator.  Instead, we can introduce a
special TIF_*** flag and use it for this purpose, since it is allowed to
change the other tasks' TIF_*** flags and there are special calls for it.

To avoid the freeze_process()-refrigerator() race we can make
freeze_process() to always check the task's PF_FROZEN flag after it's read
its "freeze" flag.  We should also make sure that refrigerator() will
always reset the task's "freeze" flag after it's set PF_FROZEN for it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-arm/thread_info.h     |  2 ++
 include/asm-frv/thread_info.h     |  2 ++
 include/asm-i386/thread_info.h    |  2 ++
 include/asm-ia64/thread_info.h    |  2 ++
 include/asm-powerpc/thread_info.h |  2 ++
 include/asm-sh/thread_info.h      |  2 ++
 include/asm-x86_64/thread_info.h  |  2 ++
 include/linux/freezer.h           | 11 ++++++-----
 include/linux/sched.h             |  1 -
 kernel/power/process.c            | 17 ++++++++++-------
 10 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h
index d9b8bddc8732..5014794f9eb3 100644
--- a/include/asm-arm/thread_info.h
+++ b/include/asm-arm/thread_info.h
@@ -147,6 +147,7 @@ extern void iwmmxt_task_switch(struct thread_info *);
 #define TIF_POLLING_NRFLAG	16
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18
+#define TIF_FREEZE		19
 
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -154,6 +155,7 @@ extern void iwmmxt_task_switch(struct thread_info *);
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /*
  * Change these and you break ASM code in entry-common.S
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h
index d66c48e6ef14..d881f518e6a9 100644
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -116,6 +116,7 @@ register struct thread_info *__current_thread_info asm("gr15");
 #define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* OOM killer killed process */
+#define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -125,6 +126,7 @@ register struct thread_info *__current_thread_info asm("gr15");
 #define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 46d32ad92082..4b187bb377b4 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		16
 #define TIF_DEBUG		17	/* uses debug registers */
 #define TIF_IO_BITMAP		18	/* uses I/O bitmap */
+#define TIF_FREEZE		19	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -147,6 +148,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_DEBUG		(1<<TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1<<TIF_IO_BITMAP)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 8adcde0934ca..9b505b25544f 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_FREEZE		20	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
@@ -98,6 +99,7 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
 #define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /* "work to do on user-return" bits */
 #define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index d339e2e88b11..3f32ca8bfec9 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -122,6 +122,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTOREALL		12	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		14	/* Force successful syscall return */
 #define TIF_RESTORE_SIGMASK	15	/* Restore signal mask in do_signal */
+#define TIF_FREEZE		16	/* Freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -138,6 +139,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
 #define _TIF_NOERROR		(1<<TIF_NOERROR)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
 
 #define _TIF_USER_WORK_MASK	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h
index 0c01dc550819..879f741105db 100644
--- a/include/asm-sh/thread_info.h
+++ b/include/asm-sh/thread_info.h
@@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18
+#define TIF_FREEZE		19
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -114,6 +115,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x000000FE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x000000FF	/* work to do on any return to u-space */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 787a08114b48..74a6c74397f7 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -122,6 +122,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_MEMDIE		20
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+#define TIF_FREEZE		23	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -137,6 +138,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
 #define _TIF_DEBUG		(1<<TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1<<TIF_IO_BITMAP)
+#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 393063096134..5e75e26d4787 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -16,16 +16,15 @@ static inline int frozen(struct task_struct *p)
  */
 static inline int freezing(struct task_struct *p)
 {
-	return p->flags & PF_FREEZE;
+	return test_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
  * Request that a process be frozen
- * FIXME: SMP problem. We may not modify other process' flags!
  */
 static inline void freeze(struct task_struct *p)
 {
-	p->flags |= PF_FREEZE;
+	set_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -33,7 +32,7 @@ static inline void freeze(struct task_struct *p)
  */
 static inline void do_not_freeze(struct task_struct *p)
 {
-	p->flags &= ~PF_FREEZE;
+	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -54,7 +53,9 @@ static inline int thaw_process(struct task_struct *p)
  */
 static inline void frozen_process(struct task_struct *p)
 {
-	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+	p->flags |= PF_FROZEN;
+	wmb();
+	clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 extern void refrigerator(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea92e5c89089..446373535190 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1144,7 +1144,6 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_FLUSHER	0x00001000	/* responsible for disk writeback */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_FREEZE	0x00004000	/* this task is being frozen for suspend now */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index b9a32860bef3..6d566bf7085c 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -60,13 +60,16 @@ static inline void freeze_process(struct task_struct *p)
 	unsigned long flags;
 
 	if (!freezing(p)) {
-		if (p->state == TASK_STOPPED)
-			force_sig_specific(SIGSTOP, p);
-
-		freeze(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		signal_wake_up(p, p->state == TASK_STOPPED);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		rmb();
+		if (!frozen(p)) {
+			if (p->state == TASK_STOPPED)
+				force_sig_specific(SIGSTOP, p);
+
+			freeze(p);
+			spin_lock_irqsave(&p->sighand->siglock, flags);
+			signal_wake_up(p, p->state == TASK_STOPPED);
+			spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 5d6f647fc6bb57377c9f417c4752e43189f56bb1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:36 -0800
Subject: [PATCH] debug: add sysrq_always_enabled boot option

Most distributions enable sysrq support but set it to 0 by default.  Add a
sysrq_always_enabled boot option to always-enable sysrq keys.  Useful for
debugging - without having to modify the disribution's config files (which
might not be possible if the kernel is on a live CD, etc.).

Also, while at it, clean up the sysrq interfaces.

[bunk@stusta.de: make sysrq_always_enabled_setup() static]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  6 ++++++
 drivers/char/sysrq.c                | 37 +++++++++++++++++++++++++++++++------
 drivers/char/viocons.c              | 10 ++++------
 include/linux/sysrq.h               | 22 ++++++++++++++++++----
 kernel/sysctl.c                     |  3 +--
 5 files changed, 60 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d8323b8893c3..ef69c75780bf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1656,6 +1656,12 @@ and is between 256 and 4096 characters. It is defined in the file
 	sym53c416=	[HW,SCSI]
 			See header of drivers/scsi/sym53c416.c.
 
+	sysrq_always_enabled
+			[KNL]
+			Ignore sysrq setting - this boot parameter will
+			neutralize any effect of /proc/sys/kernel/sysrq.
+			Useful for debugging.
+
 	t128=		[HW,SCSI]
 			See header of drivers/scsi/t128.c.
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 05810c8d20bc..13935235e066 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -41,7 +41,34 @@
 #include <asm/irq_regs.h>
 
 /* Whether we react on sysrq keys or just ignore them */
-int sysrq_enabled = 1;
+int __read_mostly __sysrq_enabled = 1;
+
+static int __read_mostly sysrq_always_enabled;
+
+int sysrq_on(void)
+{
+	return __sysrq_enabled || sysrq_always_enabled;
+}
+
+/*
+ * A value of 1 means 'all', other nonzero values are an op mask:
+ */
+static inline int sysrq_on_mask(int mask)
+{
+	return sysrq_always_enabled || __sysrq_enabled == 1 ||
+						(__sysrq_enabled & mask);
+}
+
+static int __init sysrq_always_enabled_setup(char *str)
+{
+	sysrq_always_enabled = 1;
+	printk(KERN_INFO "debug: sysrq always enabled.\n");
+
+	return 1;
+}
+
+__setup("sysrq_always_enabled", sysrq_always_enabled_setup);
+
 
 static void sysrq_handle_loglevel(int key, struct tty_struct *tty)
 {
@@ -379,8 +406,7 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 		 * Should we check for enabled operations (/proc/sysrq-trigger
 		 * should not) and is the invoked operation enabled?
 		 */
-		if (!check_mask || sysrq_enabled == 1 ||
-		    (sysrq_enabled & op_p->enable_mask)) {
+		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			printk("%s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
 			op_p->handler(key, tty);
@@ -414,9 +440,8 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
  */
 void handle_sysrq(int key, struct tty_struct *tty)
 {
-	if (!sysrq_enabled)
-		return;
-	__handle_sysrq(key, tty, 1);
+	if (sysrq_on())
+		__handle_sysrq(key, tty, 1);
 }
 EXPORT_SYMBOL(handle_sysrq);
 
diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c
index 6d2e314860df..0e0da443cbd5 100644
--- a/drivers/char/viocons.c
+++ b/drivers/char/viocons.c
@@ -61,10 +61,7 @@
 static DEFINE_SPINLOCK(consolelock);
 static DEFINE_SPINLOCK(consoleloglock);
 
-#ifdef CONFIG_MAGIC_SYSRQ
 static int vio_sysrq_pressed;
-extern int sysrq_enabled;
-#endif
 
 #define VIOCHAR_NUM_BUF		16
 
@@ -936,8 +933,10 @@ static void vioHandleData(struct HvLpEvent *event)
 	 */
 	num_pushed = 0;
 	for (index = 0; index < cevent->len; index++) {
-#ifdef CONFIG_MAGIC_SYSRQ
-		if (sysrq_enabled) {
+		/*
+		 * Will be optimized away if !CONFIG_MAGIC_SYSRQ:
+		 */
+		if (sysrq_on()) {
 			/* 0x0f is the ascii character for ^O */
 			if (cevent->data[index] == '\x0f') {
 				vio_sysrq_pressed = 1;
@@ -956,7 +955,6 @@ static void vioHandleData(struct HvLpEvent *event)
 				continue;
 			}
 		}
-#endif
 		/*
 		 * The sysrq sequence isn't included in this check if
 		 * sysrq is enabled and compiled into the kernel because
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 9df8833670cb..98a1d8cfb73d 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -37,23 +37,37 @@ struct sysrq_key_op {
 
 #ifdef CONFIG_MAGIC_SYSRQ
 
+extern int sysrq_on(void);
+
+/*
+ * Do not use this one directly:
+ */
+extern int __sysrq_enabled;
+
 /* Generic SysRq interface -- you may call it from any device driver, supplying
  * ASCII code of the key, pointer to registers and kbd/tty structs (if they
  * are available -- else NULL's).
  */
 
-void handle_sysrq(int, struct tty_struct *);
-void __handle_sysrq(int, struct tty_struct *, int check_mask);
-int register_sysrq_key(int, struct sysrq_key_op *);
-int unregister_sysrq_key(int, struct sysrq_key_op *);
+void handle_sysrq(int key, struct tty_struct *tty);
+void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
+int register_sysrq_key(int key, struct sysrq_key_op *op);
+int unregister_sysrq_key(int key, struct sysrq_key_op *op);
 struct sysrq_key_op *__sysrq_get_key_op(int key);
 
 #else
 
+static inline int sysrq_on(void)
+{
+	return 0;
+}
 static inline int __reterr(void)
 {
 	return -EINVAL;
 }
+static inline void handle_sysrq(int key, struct tty_struct *tty)
+{
+}
 
 #define register_sysrq_key(ig,nore) __reterr()
 #define unregister_sysrq_key(ig,nore) __reterr()
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 130c5ec9ee0b..600b33358ded 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,7 +65,6 @@ extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern int sysctl_panic_on_oom;
 extern int max_threads;
-extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
@@ -543,7 +542,7 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_SYSRQ,
 		.procname	= "sysrq",
-		.data		= &sysrq_enabled,
+		.data		= &__sysrq_enabled,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
-- 
cgit v1.2.3


From a664089741aa9010333ecbdadbf5d9de62bafa2d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:39 -0800
Subject: [PATCH] lockdep: filter off by default

Fix typo in the class_filter() function.  (filtering is not used by default so
this only affects lockdep-internal debugging cases)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b02032476dc2..5d69c7d7407e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -172,8 +172,8 @@ static int class_filter(struct lock_class *class)
 			!strcmp(class->name, "&struct->lockfield"))
 		return 1;
 #endif
-	/* Allow everything else. 0 would be filter everything else */
-	return 1;
+	/* Filter everything else. 1 would be to allow everything else */
+	return 0;
 }
 #endif
 
-- 
cgit v1.2.3


From 81fc685a898f84d0787eeebb1c118de0bd3484a0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:40 -0800
Subject: [PATCH] lockdep: improve verbose messages

Make verbose lockdep messages (off by default) more informative by printing
out the hash chain key.  (this patch was what helped me catch the earlier
lockdep hash-collision bug)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5d69c7d7407e..5ba2825bd46a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1264,7 +1264,7 @@ out_unlock_set:
  * add it and return 0 - in this case the new dependency chain is
  * validated. If the key is already hashed, return 1.
  */
-static inline int lookup_chain_cache(u64 chain_key)
+static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class)
 {
 	struct list_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
@@ -1286,9 +1286,13 @@ cache_hit:
 			__raw_spin_lock(&hash_lock);
 			return 1;
 #endif
+			if (very_verbose(class))
+				printk("\nhash chain already cached, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name);
 			return 0;
 		}
 	}
+	if (very_verbose(class))
+		printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name);
 	/*
 	 * Allocate a new chain entry from the static array, and add
 	 * it to the hash:
@@ -2139,7 +2143,7 @@ out_calc_hash:
 	 * (If lookup_chain_cache() returns with 1 it acquires
 	 * hash_lock for us)
 	 */
-	if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
+	if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) {
 		/*
 		 * Check whether last held lock:
 		 *
-- 
cgit v1.2.3


From 23d95a03d63eff25118b50737006ce6e7c1b8def Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:40 -0800
Subject: [PATCH] lockdep: improve lockdep_reset()

Clear all the chains during lockdep_reset().  This fixes some locking-selftest
false positives i saw on -rt.  (never saw those on mainline though, but it
could happen.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5ba2825bd46a..77fa791f6f31 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2437,6 +2437,7 @@ EXPORT_SYMBOL_GPL(lock_release);
 void lockdep_reset(void)
 {
 	unsigned long flags;
+	int i;
 
 	raw_local_irq_save(flags);
 	current->curr_chain_key = 0;
@@ -2447,6 +2448,8 @@ void lockdep_reset(void)
 	nr_softirq_chains = 0;
 	nr_process_chains = 0;
 	debug_locks = 1;
+	for (i = 0; i < CHAINHASH_SIZE; i++)
+		INIT_LIST_HEAD(chainhash_table + i);
 	raw_local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From 33e94e960b57497fe7fd9493080210b6d87e88e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:41 -0800
Subject: [PATCH] lockdep: clean up VERY_VERBOSE define

Cleanup: the VERY_VERBOSE define was unnecessarily dependent on #ifdef VERBOSE
- while the VERBOSE switch is 0 or 1 (always defined).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 77fa791f6f31..c4ffd3c1dec4 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -145,9 +145,7 @@ EXPORT_SYMBOL(lockdep_on);
  */
 
 #define VERBOSE			0
-#ifdef VERBOSE
-# define VERY_VERBOSE		0
-#endif
+#define VERY_VERBOSE		0
 
 #if VERBOSE
 # define HARDIRQ_VERBOSE	1
-- 
cgit v1.2.3


From 27c3b23226fc649de47e4886ccbf994482f388ba Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:42 -0800
Subject: [PATCH] lockdep: use chain hash on CONFIG_DEBUG_LOCKDEP too

CONFIG_DEBUG_LOCKDEP is unacceptably slow because it does not utilize
the chain-hash. Turn the chain-hash back on in this case too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c4ffd3c1dec4..69e92c6b0472 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1276,14 +1276,6 @@ static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class)
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(&chain_lookup_hits);
-			/*
-			 * In the debugging case, force redundant checking
-			 * by returning 1:
-			 */
-#ifdef CONFIG_DEBUG_LOCKDEP
-			__raw_spin_lock(&hash_lock);
-			return 1;
-#endif
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name);
 			return 0;
-- 
cgit v1.2.3


From 3117df0453828bd045c16244e6f50e5714667a8a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:43 -0800
Subject: [PATCH] lockdep: print irq-trace info on asserts

When we print an assert due to scheduling-in-atomic bugs, and if lockdep
is enabled, then the IRQ tracing information of lockdep can be printed
to pinpoint the code location that disabled interrupts. This saved me
quite a bit of debugging time in cases where the backtrace did not
identify the irq-disabling site well enough.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/lockdep.h | 16 +++++++++++++---
 kernel/lockdep.c        |  6 +-----
 kernel/sched.c          |  4 ++++
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 498bfbd3b4e1..ea097dddc44f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -281,15 +281,25 @@ struct lock_class_key { };
 #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
 extern void early_init_irq_lock_class(void);
 #else
-# define early_init_irq_lock_class()		do { } while (0)
+static inline void early_init_irq_lock_class(void)
+{
+}
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void early_boot_irqs_off(void);
 extern void early_boot_irqs_on(void);
+extern void print_irqtrace_events(struct task_struct *curr);
 #else
-# define early_boot_irqs_off()			do { } while (0)
-# define early_boot_irqs_on()			do { } while (0)
+static inline void early_boot_irqs_off(void)
+{
+}
+static inline void early_boot_irqs_on(void)
+{
+}
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
 #endif
 
 /*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 69e92c6b0472..07a3d74a84be 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1447,7 +1447,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
 	return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
 }
 
-static inline void print_irqtrace_events(struct task_struct *curr)
+void print_irqtrace_events(struct task_struct *curr)
 {
 	printk("irq event stamp: %u\n", curr->irq_events);
 	printk("hardirqs last  enabled at (%u): ", curr->hardirq_enable_event);
@@ -1460,10 +1460,6 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 	print_ip_sym(curr->softirq_disable_ip);
 }
 
-#else
-static inline void print_irqtrace_events(struct task_struct *curr)
-{
-}
 #endif
 
 static int
diff --git a/kernel/sched.c b/kernel/sched.c
index 8a0afb97af71..5cd833bc2173 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3429,6 +3429,8 @@ asmlinkage void __sched schedule(void)
 			"%s/0x%08x/%d\n",
 			current->comm, preempt_count(), current->pid);
 		debug_show_held_locks(current);
+		if (irqs_disabled())
+			print_irqtrace_events(current);
 		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
@@ -6977,6 +6979,8 @@ void __might_sleep(char *file, int line)
 		printk("in_atomic():%d, irqs_disabled():%d\n",
 			in_atomic(), irqs_disabled());
 		debug_show_held_locks(current);
+		if (irqs_disabled())
+			print_irqtrace_events(current);
 		dump_stack();
 	}
 #endif
-- 
cgit v1.2.3


From 74c383f1400f559562aa517d6d62f77245bddf52 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 Dec 2006 00:34:43 -0800
Subject: [PATCH] lockdep: fix possible races while disabling lock-debugging

Jarek Poplawski noticed that lockdep global state could be accessed in a
racy way if one CPU did a lockdep assert (shutting lockdep down), while the
other CPU would try to do something that changes its global state.

This patch fixes those races and cleans up lockdep's internal locking by
adding a graph_lock()/graph_unlock()/debug_locks_off_graph_unlock helpers.

(Also note that as we all know the Linux kernel is, by definition, bug-free
and perfect, so this code never triggers, so these fixes are highly
theoretical.  I wrote this patch for aesthetic reasons alone.)

[akpm@osdl.org: build fix]
[jarkao2@o2.pl: build fix's refix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 170 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 105 insertions(+), 65 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 07a3d74a84be..01e750559034 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -43,13 +43,49 @@
 #include "lockdep_internals.h"
 
 /*
- * hash_lock: protects the lockdep hashes and class/list/hash allocators.
+ * lockdep_lock: protects the lockdep graph, the hashes and the
+ *               class/list/hash allocators.
  *
  * This is one of the rare exceptions where it's justified
  * to use a raw spinlock - we really dont want the spinlock
- * code to recurse back into the lockdep code.
+ * code to recurse back into the lockdep code...
  */
-static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int graph_lock(void)
+{
+	__raw_spin_lock(&lockdep_lock);
+	/*
+	 * Make sure that if another CPU detected a bug while
+	 * walking the graph we dont change it (while the other
+	 * CPU is busy printing out stuff with the graph lock
+	 * dropped already)
+	 */
+	if (!debug_locks) {
+		__raw_spin_unlock(&lockdep_lock);
+		return 0;
+	}
+	return 1;
+}
+
+static inline int graph_unlock(void)
+{
+	__raw_spin_unlock(&lockdep_lock);
+	return 0;
+}
+
+/*
+ * Turn lock debugging off and return with 0 if it was off already,
+ * and also release the graph lock:
+ */
+static inline int debug_locks_off_graph_unlock(void)
+{
+	int ret = debug_locks_off();
+
+	__raw_spin_unlock(&lockdep_lock);
+
+	return ret;
+}
 
 static int lockdep_initialized;
 
@@ -57,14 +93,15 @@ unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 
 /*
- * Allocate a lockdep entry. (assumes hash_lock held, returns
+ * Allocate a lockdep entry. (assumes the graph_lock held, returns
  * with NULL on failure)
  */
 static struct lock_list *alloc_list_entry(void)
 {
 	if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
-		__raw_spin_unlock(&hash_lock);
-		debug_locks_off();
+		if (!debug_locks_off_graph_unlock())
+			return NULL;
+
 		printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
 		printk("turning off the locking correctness validator.\n");
 		return NULL;
@@ -205,7 +242,7 @@ static int softirq_verbose(struct lock_class *class)
 
 /*
  * Stack-trace: tightly packed array of stack backtrace
- * addresses. Protected by the hash_lock.
+ * addresses. Protected by the graph_lock.
  */
 unsigned long nr_stack_trace_entries;
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
@@ -224,18 +261,15 @@ static int save_trace(struct stack_trace *trace)
 	trace->max_entries = trace->nr_entries;
 
 	nr_stack_trace_entries += trace->nr_entries;
-	if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) {
-		__raw_spin_unlock(&hash_lock);
-		return 0;
-	}
 
 	if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
-		__raw_spin_unlock(&hash_lock);
-		if (debug_locks_off()) {
-			printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
-			printk("turning off the locking correctness validator.\n");
-			dump_stack();
-		}
+		if (!debug_locks_off_graph_unlock())
+			return 0;
+
+		printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		dump_stack();
+
 		return 0;
 	}
 
@@ -524,9 +558,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth)
 {
 	struct task_struct *curr = current;
 
-	__raw_spin_unlock(&hash_lock);
-	debug_locks_off();
-	if (debug_locks_silent)
+	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
 	printk("\n=======================================================\n");
@@ -554,12 +586,10 @@ static noinline int print_circular_bug_tail(void)
 	if (debug_locks_silent)
 		return 0;
 
-	/* hash_lock unlocked by the header */
-	__raw_spin_lock(&hash_lock);
 	this.class = check_source->class;
 	if (!save_trace(&this.trace))
 		return 0;
-	__raw_spin_unlock(&hash_lock);
+
 	print_circular_bug_entry(&this, 0);
 
 	printk("\nother info that might help us debug this:\n\n");
@@ -575,8 +605,10 @@ static noinline int print_circular_bug_tail(void)
 
 static int noinline print_infinite_recursion_bug(void)
 {
-	__raw_spin_unlock(&hash_lock);
-	DEBUG_LOCKS_WARN_ON(1);
+	if (!debug_locks_off_graph_unlock())
+		return 0;
+
+	WARN_ON(1);
 
 	return 0;
 }
@@ -711,9 +743,7 @@ print_bad_irq_dependency(struct task_struct *curr,
 			 enum lock_usage_bit bit2,
 			 const char *irqclass)
 {
-	__raw_spin_unlock(&hash_lock);
-	debug_locks_off();
-	if (debug_locks_silent)
+	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
 	printk("\n======================================================\n");
@@ -794,9 +824,7 @@ static int
 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 		   struct held_lock *next)
 {
-	debug_locks_off();
-	__raw_spin_unlock(&hash_lock);
-	if (debug_locks_silent)
+	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
 	printk("\n=============================================\n");
@@ -972,14 +1000,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * Debugging printouts:
 	 */
 	if (verbose(prev->class) || verbose(next->class)) {
-		__raw_spin_unlock(&hash_lock);
+		graph_unlock();
 		printk("\n new dependency: ");
 		print_lock_name(prev->class);
 		printk(" => ");
 		print_lock_name(next->class);
 		printk("\n");
 		dump_stack();
-		__raw_spin_lock(&hash_lock);
+		return graph_lock();
 	}
 	return 1;
 }
@@ -1044,8 +1072,10 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 	}
 	return 1;
 out_bug:
-	__raw_spin_unlock(&hash_lock);
-	DEBUG_LOCKS_WARN_ON(1);
+	if (!debug_locks_off_graph_unlock())
+		return 0;
+
+	WARN_ON(1);
 
 	return 0;
 }
@@ -1199,7 +1229,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	hash_head = classhashentry(key);
 
 	raw_local_irq_save(flags);
-	__raw_spin_lock(&hash_lock);
+	if (!graph_lock()) {
+		raw_local_irq_restore(flags);
+		return NULL;
+	}
 	/*
 	 * We have to do the hash-walk again, to avoid races
 	 * with another CPU:
@@ -1212,9 +1245,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	 * the hash:
 	 */
 	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
-		__raw_spin_unlock(&hash_lock);
+		if (!debug_locks_off_graph_unlock()) {
+			raw_local_irq_restore(flags);
+			return NULL;
+		}
 		raw_local_irq_restore(flags);
-		debug_locks_off();
+
 		printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
 		printk("turning off the locking correctness validator.\n");
 		return NULL;
@@ -1235,18 +1271,23 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	list_add_tail_rcu(&class->hash_entry, hash_head);
 
 	if (verbose(class)) {
-		__raw_spin_unlock(&hash_lock);
+		graph_unlock();
 		raw_local_irq_restore(flags);
+
 		printk("\nnew class %p: %s", class->key, class->name);
 		if (class->name_version > 1)
 			printk("#%d", class->name_version);
 		printk("\n");
 		dump_stack();
+
 		raw_local_irq_save(flags);
-		__raw_spin_lock(&hash_lock);
+		if (!graph_lock()) {
+			raw_local_irq_restore(flags);
+			return NULL;
+		}
 	}
 out_unlock_set:
-	__raw_spin_unlock(&hash_lock);
+	graph_unlock();
 	raw_local_irq_restore(flags);
 
 	if (!subclass || force)
@@ -1287,19 +1328,21 @@ cache_hit:
 	 * Allocate a new chain entry from the static array, and add
 	 * it to the hash:
 	 */
-	__raw_spin_lock(&hash_lock);
+	if (!graph_lock())
+		return 0;
 	/*
 	 * We have to walk the chain again locked - to avoid duplicates:
 	 */
 	list_for_each_entry(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
-			__raw_spin_unlock(&hash_lock);
+			graph_unlock();
 			goto cache_hit;
 		}
 	}
 	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
-		__raw_spin_unlock(&hash_lock);
-		debug_locks_off();
+		if (!debug_locks_off_graph_unlock())
+			return 0;
+
 		printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
 		printk("turning off the locking correctness validator.\n");
 		return 0;
@@ -1375,9 +1418,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
 			struct held_lock *this, int forwards,
 			const char *irqclass)
 {
-	__raw_spin_unlock(&hash_lock);
-	debug_locks_off();
-	if (debug_locks_silent)
+	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
 	printk("\n=========================================================\n");
@@ -1466,9 +1507,7 @@ static int
 print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
 {
-	__raw_spin_unlock(&hash_lock);
-	debug_locks_off();
-	if (debug_locks_silent)
+	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
 	printk("\n=================================\n");
@@ -1529,12 +1568,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 	if (likely(this->class->usage_mask & new_mask))
 		return 1;
 
-	__raw_spin_lock(&hash_lock);
+	if (!graph_lock())
+		return 0;
 	/*
 	 * Make sure we didnt race:
 	 */
 	if (unlikely(this->class->usage_mask & new_mask)) {
-		__raw_spin_unlock(&hash_lock);
+		graph_unlock();
 		return 1;
 	}
 
@@ -1720,16 +1760,16 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 		debug_atomic_dec(&nr_unused_locks);
 		break;
 	default:
-		__raw_spin_unlock(&hash_lock);
-		debug_locks_off();
+		if (!debug_locks_off_graph_unlock())
+			return 0;
 		WARN_ON(1);
 		return 0;
 	}
 
-	__raw_spin_unlock(&hash_lock);
+	graph_unlock();
 
 	/*
-	 * We must printk outside of the hash_lock:
+	 * We must printk outside of the graph_lock:
 	 */
 	if (ret == 2) {
 		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
@@ -2127,7 +2167,7 @@ out_calc_hash:
 	 * We look up the chain_key and do the O(N^2) check and update of
 	 * the dependencies only if this is a new dependency chain.
 	 * (If lookup_chain_cache() returns with 1 it acquires
-	 * hash_lock for us)
+	 * graph_lock for us)
 	 */
 	if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) {
 		/*
@@ -2160,7 +2200,7 @@ out_calc_hash:
 		if (!chain_head && ret != 2)
 			if (!check_prevs_add(curr, hlock))
 				return 0;
-		__raw_spin_unlock(&hash_lock);
+		graph_unlock();
 	}
 	curr->lockdep_depth++;
 	check_chain_key(curr);
@@ -2472,7 +2512,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 	int i;
 
 	raw_local_irq_save(flags);
-	__raw_spin_lock(&hash_lock);
+	graph_lock();
 
 	/*
 	 * Unhash all classes that were created by this module:
@@ -2486,7 +2526,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 				zap_class(class);
 	}
 
-	__raw_spin_unlock(&hash_lock);
+	graph_unlock();
 	raw_local_irq_restore(flags);
 }
 
@@ -2514,20 +2554,20 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 	 * Debug check: in the end all mapped classes should
 	 * be gone.
 	 */
-	__raw_spin_lock(&hash_lock);
+	graph_lock();
 	for (i = 0; i < CLASSHASH_SIZE; i++) {
 		head = classhash_table + i;
 		if (list_empty(head))
 			continue;
 		list_for_each_entry_safe(class, next, head, hash_entry) {
 			if (unlikely(class == lock->class_cache)) {
-				__raw_spin_unlock(&hash_lock);
-				DEBUG_LOCKS_WARN_ON(1);
+				if (debug_locks_off_graph_unlock())
+					WARN_ON(1);
 				goto out_restore;
 			}
 		}
 	}
-	__raw_spin_unlock(&hash_lock);
+	graph_unlock();
 
 out_restore:
 	raw_local_irq_restore(flags);
-- 
cgit v1.2.3


From cd86128088554d64fea1679191509f00e6353c5b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Wed, 13 Dec 2006 00:34:52 -0800
Subject: [PATCH] Fix numerous kcalloc() calls, convert to kzalloc()

All kcalloc() calls of the form "kcalloc(1,...)" are converted to the
equivalent kzalloc() calls, and a few kcalloc() calls with the incorrect
ordering of the first two arguments are fixed.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Cc: Greg KH <greg@kroah.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ata/pdc_adma.c              |  4 ++--
 drivers/macintosh/smu.c             |  2 +-
 drivers/mtd/rfd_ftl.c               |  2 +-
 drivers/net/phy/phy_device.c        |  2 +-
 drivers/net/skge.c                  |  2 +-
 drivers/pcmcia/at91_cf.c            |  2 +-
 drivers/pcmcia/omap_cf.c            |  2 +-
 drivers/pnp/isapnp/core.c           | 22 +++++++++++-----------
 drivers/pnp/pnpacpi/core.c          |  6 +++---
 drivers/pnp/pnpacpi/rsparser.c      | 22 +++++++++++-----------
 drivers/pnp/pnpbios/core.c          | 14 +++++++-------
 drivers/pnp/pnpbios/proc.c          |  8 ++++----
 drivers/pnp/pnpbios/rsparser.c      | 16 ++++++++--------
 drivers/scsi/sym53c8xx_2/sym_hipd.c |  2 +-
 drivers/usb/gadget/at91_udc.c       |  2 +-
 drivers/usb/misc/uss720.c           |  2 +-
 drivers/usb/net/rndis_host.c        |  2 +-
 fs/ocfs2/alloc.c                    |  2 +-
 fs/ocfs2/cluster/heartbeat.c        |  4 ++--
 fs/ocfs2/cluster/nodemanager.c      |  6 +++---
 fs/ocfs2/cluster/tcp.c              | 10 +++++-----
 fs/ocfs2/dlm/dlmdomain.c            |  4 ++--
 fs/ocfs2/dlm/dlmlock.c              |  4 ++--
 fs/ocfs2/dlm/dlmmaster.c            |  2 +-
 fs/ocfs2/dlm/dlmrecovery.c          |  6 +++---
 fs/ocfs2/localalloc.c               |  2 +-
 fs/ocfs2/slot_map.c                 |  2 +-
 fs/ocfs2/suballoc.c                 |  6 +++---
 fs/ocfs2/super.c                    |  6 +++---
 fs/ocfs2/vote.c                     |  4 ++--
 include/linux/gameport.h            |  2 +-
 kernel/relay.c                      |  4 ++--
 net/sunrpc/svc.c                    |  2 +-
 33 files changed, 89 insertions(+), 89 deletions(-)

(limited to 'kernel')

diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index 9021e34d2096..90786d7a20bb 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -551,7 +551,7 @@ static int adma_port_start(struct ata_port *ap)
 		return rc;
 	adma_enter_reg_mode(ap);
 	rc = -ENOMEM;
-	pp = kcalloc(1, sizeof(*pp), GFP_KERNEL);
+	pp = kzalloc(sizeof(*pp), GFP_KERNEL);
 	if (!pp)
 		goto err_out;
 	pp->pkt = dma_alloc_coherent(dev, ADMA_PKT_BYTES, &pp->pkt_dma,
@@ -672,7 +672,7 @@ static int adma_ata_init_one(struct pci_dev *pdev,
 	if (rc)
 		goto err_out_iounmap;
 
-	probe_ent = kcalloc(1, sizeof(*probe_ent), GFP_KERNEL);
+	probe_ent = kzalloc(sizeof(*probe_ent), GFP_KERNEL);
 	if (probe_ent == NULL) {
 		rc = -ENOMEM;
 		goto err_out_iounmap;
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 6dde27ab79a8..6f30459b9385 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -945,7 +945,7 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id)
 	 */
 	tlen = sizeof(struct property) + len + 18;
 
-	prop = kcalloc(tlen, 1, GFP_KERNEL);
+	prop = kzalloc(tlen, GFP_KERNEL);
 	if (prop == NULL)
 		return NULL;
 	hdr = (struct smu_sdbp_header *)(prop + 1);
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index fa4362fb4dd8..0f3baa5d9c2a 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -768,7 +768,7 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	if (mtd->type != MTD_NORFLASH)
 		return;
 
-	part = kcalloc(1, sizeof(struct partition), GFP_KERNEL);
+	part = kzalloc(sizeof(struct partition), GFP_KERNEL);
 	if (!part)
 		return;
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b01fc70a57db..a4d7529ef415 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -50,7 +50,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	struct phy_device *dev;
 	/* We allocate the device, and initialize the
 	 * default values */
-	dev = kcalloc(1, sizeof(*dev), GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 
 	if (NULL == dev)
 		return (struct phy_device*) PTR_ERR((void*)-ENOMEM);
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index b60f0451f6cd..8a39376f87dc 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -749,7 +749,7 @@ static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u32 base)
 	struct skge_element *e;
 	int i;
 
-	ring->start = kcalloc(sizeof(*e), ring->count, GFP_KERNEL);
+	ring->start = kcalloc(ring->count, sizeof(*e), GFP_KERNEL);
 	if (!ring->start)
 		return -ENOMEM;
 
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index 52d4a38b3667..3334f22a86c0 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -230,7 +230,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
 	if (!io)
 		return -ENODEV;
 
-	cf = kcalloc(1, sizeof *cf, GFP_KERNEL);
+	cf = kzalloc(sizeof *cf, GFP_KERNEL);
 	if (!cf)
 		return -ENOMEM;
 
diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c
index 06bf7f48836e..e65a6b8188f6 100644
--- a/drivers/pcmcia/omap_cf.c
+++ b/drivers/pcmcia/omap_cf.c
@@ -220,7 +220,7 @@ static int __devinit omap_cf_probe(struct device *dev)
 	if (irq < 0)
 		return -EINVAL;
 
-	cf = kcalloc(1, sizeof *cf, GFP_KERNEL);
+	cf = kzalloc(sizeof *cf, GFP_KERNEL);
 	if (!cf)
 		return -ENOMEM;
 	init_timer(&cf->timer);
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c
index 3ac5b123215a..a0b158704ca1 100644
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -395,7 +395,7 @@ static void isapnp_parse_id(struct pnp_dev * dev, unsigned short vendor, unsigne
 	struct pnp_id * id;
 	if (!dev)
 		return;
-	id = kcalloc(1, sizeof(struct pnp_id), GFP_KERNEL);
+	id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 	if (!id)
 		return;
 	sprintf(id->id, "%c%c%c%x%x%x%x",
@@ -419,7 +419,7 @@ static struct pnp_dev * __init isapnp_parse_device(struct pnp_card *card, int si
 	struct pnp_dev *dev;
 
 	isapnp_peek(tmp, size);
-	dev = kcalloc(1, sizeof(struct pnp_dev), GFP_KERNEL);
+	dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
 	if (!dev)
 		return NULL;
 	dev->number = number;
@@ -450,7 +450,7 @@ static void __init isapnp_parse_irq_resource(struct pnp_option *option,
 	unsigned long bits;
 
 	isapnp_peek(tmp, size);
-	irq = kcalloc(1, sizeof(struct pnp_irq), GFP_KERNEL);
+	irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
 	if (!irq)
 		return;
 	bits = (tmp[1] << 8) | tmp[0];
@@ -474,7 +474,7 @@ static void __init isapnp_parse_dma_resource(struct pnp_option *option,
 	struct pnp_dma *dma;
 
 	isapnp_peek(tmp, size);
-	dma = kcalloc(1, sizeof(struct pnp_dma), GFP_KERNEL);
+	dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
 	if (!dma)
 		return;
 	dma->map = tmp[0];
@@ -494,7 +494,7 @@ static void __init isapnp_parse_port_resource(struct pnp_option *option,
 	struct pnp_port *port;
 
 	isapnp_peek(tmp, size);
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = (tmp[2] << 8) | tmp[1];
@@ -517,7 +517,7 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option,
 	struct pnp_port *port;
 
 	isapnp_peek(tmp, size);
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = port->max = (tmp[1] << 8) | tmp[0];
@@ -539,7 +539,7 @@ static void __init isapnp_parse_mem_resource(struct pnp_option *option,
 	struct pnp_mem *mem;
 
 	isapnp_peek(tmp, size);
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = ((tmp[2] << 8) | tmp[1]) << 8;
@@ -562,7 +562,7 @@ static void __init isapnp_parse_mem32_resource(struct pnp_option *option,
 	struct pnp_mem *mem;
 
 	isapnp_peek(tmp, size);
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
@@ -584,7 +584,7 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option,
 	struct pnp_mem *mem;
 
 	isapnp_peek(tmp, size);
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = mem->max = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
@@ -829,7 +829,7 @@ static unsigned char __init isapnp_checksum(unsigned char *data)
 
 static void isapnp_parse_card_id(struct pnp_card * card, unsigned short vendor, unsigned short device)
 {
-	struct pnp_id * id = kcalloc(1, sizeof(struct pnp_id), GFP_KERNEL);
+	struct pnp_id * id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 	if (!id)
 		return;
 	sprintf(id->id, "%c%c%c%x%x%x%x",
@@ -865,7 +865,7 @@ static int __init isapnp_build_device_list(void)
 			header[4], header[5], header[6], header[7], header[8]);
 		printk(KERN_DEBUG "checksum = 0x%x\n", checksum);
 #endif
-		if ((card = kcalloc(1, sizeof(struct pnp_card), GFP_KERNEL)) == NULL)
+		if ((card = kzalloc(sizeof(struct pnp_card), GFP_KERNEL)) == NULL)
 			continue;
 
 		card->number = csn;
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 6cf34a63c790..62eda5d59024 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -139,7 +139,7 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 		return 0;
 
 	pnp_dbg("ACPI device : hid %s", acpi_device_hid(device));
-	dev =  kcalloc(1, sizeof(struct pnp_dev), GFP_KERNEL);
+	dev =  kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
 	if (!dev) {
 		pnp_err("Out of memory");
 		return -ENOMEM;
@@ -169,7 +169,7 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 	dev->number = num;
 	
 	/* set the initial values for the PnP device */
-	dev_id = kcalloc(1, sizeof(struct pnp_id), GFP_KERNEL);
+	dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 	if (!dev_id)
 		goto err;
 	pnpidacpi_to_pnpid(acpi_device_hid(device), dev_id->id);
@@ -201,7 +201,7 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 		for (i = 0; i < cid_list->count; i++) {
 			if (!ispnpidacpi(cid_list->id[i].value))
 				continue;
-			dev_id = kcalloc(1, sizeof(struct pnp_id), GFP_KERNEL);
+			dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 			if (!dev_id)
 				continue;
 
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 379048fdf05d..7a535542fe92 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -298,7 +298,7 @@ static void pnpacpi_parse_dma_option(struct pnp_option *option, struct acpi_reso
 
 	if (p->channel_count == 0)
 		return;
-	dma = kcalloc(1, sizeof(struct pnp_dma), GFP_KERNEL);
+	dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
 	if (!dma)
 		return;
 
@@ -354,7 +354,7 @@ static void pnpacpi_parse_irq_option(struct pnp_option *option,
 
 	if (p->interrupt_count == 0)
 		return;
-	irq = kcalloc(1, sizeof(struct pnp_irq), GFP_KERNEL);
+	irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
 	if (!irq)
 		return;
 
@@ -375,7 +375,7 @@ static void pnpacpi_parse_ext_irq_option(struct pnp_option *option,
 
 	if (p->interrupt_count == 0)
 		return;
-	irq = kcalloc(1, sizeof(struct pnp_irq), GFP_KERNEL);
+	irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
 	if (!irq)
 		return;
 
@@ -396,7 +396,7 @@ pnpacpi_parse_port_option(struct pnp_option *option,
 
 	if (io->address_length == 0)
 		return;
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = io->minimum;
@@ -417,7 +417,7 @@ pnpacpi_parse_fixed_port_option(struct pnp_option *option,
 
 	if (io->address_length == 0)
 		return;
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = port->max = io->address;
@@ -436,7 +436,7 @@ pnpacpi_parse_mem24_option(struct pnp_option *option,
 
 	if (p->address_length == 0)
 		return;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = p->minimum;
@@ -459,7 +459,7 @@ pnpacpi_parse_mem32_option(struct pnp_option *option,
 
 	if (p->address_length == 0)
 		return;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = p->minimum;
@@ -482,7 +482,7 @@ pnpacpi_parse_fixed_mem32_option(struct pnp_option *option,
 
 	if (p->address_length == 0)
 		return;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = mem->max = p->address;
@@ -514,7 +514,7 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
 		return;
 
 	if (p->resource_type == ACPI_MEMORY_RANGE) {
-		mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+		mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 		if (!mem)
 			return;
 		mem->min = mem->max = p->minimum;
@@ -524,7 +524,7 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
 		    ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE : 0;
 		pnp_register_mem_resource(option, mem);
 	} else if (p->resource_type == ACPI_IO_RANGE) {
-		port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+		port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 		if (!port)
 			return;
 		port->min = port->max = p->minimum;
@@ -721,7 +721,7 @@ int pnpacpi_build_resource_template(acpi_handle handle,
 	if (!res_cnt)
 		return -EINVAL;
 	buffer->length = sizeof(struct acpi_resource) * (res_cnt + 1) + 1;
-	buffer->pointer = kcalloc(1, buffer->length - 1, GFP_KERNEL);
+	buffer->pointer = kzalloc(buffer->length - 1, GFP_KERNEL);
 	if (!buffer->pointer)
 		return -ENOMEM;
 	pnp_dbg("Res cnt %d", res_cnt);
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 33adeba1a31f..b7fdb4d21a96 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -112,7 +112,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
 	if (!(envp = (char **) kcalloc (20, sizeof (char *), GFP_KERNEL))) {
 		return -ENOMEM;
 	}
-	if (!(buf = kcalloc (1, 256, GFP_KERNEL))) {
+	if (!(buf = kzalloc(256, GFP_KERNEL))) {
 		kfree (envp);
 		return -ENOMEM;
 	}
@@ -220,7 +220,7 @@ static int pnpbios_get_resources(struct pnp_dev * dev, struct pnp_resource_table
 	if(!pnpbios_is_dynamic(dev))
 		return -EPERM;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -1;
 	if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) {
@@ -243,7 +243,7 @@ static int pnpbios_set_resources(struct pnp_dev * dev, struct pnp_resource_table
 	if (!pnpbios_is_dynamic(dev))
 		return -EPERM;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -1;
 	if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) {
@@ -294,7 +294,7 @@ static int pnpbios_disable_resources(struct pnp_dev *dev)
 	if(dev->flags & PNPBIOS_NO_DISABLE || !pnpbios_is_dynamic(dev))
 		return -EPERM;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 
@@ -336,7 +336,7 @@ static int insert_device(struct pnp_dev *dev, struct pnp_bios_node * node)
 	}
 
 	/* set the initial values for the PnP device */
-	dev_id = kcalloc(1, sizeof(struct pnp_id), GFP_KERNEL);
+	dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 	if (!dev_id)
 		return -1;
 	pnpid32_to_pnpid(node->eisa_id,id);
@@ -374,7 +374,7 @@ static void __init build_devlist(void)
 	struct pnp_bios_node *node;
 	struct pnp_dev *dev;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return;
 
@@ -391,7 +391,7 @@ static void __init build_devlist(void)
 				break;
 		}
 		nodes_got++;
-		dev =  kcalloc(1, sizeof (struct pnp_dev), GFP_KERNEL);
+		dev =  kzalloc(sizeof (struct pnp_dev), GFP_KERNEL);
 		if (!dev)
 			break;
 		if(insert_device(dev,node)<0)
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index 5a3dfc97f5e9..8027073f7919 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -87,7 +87,7 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
 		return -EFBIG;
 	}
 
-	tmpbuf = kcalloc(1, escd.escd_size, GFP_KERNEL);
+	tmpbuf = kzalloc(escd.escd_size, GFP_KERNEL);
 	if (!tmpbuf) return -ENOMEM;
 
 	if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base)) {
@@ -133,7 +133,7 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
 	if (pos >= 0xff)
 		return 0;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node) return -ENOMEM;
 
 	for (nodenum=pos; nodenum<0xff; ) {
@@ -168,7 +168,7 @@ static int proc_read_node(char *buf, char **start, off_t pos,
 	u8 nodenum = (long)data;
 	int len;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node) return -ENOMEM;
 	if (pnp_bios_get_dev_node(&nodenum, boot, node)) {
 		kfree(node);
@@ -188,7 +188,7 @@ static int proc_write_node(struct file *file, const char __user *buf,
 	u8 nodenum = (long)data;
 	int ret = count;
 
-	node = kcalloc(1, node_info.max_node_size, GFP_KERNEL);
+	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 	if (pnp_bios_get_dev_node(&nodenum, boot, node)) {
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c
index ef508a4de557..95b79685a9d1 100644
--- a/drivers/pnp/pnpbios/rsparser.c
+++ b/drivers/pnp/pnpbios/rsparser.c
@@ -248,7 +248,7 @@ static void
 pnpbios_parse_mem_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_mem * mem;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = ((p[5] << 8) | p[4]) << 8;
@@ -264,7 +264,7 @@ static void
 pnpbios_parse_mem32_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_mem * mem;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
@@ -280,7 +280,7 @@ static void
 pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_mem * mem;
-	mem = kcalloc(1, sizeof(struct pnp_mem), GFP_KERNEL);
+	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
 	mem->min = mem->max = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
@@ -297,7 +297,7 @@ pnpbios_parse_irq_option(unsigned char *p, int size, struct pnp_option *option)
 	struct pnp_irq * irq;
 	unsigned long bits;
 
-	irq = kcalloc(1, sizeof(struct pnp_irq), GFP_KERNEL);
+	irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
 	if (!irq)
 		return;
 	bits = (p[2] << 8) | p[1];
@@ -314,7 +314,7 @@ static void
 pnpbios_parse_dma_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_dma * dma;
-	dma = kcalloc(1, sizeof(struct pnp_dma), GFP_KERNEL);
+	dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
 	if (!dma)
 		return;
 	dma->map = p[1];
@@ -327,7 +327,7 @@ static void
 pnpbios_parse_port_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_port * port;
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = (p[3] << 8) | p[2];
@@ -343,7 +343,7 @@ static void
 pnpbios_parse_fixed_port_option(unsigned char *p, int size, struct pnp_option *option)
 {
 	struct pnp_port * port;
-	port = kcalloc(1, sizeof(struct pnp_port), GFP_KERNEL);
+	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
 	port->min = port->max = (p[2] << 8) | p[1];
@@ -527,7 +527,7 @@ pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_de
 		case SMALL_TAG_COMPATDEVID: /* compatible ID */
 			if (len != 4)
 				goto len_err;
-			dev_id =  kcalloc(1, sizeof (struct pnp_id), GFP_KERNEL);
+			dev_id =  kzalloc(sizeof (struct pnp_id), GFP_KERNEL);
 			if (!dev_id)
 				return NULL;
 			memset(dev_id, 0, sizeof(struct pnp_id));
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 940fa1e6f994..21cd4c7f5289 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -5545,7 +5545,7 @@ int sym_hcb_attach(struct Scsi_Host *shost, struct sym_fw *fw, struct sym_nvram
 	/*
 	 *  Allocate the array of lists of CCBs hashed by DSA.
 	 */
-	np->ccbh = kcalloc(sizeof(struct sym_ccb **), CCB_HASH_SIZE, GFP_KERNEL);
+	np->ccbh = kcalloc(CCB_HASH_SIZE, sizeof(struct sym_ccb **), GFP_KERNEL);
 	if (!np->ccbh)
 		goto attach_failed;
 
diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 72f3db99ff94..3e0abbb49fe1 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -598,7 +598,7 @@ at91_ep_alloc_request(struct usb_ep *_ep, unsigned int gfp_flags)
 {
 	struct at91_request *req;
 
-	req = kcalloc(1, sizeof (struct at91_request), gfp_flags);
+	req = kzalloc(sizeof (struct at91_request), gfp_flags);
 	if (!req)
 		return NULL;
 
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 7e8a0acd52ee..70250252ae2a 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -705,7 +705,7 @@ static int uss720_probe(struct usb_interface *intf,
 	/*
 	 * Allocate parport interface 
 	 */
-	if (!(priv = kcalloc(sizeof(struct parport_uss720_private), 1, GFP_KERNEL))) {
+	if (!(priv = kzalloc(sizeof(struct parport_uss720_private), GFP_KERNEL))) {
 		usb_put_dev(usbdev);
 		return -ENOMEM;
 	}
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index 99f26b3e502f..ea5f44de3de2 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -469,7 +469,7 @@ static void rndis_unbind(struct usbnet *dev, struct usb_interface *intf)
 	struct rndis_halt	*halt;
 
 	/* try to clear any rndis state/activity (no i/o from stack!) */
-	halt = kcalloc(1, sizeof *halt, GFP_KERNEL);
+	halt = kzalloc(sizeof *halt, GFP_KERNEL);
 	if (halt) {
 		halt->msg_type = RNDIS_MSG_HALT;
 		halt->msg_len = ccpu2(sizeof *halt);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edc91ca3792a..f27e5378caf2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1959,7 +1959,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
+	*tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
 	if (!(*tc)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4cd9a9580456..a25ef5a50386 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1553,7 +1553,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 	struct o2hb_region *reg = NULL;
 	struct config_item *ret = NULL;
 
-	reg = kcalloc(1, sizeof(struct o2hb_region), GFP_KERNEL);
+	reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
 	if (reg == NULL)
 		goto out; /* ENOMEM */
 
@@ -1679,7 +1679,7 @@ struct config_group *o2hb_alloc_hb_set(void)
 	struct o2hb_heartbeat_group *hs = NULL;
 	struct config_group *ret = NULL;
 
-	hs = kcalloc(1, sizeof(struct o2hb_heartbeat_group), GFP_KERNEL);
+	hs = kzalloc(sizeof(struct o2hb_heartbeat_group), GFP_KERNEL);
 	if (hs == NULL)
 		goto out;
 
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 357f1d551771..b17333a0606b 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -714,7 +714,7 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
 	if (strlen(name) > O2NM_MAX_NAME_LEN)
 		goto out; /* ENAMETOOLONG */
 
-	node = kcalloc(1, sizeof(struct o2nm_node), GFP_KERNEL);
+	node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL);
 	if (node == NULL)
 		goto out; /* ENOMEM */
 
@@ -825,8 +825,8 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
 	if (o2nm_single_cluster)
 		goto out; /* ENOSPC */
 
-	cluster = kcalloc(1, sizeof(struct o2nm_cluster), GFP_KERNEL);
-	ns = kcalloc(1, sizeof(struct o2nm_node_group), GFP_KERNEL);
+	cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
+	ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
 	defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
 	o2hb_group = o2hb_alloc_hb_set();
 	if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 457753df1ae7..ae4ff4a6636b 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -324,7 +324,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 	struct page *page = NULL;
 
 	page = alloc_page(GFP_NOFS);
-	sc = kcalloc(1, sizeof(*sc), GFP_NOFS);
+	sc = kzalloc(sizeof(*sc), GFP_NOFS);
 	if (sc == NULL || page == NULL)
 		goto out;
 
@@ -714,7 +714,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
 		goto out;
 	}
 
-       	nmh = kcalloc(1, sizeof(struct o2net_msg_handler), GFP_NOFS);
+       	nmh = kzalloc(sizeof(struct o2net_msg_handler), GFP_NOFS);
 	if (nmh == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -1918,9 +1918,9 @@ int o2net_init(void)
 
 	o2quo_init();
 
-	o2net_hand = kcalloc(1, sizeof(struct o2net_handshake), GFP_KERNEL);
-	o2net_keep_req = kcalloc(1, sizeof(struct o2net_msg), GFP_KERNEL);
-	o2net_keep_resp = kcalloc(1, sizeof(struct o2net_msg), GFP_KERNEL);
+	o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
+	o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
+	o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
 	if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) {
 		kfree(o2net_hand);
 		kfree(o2net_keep_req);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 420a375a3949..f0b25f2dd205 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -920,7 +920,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
 
 	mlog_entry("%p", dlm);
 
-	ctxt = kcalloc(1, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -1223,7 +1223,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 	int i;
 	struct dlm_ctxt *dlm = NULL;
 
-	dlm = kcalloc(1, sizeof(*dlm), GFP_KERNEL);
+	dlm = kzalloc(sizeof(*dlm), GFP_KERNEL);
 	if (!dlm) {
 		mlog_errno(-ENOMEM);
 		goto leave;
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 42a1b91979b5..e5ca3db197f6 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -408,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 	struct dlm_lock *lock;
 	int kernel_allocated = 0;
 
-	lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
+	lock = kzalloc(sizeof(*lock), GFP_NOFS);
 	if (!lock)
 		return NULL;
 
 	if (!lksb) {
 		/* zero memory only if kernel-allocated */
-		lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
+		lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
 		if (!lksb) {
 			kfree(lock);
 			return NULL;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 856012b4fa49..0ad872055cb3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1939,7 +1939,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 			       int ignore_higher, u8 request_from, u32 flags)
 {
 	struct dlm_work_item *item;
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!item)
 		return -ENOMEM;
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index fb3e2b0817f1..367a11e9e2ed 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -757,7 +757,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
 		}
 		BUG_ON(num == dead_node);
 
-		ndata = kcalloc(1, sizeof(*ndata), GFP_NOFS);
+		ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
 		if (!ndata) {
 			dlm_destroy_recovery_area(dlm, dead_node);
 			return -ENOMEM;
@@ -842,7 +842,7 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
 	}
 	BUG_ON(lr->dead_node != dlm->reco.dead_node);
 
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!item) {
 		dlm_put(dlm);
 		return -ENOMEM;
@@ -1323,7 +1323,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	ret = -ENOMEM;
 	buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
-	item = kcalloc(1, sizeof(*item), GFP_NOFS);
+	item = kzalloc(sizeof(*item), GFP_NOFS);
 	if (!buf || !item)
 		goto leave;
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 698d79a74ef8..4dedd9789108 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -776,7 +776,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 {
 	int status;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index aa6f5aadedc4..2d3ac32cb74e 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -175,7 +175,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 	struct buffer_head *bh = NULL;
 	struct ocfs2_slot_info *si;
 
-	si = kcalloc(1, sizeof(struct ocfs2_slot_info), GFP_KERNEL);
+	si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL);
 	if (!si) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 000d71cca6c5..6dbb11762759 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -488,7 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
 	int status;
 	u32 slot;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -530,7 +530,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 {
 	int status;
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -595,7 +595,7 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a6d2f8cc165b..6e300a88a47e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1231,7 +1231,7 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
 
 	BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN);
 
-	osb->uuid_str = kcalloc(1, OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
+	osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
 	if (osb->uuid_str == NULL)
 		return -ENOMEM;
 
@@ -1262,7 +1262,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
 	mlog_entry_void();
 
-	osb = kcalloc(1, sizeof(struct ocfs2_super), GFP_KERNEL);
+	osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
 	if (!osb) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -1387,7 +1387,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	 */
 	/* initialize our journal structure */
 
-	journal = kcalloc(1, sizeof(struct ocfs2_journal), GFP_KERNEL);
+	journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
 	if (!journal) {
 		mlog(ML_ERROR, "unable to alloc journal\n");
 		status = -ENOMEM;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 0315a8b61ed6..0afd8b9af70f 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -479,7 +479,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response
 {
 	struct ocfs2_net_wait_ctxt *w;
 
-	w = kcalloc(1, sizeof(*w), GFP_NOFS);
+	w = kzalloc(sizeof(*w), GFP_NOFS);
 	if (!w) {
 		mlog_errno(-ENOMEM);
 		goto bail;
@@ -642,7 +642,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
 
 	BUG_ON(!ocfs2_is_valid_vote_request(type));
 
-	request = kcalloc(1, sizeof(*request), GFP_NOFS);
+	request = kzalloc(sizeof(*request), GFP_NOFS);
 	if (!request) {
 		mlog_errno(-ENOMEM);
 	} else {
diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 2cdba0c23957..afad95272841 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -105,7 +105,7 @@ static inline void gameport_set_phys(struct gameport *gameport,
 
 static inline struct gameport *gameport_allocate_port(void)
 {
-	struct gameport *gameport = kcalloc(1, sizeof(struct gameport), GFP_KERNEL);
+	struct gameport *gameport = kzalloc(sizeof(struct gameport), GFP_KERNEL);
 
 	return gameport;
 }
diff --git a/kernel/relay.c b/kernel/relay.c
index 818e514729cf..a4701e7ba7d0 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -138,7 +138,7 @@ depopulate:
  */
 struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
-	struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
+	struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
 	if (!buf)
 		return NULL;
 
@@ -479,7 +479,7 @@ struct rchan *relay_open(const char *base_filename,
 	if (!(subbuf_size && n_subbufs))
 		return NULL;
 
-	chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
+	chan = kzalloc(sizeof(struct rchan), GFP_KERNEL);
 	if (!chan)
 		return NULL;
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index eb44ec929ca1..f3001f3626f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -308,7 +308,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 	serv->sv_nrpools = npools;
 	serv->sv_pools =
-		kcalloc(sizeof(struct svc_pool), serv->sv_nrpools,
+		kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
 			GFP_KERNEL);
 	if (!serv->sv_pools) {
 		kfree(serv);
-- 
cgit v1.2.3


From cd7175edf963a92b2c3cd491d3e34afd357e7284 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 13 Dec 2006 00:35:45 -0800
Subject: [PATCH] Optimize calc_load()

calc_load() is called by timer interrupt to update avenrun[].  It currently
calls nr_active() at each timer tick (HZ per second), while the update of
avenrun[] is done only once every 5 seconds.  (LOAD_FREQ=5 Hz)

nr_active() is quite expensive on SMP machines, since it has to sum up
nr_running and nr_uninterruptible of all online CPUS, bringing foreign
dirty cache lines.

This patch is an optimization of calc_load() so that nr_active() is called
only if we need it.

The use of unlikely() is welcome since the condition is true only once every
5*HZ time.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/timer.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/timer.c b/kernel/timer.c
index 0256ab443d8a..feddf817baa5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1146,11 +1146,15 @@ static inline void calc_load(unsigned long ticks)
 	unsigned long active_tasks; /* fixed-point */
 	static int count = LOAD_FREQ;
 
-	active_tasks = count_active_tasks();
-	for (count -= ticks; count < 0; count += LOAD_FREQ) {
-		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+	count -= ticks;
+	if (unlikely(count < 0)) {
+		active_tasks = count_active_tasks();
+		do {
+			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+			count += LOAD_FREQ;
+		} while (count < 0);
 	}
 }
 
-- 
cgit v1.2.3


From ec8c0446b6e2b67b5c8813eb517f4bf00efa99a9 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 12 Dec 2006 17:14:57 +0000
Subject: [PATCH] Optimize D-cache alias handling on fork

Virtually index, physically tagged cache architectures can get away
without cache flushing when forking.  This patch adds a new cache
flushing function flush_cache_dup_mm(struct mm_struct *) which for the
moment I've implemented to do the same thing on all architectures
except on MIPS where it's a no-op.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cachetlb.txt          | 23 +++++++++++++++++------
 include/asm-alpha/cacheflush.h      |  1 +
 include/asm-arm/cacheflush.h        |  2 ++
 include/asm-arm26/cacheflush.h      |  1 +
 include/asm-avr32/cacheflush.h      |  1 +
 include/asm-cris/cacheflush.h       |  1 +
 include/asm-frv/cacheflush.h        |  1 +
 include/asm-h8300/cacheflush.h      |  1 +
 include/asm-i386/cacheflush.h       |  1 +
 include/asm-ia64/cacheflush.h       |  1 +
 include/asm-m32r/cacheflush.h       |  3 +++
 include/asm-m68k/cacheflush.h       |  2 ++
 include/asm-m68knommu/cacheflush.h  |  1 +
 include/asm-mips/cacheflush.h       |  2 ++
 include/asm-parisc/cacheflush.h     |  2 ++
 include/asm-powerpc/cacheflush.h    |  1 +
 include/asm-s390/cacheflush.h       |  1 +
 include/asm-sh/cpu-sh2/cacheflush.h |  2 ++
 include/asm-sh/cpu-sh3/cacheflush.h |  3 +++
 include/asm-sh/cpu-sh4/cacheflush.h |  1 +
 include/asm-sh64/cacheflush.h       |  2 ++
 include/asm-sparc/cacheflush.h      |  1 +
 include/asm-sparc64/cacheflush.h    |  1 +
 include/asm-v850/cacheflush.h       |  1 +
 include/asm-x86_64/cacheflush.h     |  1 +
 include/asm-xtensa/cacheflush.h     |  2 ++
 kernel/fork.c                       |  2 +-
 27 files changed, 54 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 53245c429f7d..73e794f0ff09 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -179,10 +179,21 @@ Here are the routines, one by one:
 	lines associated with 'mm'.
 
 	This interface is used to handle whole address space
-	page table operations such as what happens during
-	fork, exit, and exec.
+	page table operations such as what happens during exit and exec.
+
+2) void flush_cache_dup_mm(struct mm_struct *mm)
+
+	This interface flushes an entire user address space from
+	the caches.  That is, after running, there will be no cache
+	lines associated with 'mm'.
+
+	This interface is used to handle whole address space
+	page table operations such as what happens during fork.
+
+	This option is separate from flush_cache_mm to allow some
+	optimizations for VIPT caches.
 
-2) void flush_cache_range(struct vm_area_struct *vma,
+3) void flush_cache_range(struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end)
 
 	Here we are flushing a specific range of (user) virtual
@@ -199,7 +210,7 @@ Here are the routines, one by one:
 	call flush_cache_page (see below) for each entry which may be
 	modified.
 
-3) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
 
 	This time we need to remove a PAGE_SIZE sized range
 	from the cache.  The 'vma' is the backing structure used by
@@ -220,7 +231,7 @@ Here are the routines, one by one:
 
 	This is used primarily during fault processing.
 
-4) void flush_cache_kmaps(void)
+5) void flush_cache_kmaps(void)
 
 	This routine need only be implemented if the platform utilizes
 	highmem.  It will be called right before all of the kmaps
@@ -232,7 +243,7 @@ Here are the routines, one by one:
 
 	This routing should be implemented in asm/highmem.h
 
-5) void flush_cache_vmap(unsigned long start, unsigned long end)
+6) void flush_cache_vmap(unsigned long start, unsigned long end)
    void flush_cache_vunmap(unsigned long start, unsigned long end)
 
 	Here in these two interfaces we are flushing a specific range
diff --git a/include/asm-alpha/cacheflush.h b/include/asm-alpha/cacheflush.h
index 805640b41078..b686cc7fc44e 100644
--- a/include/asm-alpha/cacheflush.h
+++ b/include/asm-alpha/cacheflush.h
@@ -6,6 +6,7 @@
 /* Caches aren't brain-dead on the Alpha. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h
index f0845646aacb..378a3a2ce8d9 100644
--- a/include/asm-arm/cacheflush.h
+++ b/include/asm-arm/cacheflush.h
@@ -319,6 +319,8 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 				unsigned long len, int write);
 #endif
 
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
 /*
  * flush_cache_user_range is used when we want to ensure that the
  * Harvard caches are synchronised for the user space address range.
diff --git a/include/asm-arm26/cacheflush.h b/include/asm-arm26/cacheflush.h
index 9c1b9c7f2ebd..14ae15b6faab 100644
--- a/include/asm-arm26/cacheflush.h
+++ b/include/asm-arm26/cacheflush.h
@@ -22,6 +22,7 @@
 
 #define flush_cache_all()                       do { } while (0)
 #define flush_cache_mm(mm)                      do { } while (0)
+#define flush_cache_dup_mm(mm)                  do { } while (0)
 #define flush_cache_range(vma,start,end)        do { } while (0)
 #define flush_cache_page(vma,vmaddr,pfn)        do { } while (0)
 #define flush_cache_vmap(start, end)		do { } while (0)
diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h
index f1bf1708980e..dfaaa88cd412 100644
--- a/include/asm-avr32/cacheflush.h
+++ b/include/asm-avr32/cacheflush.h
@@ -87,6 +87,7 @@ void invalidate_icache_region(void *start, size_t len);
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_cache_vmap(start, end)		do { } while (0)
diff --git a/include/asm-cris/cacheflush.h b/include/asm-cris/cacheflush.h
index 72cc71dffe70..01af2de27c5b 100644
--- a/include/asm-cris/cacheflush.h
+++ b/include/asm-cris/cacheflush.h
@@ -9,6 +9,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-frv/cacheflush.h b/include/asm-frv/cacheflush.h
index eaa5826bc1c8..02500405a6fb 100644
--- a/include/asm-frv/cacheflush.h
+++ b/include/asm-frv/cacheflush.h
@@ -20,6 +20,7 @@
  */
 #define flush_cache_all()			do {} while(0)
 #define flush_cache_mm(mm)			do {} while(0)
+#define flush_cache_dup_mm(mm)			do {} while(0)
 #define flush_cache_range(mm, start, end)	do {} while(0)
 #define flush_cache_page(vma, vmaddr, pfn)	do {} while(0)
 #define flush_cache_vmap(start, end)		do {} while(0)
diff --git a/include/asm-h8300/cacheflush.h b/include/asm-h8300/cacheflush.h
index 1e4d95bb5ec9..71210d141b64 100644
--- a/include/asm-h8300/cacheflush.h
+++ b/include/asm-h8300/cacheflush.h
@@ -12,6 +12,7 @@
 
 #define flush_cache_all()
 #define	flush_cache_mm(mm)
+#define	flush_cache_dup_mm(mm)		do { } while (0)
 #define	flush_cache_range(vma,a,b)
 #define	flush_cache_page(vma,p,pfn)
 #define	flush_dcache_page(page)
diff --git a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h
index 7199f7b326f1..74e03c8f2e51 100644
--- a/include/asm-i386/cacheflush.h
+++ b/include/asm-i386/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-ia64/cacheflush.h b/include/asm-ia64/cacheflush.h
index f2dacb4245ec..4906916d715b 100644
--- a/include/asm-ia64/cacheflush.h
+++ b/include/asm-ia64/cacheflush.h
@@ -18,6 +18,7 @@
 
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_icache_page(vma,page)		do { } while (0)
diff --git a/include/asm-m32r/cacheflush.h b/include/asm-m32r/cacheflush.h
index 8b261b49149e..56961a9511b2 100644
--- a/include/asm-m32r/cacheflush.h
+++ b/include/asm-m32r/cacheflush.h
@@ -9,6 +9,7 @@ extern void _flush_cache_copyback_all(void);
 #if defined(CONFIG_CHIP_M32700) || defined(CONFIG_CHIP_OPSP) || defined(CONFIG_CHIP_M32104)
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
@@ -29,6 +30,7 @@ extern void smp_flush_cache_all(void);
 #elif defined(CONFIG_CHIP_M32102)
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
@@ -41,6 +43,7 @@ extern void smp_flush_cache_all(void);
 #else
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-m68k/cacheflush.h b/include/asm-m68k/cacheflush.h
index 24d3ff449135..16bf375fdbe1 100644
--- a/include/asm-m68k/cacheflush.h
+++ b/include/asm-m68k/cacheflush.h
@@ -89,6 +89,8 @@ static inline void flush_cache_mm(struct mm_struct *mm)
 		__flush_cache_030();
 }
 
+#define flush_cache_dup_mm(mm)			flush_cache_mm(mm)
+
 /* flush_cache_range/flush_cache_page must be macros to avoid
    a dependency on linux/mm.h, which includes this file... */
 static inline void flush_cache_range(struct vm_area_struct *vma,
diff --git a/include/asm-m68knommu/cacheflush.h b/include/asm-m68knommu/cacheflush.h
index c3aadf3b0d88..163dcb1a9689 100644
--- a/include/asm-m68knommu/cacheflush.h
+++ b/include/asm-m68knommu/cacheflush.h
@@ -8,6 +8,7 @@
 
 #define flush_cache_all()			__flush_cache_all()
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	__flush_cache_all()
 #define flush_cache_page(vma, vmaddr)		do { } while (0)
 #define flush_dcache_range(start,len)		__flush_cache_all()
diff --git a/include/asm-mips/cacheflush.h b/include/asm-mips/cacheflush.h
index e3c9925876a3..0ddada3bb0b6 100644
--- a/include/asm-mips/cacheflush.h
+++ b/include/asm-mips/cacheflush.h
@@ -17,6 +17,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *  - flush_icache_range(start, end) flush a range of instructions
@@ -31,6 +32,7 @@
 extern void (*flush_cache_all)(void);
 extern void (*__flush_cache_all)(void);
 extern void (*flush_cache_mm)(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm)	do { (void) (mm); } while (0)
 extern void (*flush_cache_range)(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end);
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h
index 2bc41f2e0271..aedb0512cb04 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/include/asm-parisc/cacheflush.h
@@ -15,6 +15,8 @@
 #define flush_cache_mm(mm) flush_cache_all_local()
 #endif
 
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
 #define flush_kernel_dcache_range(start,size) \
 	flush_kernel_dcache_range_asm((start), (start)+(size));
 
diff --git a/include/asm-powerpc/cacheflush.h b/include/asm-powerpc/cacheflush.h
index 8a740c88d93d..08e93e789219 100644
--- a/include/asm-powerpc/cacheflush.h
+++ b/include/asm-powerpc/cacheflush.h
@@ -18,6 +18,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_icache_page(vma, page)		do { } while (0)
diff --git a/include/asm-s390/cacheflush.h b/include/asm-s390/cacheflush.h
index e399a8ba2ed7..f7cade8083f3 100644
--- a/include/asm-s390/cacheflush.h
+++ b/include/asm-s390/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the s390. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh2/cacheflush.h b/include/asm-sh/cpu-sh2/cacheflush.h
index f556fa80ea97..2979efb26de3 100644
--- a/include/asm-sh/cpu-sh2/cacheflush.h
+++ b/include/asm-sh/cpu-sh2/cacheflush.h
@@ -15,6 +15,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *
@@ -27,6 +28,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh3/cacheflush.h b/include/asm-sh/cpu-sh3/cacheflush.h
index 03fde97a7fd0..f70d8ef76a15 100644
--- a/include/asm-sh/cpu-sh3/cacheflush.h
+++ b/include/asm-sh/cpu-sh3/cacheflush.h
@@ -15,6 +15,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *
@@ -39,6 +40,7 @@
 
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                               unsigned long end);
 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn);
@@ -48,6 +50,7 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page);
 #else
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh4/cacheflush.h b/include/asm-sh/cpu-sh4/cacheflush.h
index 515fd574267c..b01a10f31225 100644
--- a/include/asm-sh/cpu-sh4/cacheflush.h
+++ b/include/asm-sh/cpu-sh4/cacheflush.h
@@ -18,6 +18,7 @@
  */
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end);
 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/asm-sh64/cacheflush.h b/include/asm-sh64/cacheflush.h
index 55f71aa0aa6b..1e53a47bdc97 100644
--- a/include/asm-sh64/cacheflush.h
+++ b/include/asm-sh64/cacheflush.h
@@ -21,6 +21,8 @@ extern void flush_icache_user_range(struct vm_area_struct *vma,
 				    struct page *page, unsigned long addr,
 				    int len);
 
+#define flush_cache_dup_mm(mm)	flush_cache_mm(mm)
+
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
diff --git a/include/asm-sparc/cacheflush.h b/include/asm-sparc/cacheflush.h
index fc632f811cd8..68ac10910271 100644
--- a/include/asm-sparc/cacheflush.h
+++ b/include/asm-sparc/cacheflush.h
@@ -48,6 +48,7 @@ BTFIXUPDEF_CALL(void, flush_cache_page, struct vm_area_struct *, unsigned long)
 
 #define flush_cache_all() BTFIXUP_CALL(flush_cache_all)()
 #define flush_cache_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm)
+#define flush_cache_dup_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm)
 #define flush_cache_range(vma,start,end) BTFIXUP_CALL(flush_cache_range)(vma,start,end)
 #define flush_cache_page(vma,addr,pfn) BTFIXUP_CALL(flush_cache_page)(vma,addr)
 #define flush_icache_range(start, end)		do { } while (0)
diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h
index 745d1ab60371..122e4058dd9e 100644
--- a/include/asm-sparc64/cacheflush.h
+++ b/include/asm-sparc64/cacheflush.h
@@ -12,6 +12,7 @@
 /* These are the same regardless of whether this is an SMP kernel or not. */
 #define flush_cache_mm(__mm) \
 	do { if ((__mm) == current->mm) flushw_user(); } while(0)
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 #define flush_cache_range(vma, start, end) \
 	flush_cache_mm((vma)->vm_mm)
 #define flush_cache_page(vma, page, pfn) \
diff --git a/include/asm-v850/cacheflush.h b/include/asm-v850/cacheflush.h
index e1a87f82f1a4..9ece05a202ef 100644
--- a/include/asm-v850/cacheflush.h
+++ b/include/asm-v850/cacheflush.h
@@ -24,6 +24,7 @@
    systems with MMUs, so we don't need them.  */
 #define flush_cache_all()			((void)0)
 #define flush_cache_mm(mm)			((void)0)
+#define flush_cache_dup_mm(mm)			((void)0)
 #define flush_cache_range(vma, start, end)	((void)0)
 #define flush_cache_page(vma, vmaddr, pfn)	((void)0)
 #define flush_dcache_page(page)			((void)0)
diff --git a/include/asm-x86_64/cacheflush.h b/include/asm-x86_64/cacheflush.h
index d32f7f58752a..ab1cb5c7dc92 100644
--- a/include/asm-x86_64/cacheflush.h
+++ b/include/asm-x86_64/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-xtensa/cacheflush.h b/include/asm-xtensa/cacheflush.h
index 337765b629de..22ef901b7845 100644
--- a/include/asm-xtensa/cacheflush.h
+++ b/include/asm-xtensa/cacheflush.h
@@ -75,6 +75,7 @@ extern void __flush_invalidate_dcache_range(unsigned long, unsigned long);
 
 #define flush_cache_all()		__flush_invalidate_cache_all();
 #define flush_cache_mm(mm)		__flush_invalidate_cache_all();
+#define flush_cache_dup_mm(mm)		__flush_invalidate_cache_all();
 
 #define flush_cache_vmap(start,end)	__flush_invalidate_cache_all();
 #define flush_cache_vunmap(start,end)	__flush_invalidate_cache_all();
@@ -88,6 +89,7 @@ extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned lon
 
 #define flush_cache_all()				do { } while (0)
 #define flush_cache_mm(mm)				do { } while (0)
+#define flush_cache_dup_mm(mm)				do { } while (0)
 
 #define flush_cache_vmap(start,end)			do { } while (0)
 #define flush_cache_vunmap(start,end)			do { } while (0)
diff --git a/kernel/fork.c b/kernel/fork.c
index d16c566eb645..fc723e595cd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -203,7 +203,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	struct mempolicy *pol;
 
 	down_write(&oldmm->mmap_sem);
-	flush_cache_mm(oldmm);
+	flush_cache_dup_mm(oldmm);
 	/*
 	 * Not linked in yet - no deadlock potential:
 	 */
-- 
cgit v1.2.3


From 1f71740ab9714bf5ae9ee04c724ff0d5c67ca3dc Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Fri, 24 Nov 2006 12:15:25 +0100
Subject: Driver core: show "initstate" of module

Show the initialization state(live, coming, going) of the module:
  $ cat /sys/module/usbcore/initstate
  live

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/module.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index d9eae45d0145..b565eaeff7e6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -824,9 +824,34 @@ static inline void module_unload_init(struct module *mod)
 }
 #endif /* CONFIG_MODULE_UNLOAD */
 
+static ssize_t show_initstate(struct module_attribute *mattr,
+			   struct module *mod, char *buffer)
+{
+	const char *state = "unknown";
+
+	switch (mod->state) {
+	case MODULE_STATE_LIVE:
+		state = "live";
+		break;
+	case MODULE_STATE_COMING:
+		state = "coming";
+		break;
+	case MODULE_STATE_GOING:
+		state = "going";
+		break;
+	}
+	return sprintf(buffer, "%s\n", state);
+}
+
+static struct module_attribute initstate = {
+	.attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE },
+	.show = show_initstate,
+};
+
 static struct module_attribute *modinfo_attrs[] = {
 	&modinfo_version,
 	&modinfo_srcversion,
+	&initstate,
 #ifdef CONFIG_MODULE_UNLOAD
 	&refcnt,
 #endif
-- 
cgit v1.2.3


From f89bce3d9afc6b1fb898ae176df4962c1303ee86 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 5 Dec 2006 03:07:38 -0800
Subject: Driver core: deprecate PM_LEGACY, default it to N

Deprecate the old "legacy" PM API, and more importantly default it to "n".
Virtually nothing in-tree uses it any more.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/power/Kconfig | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 710ed084e7c5..ed296225dcd4 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -20,13 +20,14 @@ config PM
 	  sending the processor to sleep and saving power.
 
 config PM_LEGACY
-	bool "Legacy Power Management API"
+	bool "Legacy Power Management API (DEPRECATED)"
 	depends on PM
-	default y
+	default n
 	---help---
-	   Support for pm_register() and friends.
+	   Support for pm_register() and friends.  This old API is obsoleted
+	   by the driver model.
 
-	   If unsure, say Y.
+	   If unsure, say N.
 
 config PM_DEBUG
 	bool "Power Management Debug Support"
-- 
cgit v1.2.3


From d1526e2cda64d5a1de56aef50bad9e5df14245c2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Fri, 15 Dec 2006 08:43:13 -0800
Subject: Remove stack unwinder for now

It has caused more problems than it ever really solved, and is
apparently not getting cleaned up and fixed.  We can put it back when
it's stable and isn't likely to make warning or bug events worse.

In the meantime, enable frame pointers for more readable stack traces.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Makefile                          |    5 -
 arch/i386/defconfig               |    2 -
 arch/i386/kernel/entry.S          |   32 -
 arch/i386/kernel/traps.c          |   83 ---
 arch/x86_64/Makefile              |    2 -
 arch/x86_64/defconfig             |    2 -
 arch/x86_64/kernel/entry.S        |   33 -
 arch/x86_64/kernel/traps.c        |   84 ---
 arch/x86_64/kernel/vmlinux.lds.S  |    2 -
 include/asm-generic/vmlinux.lds.h |   22 -
 include/asm-i386/unwind.h         |   91 ---
 include/asm-x86_64/unwind.h       |   96 ---
 include/linux/unwind.h            |   63 +-
 kernel/Makefile                   |    1 -
 kernel/unwind.c                   | 1305 -------------------------------------
 lib/Kconfig.debug                 |   18 -
 lib/fault-inject.c                |   32 +-
 17 files changed, 3 insertions(+), 1870 deletions(-)
 delete mode 100644 kernel/unwind.c

(limited to 'kernel')

diff --git a/Makefile b/Makefile
index dc82462b68ba..4a4720387936 100644
--- a/Makefile
+++ b/Makefile
@@ -496,11 +496,6 @@ else
 CFLAGS		+= -fomit-frame-pointer
 endif
 
-ifdef CONFIG_UNWIND_INFO
-CFLAGS		+= -fasynchronous-unwind-tables
-LDFLAGS_vmlinux	+= --eh-frame-hdr
-endif
-
 ifdef CONFIG_DEBUG_INFO
 CFLAGS		+= -g
 endif
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 3265208e5899..e075ff05c46d 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1493,8 +1493,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-CONFIG_UNWIND_INFO=y
-CONFIG_STACK_UNWIND=y
 # CONFIG_FORCED_INLINING is not set
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_RCU_TORTURE_TEST is not set
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index de34b7fed3c1..06461b8b715d 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -979,38 +979,6 @@ ENTRY(spurious_interrupt_bug)
 	jmp error_code
 	CFI_ENDPROC
 
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
-	CFI_STARTPROC
-	movl	4(%esp), %edx
-	movl	(%esp), %ecx
-	leal	4(%esp), %eax
-	movl	%ebx, PT_EBX(%edx)
-	xorl	%ebx, %ebx
-	movl	%ebx, PT_ECX(%edx)
-	movl	%ebx, PT_EDX(%edx)
-	movl	%esi, PT_ESI(%edx)
-	movl	%edi, PT_EDI(%edx)
-	movl	%ebp, PT_EBP(%edx)
-	movl	%ebx, PT_EAX(%edx)
-	movl	$__USER_DS, PT_DS(%edx)
-	movl	$__USER_DS, PT_ES(%edx)
-	movl	$0, PT_GS(%edx)
-	movl	%ebx, PT_ORIG_EAX(%edx)
-	movl	%ecx, PT_EIP(%edx)
-	movl	12(%esp), %ecx
-	movl	$__KERNEL_CS, PT_CS(%edx)
-	movl	%ebx, PT_EFLAGS(%edx)
-	movl	%eax, PT_OLDESP(%edx)
-	movl	8(%esp), %eax
-	movl	%ecx, 8(%esp)
-	movl	PT_EBX(%edx), %ebx
-	movl	$__KERNEL_DS, PT_OLDSS(%edx)
-	jmpl	*%eax
-	CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
-
 ENTRY(kernel_thread_helper)
 	pushl $0		# fake return address for unwinder
 	CFI_STARTPROC
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 2b30dbf8d117..0efad8aeb41a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -94,11 +94,6 @@ asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
 int kstack_depth_to_print = 24;
-#ifdef CONFIG_STACK_UNWIND
-static int call_trace = 1;
-#else
-#define call_trace (-1)
-#endif
 ATOMIC_NOTIFIER_HEAD(i386die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
@@ -152,33 +147,6 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	return ebp;
 }
 
-struct ops_and_data {
-	struct stacktrace_ops *ops;
-	void *data;
-};
-
-static asmlinkage int
-dump_trace_unwind(struct unwind_frame_info *info, void *data)
-{
-	struct ops_and_data *oad = (struct ops_and_data *)data;
-	int n = 0;
-	unsigned long sp = UNW_SP(info);
-
-	if (arch_unw_user_mode(info))
-		return -1;
-	while (unwind(info) == 0 && UNW_PC(info)) {
-		n++;
-		oad->ops->address(oad->data, UNW_PC(info));
-		if (arch_unw_user_mode(info))
-			break;
-		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
-		    && sp > UNW_SP(info))
-			break;
-		sp = UNW_SP(info);
-	}
-	return n;
-}
-
 #define MSG(msg) ops->warning(data, msg)
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
@@ -190,41 +158,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	if (!task)
 		task = current;
 
-	if (call_trace >= 0) {
-		int unw_ret = 0;
-		struct unwind_frame_info info;
-		struct ops_and_data oad = { .ops = ops, .data = data };
-
-		if (regs) {
-			if (unwind_init_frame_info(&info, task, regs) == 0)
-				unw_ret = dump_trace_unwind(&info, &oad);
-		} else if (task == current)
-			unw_ret = unwind_init_running(&info, dump_trace_unwind,
-						      &oad);
-		else {
-			if (unwind_init_blocked(&info, task) == 0)
-				unw_ret = dump_trace_unwind(&info, &oad);
-		}
-		if (unw_ret > 0) {
-			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				ops->warning_symbol(data,
-					     "DWARF2 unwinder stuck at %s",
-					     UNW_PC(&info));
-				if (UNW_SP(&info) >= PAGE_OFFSET) {
-					MSG("Leftover inexact backtrace:");
-					stack = (void *)UNW_SP(&info);
-					if (!stack)
-						return;
-					ebp = UNW_FP(&info);
-				} else
-					MSG("Full inexact backtrace again:");
-			} else if (call_trace >= 1)
-				return;
-			else
-				MSG("Full inexact backtrace again:");
-		} else
-			MSG("Inexact backtrace:");
-	}
 	if (!stack) {
 		unsigned long dummy;
 		stack = &dummy;
@@ -1258,19 +1191,3 @@ static int __init kstack_setup(char *s)
 	return 1;
 }
 __setup("kstack=", kstack_setup);
-
-#ifdef CONFIG_STACK_UNWIND
-static int __init call_trace_setup(char *s)
-{
-	if (strcmp(s, "old") == 0)
-		call_trace = -1;
-	else if (strcmp(s, "both") == 0)
-		call_trace = 0;
-	else if (strcmp(s, "newfallback") == 0)
-		call_trace = 1;
-	else if (strcmp(s, "new") == 2)
-		call_trace = 2;
-	return 1;
-}
-__setup("call_trace=", call_trace_setup);
-#endif
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index b471b8550d03..2941a915d4ef 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -45,9 +45,7 @@ cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
 # actually it makes the kernel smaller too.
 cflags-y += -fno-reorder-blocks
 cflags-y += -Wno-sign-compare
-ifneq ($(CONFIG_UNWIND_INFO),y)
 cflags-y += -fno-asynchronous-unwind-tables
-endif
 ifneq ($(CONFIG_DEBUG_INFO),y)
 # -fweb shrinks the kernel a bit, but the difference is very small
 # it also messes up debugging, so don't use it for now.
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 1a1c6a1a299b..ac80b1209fc0 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1523,8 +1523,6 @@ CONFIG_DEBUG_FS=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-CONFIG_UNWIND_INFO=y
-CONFIG_STACK_UNWIND=y
 # CONFIG_FORCED_INLINING is not set
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_RCU_TORTURE_TEST is not set
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 601d332c4b79..9f5dac64aa8f 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -1155,36 +1155,3 @@ ENTRY(call_softirq)
 	ret
 	CFI_ENDPROC
 ENDPROC(call_softirq)
-
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
-	CFI_STARTPROC
-	movq	%r15, R15(%rdi)
-	movq	%r14, R14(%rdi)
-	xchgq	%rsi, %rdx
-	movq	%r13, R13(%rdi)
-	movq	%r12, R12(%rdi)
-	xorl	%eax, %eax
-	movq	%rbp, RBP(%rdi)
-	movq	%rbx, RBX(%rdi)
-	movq	(%rsp), %rcx
-	movq	%rax, R11(%rdi)
-	movq	%rax, R10(%rdi)
-	movq	%rax, R9(%rdi)
-	movq	%rax, R8(%rdi)
-	movq	%rax, RAX(%rdi)
-	movq	%rax, RCX(%rdi)
-	movq	%rax, RDX(%rdi)
-	movq	%rax, RSI(%rdi)
-	movq	%rax, RDI(%rdi)
-	movq	%rax, ORIG_RAX(%rdi)
-	movq	%rcx, RIP(%rdi)
-	leaq	8(%rsp), %rcx
-	movq	$__KERNEL_CS, CS(%rdi)
-	movq	%rax, EFLAGS(%rdi)
-	movq	%rcx, RSP(%rdi)
-	movq	$__KERNEL_DS, SS(%rdi)
-	jmpq	*%rdx
-	CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index b54ccc07f379..1d9eb6db732a 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,11 +110,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 }
 
 int kstack_depth_to_print = 12;
-#ifdef CONFIG_STACK_UNWIND
-static int call_trace = 1;
-#else
-#define call_trace (-1)
-#endif
 
 #ifdef CONFIG_KALLSYMS
 void printk_address(unsigned long address)
@@ -217,32 +212,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 	return NULL;
 }
 
-struct ops_and_data {
-	struct stacktrace_ops *ops;
-	void *data;
-};
-
-static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
-{
-	struct ops_and_data *oad = (struct ops_and_data *)context;
-	int n = 0;
-	unsigned long sp = UNW_SP(info);
-
-	if (arch_unw_user_mode(info))
-		return -1;
-	while (unwind(info) == 0 && UNW_PC(info)) {
-		n++;
-		oad->ops->address(oad->data, UNW_PC(info));
-		if (arch_unw_user_mode(info))
-			break;
-		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
-		    && sp > UNW_SP(info))
-			break;
-		sp = UNW_SP(info);
-	}
-	return n;
-}
-
 #define MSG(txt) ops->warning(data, txt)
 
 /*
@@ -270,40 +239,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	if (!tsk)
 		tsk = current;
 
-	if (call_trace >= 0) {
-		int unw_ret = 0;
-		struct unwind_frame_info info;
-		struct ops_and_data oad = { .ops = ops, .data = data };
-
-		if (regs) {
-			if (unwind_init_frame_info(&info, tsk, regs) == 0)
-				unw_ret = dump_trace_unwind(&info, &oad);
-		} else if (tsk == current)
-			unw_ret = unwind_init_running(&info, dump_trace_unwind,
-						      &oad);
-		else {
-			if (unwind_init_blocked(&info, tsk) == 0)
-				unw_ret = dump_trace_unwind(&info, &oad);
-		}
-		if (unw_ret > 0) {
-			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				ops->warning_symbol(data,
-					     "DWARF2 unwinder stuck at %s",
-					     UNW_PC(&info));
-				if ((long)UNW_SP(&info) < 0) {
-					MSG("Leftover inexact backtrace:");
-					stack = (unsigned long *)UNW_SP(&info);
-					if (!stack)
-						goto out;
-				} else
-					MSG("Full inexact backtrace again:");
-			} else if (call_trace >= 1)
-				goto out;
-			else
-				MSG("Full inexact backtrace again:");
-		} else
-			MSG("Inexact backtrace:");
-	}
 	if (!stack) {
 		unsigned long dummy;
 		stack = &dummy;
@@ -387,7 +322,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	tinfo = current_thread_info();
 	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
 #undef HANDLE_STACK
-out:
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
@@ -1188,21 +1122,3 @@ static int __init kstack_setup(char *s)
 	return 0;
 }
 early_param("kstack", kstack_setup);
-
-#ifdef CONFIG_STACK_UNWIND
-static int __init call_trace_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	if (strcmp(s, "old") == 0)
-		call_trace = -1;
-	else if (strcmp(s, "both") == 0)
-		call_trace = 0;
-	else if (strcmp(s, "newfallback") == 0)
-		call_trace = 1;
-	else if (strcmp(s, "new") == 0)
-		call_trace = 2;
-	return 0;
-}
-early_param("call_trace", call_trace_setup);
-#endif
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 514be5dd2303..1e54ddf2338d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -221,9 +221,7 @@ SECTIONS
   /* Sections to be discarded */
   /DISCARD/ : {
 	*(.exitcall.exit)
-#ifndef CONFIG_UNWIND_INFO
 	*(.eh_frame)
-#endif
 	}
 
   STABS_DEBUG
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7437ccaada77..1587121730c5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -119,8 +119,6 @@
 		*(__ksymtab_strings)					\
 	}								\
 									\
-	EH_FRAME							\
-									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
 		VMLINUX_SYMBOL(__start___param) = .;			\
@@ -160,26 +158,6 @@
 		*(.kprobes.text)					\
 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
 
-#ifdef CONFIG_STACK_UNWIND
-#define EH_FRAME							\
-		/* Unwind data binary search table */			\
-		. = ALIGN(8);						\
-        	.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) {	\
-			VMLINUX_SYMBOL(__start_unwind_hdr) = .;		\
-			*(.eh_frame_hdr)				\
-			VMLINUX_SYMBOL(__end_unwind_hdr) = .;		\
-		}							\
-		/* Unwind data */					\
-		. = ALIGN(8);						\
-		.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {		\
-			VMLINUX_SYMBOL(__start_unwind) = .;		\
-		  	*(.eh_frame)					\
-			VMLINUX_SYMBOL(__end_unwind) = .;		\
-		}
-#else
-#define EH_FRAME
-#endif
-
 		/* DWARF debug sections.
 		Symbols in the DWARF debugging sections are relative to
 		the beginning of the section so we begin them at 0.  */
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index aa2c931e30db..43c70c3de2f9 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -1,95 +1,6 @@
 #ifndef _ASM_I386_UNWIND_H
 #define _ASM_I386_UNWIND_H
 
-/*
- * Copyright (C) 2002-2006 Novell, Inc.
- *	Jan Beulich <jbeulich@novell.com>
- * This code is released under version 2 of the GNU GPL.
- */
-
-#ifdef CONFIG_STACK_UNWIND
-
-#include <linux/sched.h>
-#include <asm/fixmap.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-struct unwind_frame_info
-{
-	struct pt_regs regs;
-	struct task_struct *task;
-	unsigned call_frame:1;
-};
-
-#define UNW_PC(frame)        (frame)->regs.eip
-#define UNW_SP(frame)        (frame)->regs.esp
-#ifdef CONFIG_FRAME_POINTER
-#define UNW_FP(frame)        (frame)->regs.ebp
-#define FRAME_RETADDR_OFFSET 4
-#define FRAME_LINK_OFFSET    0
-#define STACK_BOTTOM(tsk)    STACK_LIMIT((tsk)->thread.esp0)
-#define STACK_TOP(tsk)       ((tsk)->thread.esp0)
-#else
-#define UNW_FP(frame) ((void)(frame), 0)
-#endif
-#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
-
-#define UNW_REGISTER_INFO \
-	PTREGS_INFO(eax), \
-	PTREGS_INFO(ecx), \
-	PTREGS_INFO(edx), \
-	PTREGS_INFO(ebx), \
-	PTREGS_INFO(esp), \
-	PTREGS_INFO(ebp), \
-	PTREGS_INFO(esi), \
-	PTREGS_INFO(edi), \
-	PTREGS_INFO(eip)
-
-#define UNW_DEFAULT_RA(raItem, dataAlign) \
-	((raItem).where == Memory && \
-	 !((raItem).value * (dataAlign) + 4))
-
-static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
-                                            /*const*/ struct pt_regs *regs)
-{
-	if (user_mode_vm(regs))
-		info->regs = *regs;
-	else {
-		memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
-		info->regs.esp = (unsigned long)&regs->esp;
-		info->regs.xss = __KERNEL_DS;
-	}
-}
-
-static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
-{
-	memset(&info->regs, 0, sizeof(info->regs));
-	info->regs.eip = info->task->thread.eip;
-	info->regs.xcs = __KERNEL_CS;
-	__get_user(info->regs.ebp, (long *)info->task->thread.esp);
-	info->regs.esp = info->task->thread.esp;
-	info->regs.xss = __KERNEL_DS;
-	info->regs.xds = __USER_DS;
-	info->regs.xes = __USER_DS;
-	info->regs.xgs = __KERNEL_PDA;
-}
-
-extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
-                                               asmlinkage int (*callback)(struct unwind_frame_info *,
-                                                                          void *arg),
-                                               void *arg);
-
-static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
-{
-	return user_mode_vm(&info->regs)
-	       || info->regs.eip < PAGE_OFFSET
-	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
-	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
-	       || info->regs.esp < PAGE_OFFSET;
-}
-
-#else
-
 #define UNW_PC(frame) ((void)(frame), 0)
 #define UNW_SP(frame) ((void)(frame), 0)
 #define UNW_FP(frame) ((void)(frame), 0)
@@ -99,6 +10,4 @@ static inline int arch_unw_user_mode(const void *info)
 	return 0;
 }
 
-#endif
-
 #endif /* _ASM_I386_UNWIND_H */
diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h
index 2f6349e48717..02710f6a4560 100644
--- a/include/asm-x86_64/unwind.h
+++ b/include/asm-x86_64/unwind.h
@@ -1,100 +1,6 @@
 #ifndef _ASM_X86_64_UNWIND_H
 #define _ASM_X86_64_UNWIND_H
 
-/*
- * Copyright (C) 2002-2006 Novell, Inc.
- *	Jan Beulich <jbeulich@novell.com>
- * This code is released under version 2 of the GNU GPL.
- */
-
-#ifdef CONFIG_STACK_UNWIND
-
-#include <linux/sched.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-#include <asm/vsyscall.h>
-
-struct unwind_frame_info
-{
-	struct pt_regs regs;
-	struct task_struct *task;
-	unsigned call_frame:1;
-};
-
-#define UNW_PC(frame)        (frame)->regs.rip
-#define UNW_SP(frame)        (frame)->regs.rsp
-#ifdef CONFIG_FRAME_POINTER
-#define UNW_FP(frame)        (frame)->regs.rbp
-#define FRAME_RETADDR_OFFSET 8
-#define FRAME_LINK_OFFSET    0
-#define STACK_BOTTOM(tsk)    (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
-#define STACK_TOP(tsk)       ((tsk)->thread.rsp0)
-#endif
-/* Might need to account for the special exception and interrupt handling
-   stacks here, since normally
-	EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
-   but the construct is needed only for getting across the stack switch to
-   the interrupt stack - thus considering the IRQ stack itself is unnecessary,
-   and the overhead of comparing against all exception handling stacks seems
-   not desirable. */
-#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
-
-#define UNW_REGISTER_INFO \
-	PTREGS_INFO(rax), \
-	PTREGS_INFO(rdx), \
-	PTREGS_INFO(rcx), \
-	PTREGS_INFO(rbx), \
-	PTREGS_INFO(rsi), \
-	PTREGS_INFO(rdi), \
-	PTREGS_INFO(rbp), \
-	PTREGS_INFO(rsp), \
-	PTREGS_INFO(r8), \
-	PTREGS_INFO(r9), \
-	PTREGS_INFO(r10), \
-	PTREGS_INFO(r11), \
-	PTREGS_INFO(r12), \
-	PTREGS_INFO(r13), \
-	PTREGS_INFO(r14), \
-	PTREGS_INFO(r15), \
-	PTREGS_INFO(rip)
-
-#define UNW_DEFAULT_RA(raItem, dataAlign) \
-	((raItem).where == Memory && \
-	 !((raItem).value * (dataAlign) + 8))
-
-static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
-                                            /*const*/ struct pt_regs *regs)
-{
-	info->regs = *regs;
-}
-
-static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
-{
-	extern const char thread_return[];
-
-	memset(&info->regs, 0, sizeof(info->regs));
-	info->regs.rip = (unsigned long)thread_return;
-	info->regs.cs = __KERNEL_CS;
-	__get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
-	info->regs.rsp = info->task->thread.rsp;
-	info->regs.ss = __KERNEL_DS;
-}
-
-extern int arch_unwind_init_running(struct unwind_frame_info *,
-                                    int (*callback)(struct unwind_frame_info *,
-                                                    void *arg),
-                                    void *arg);
-
-static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
-{
-	return user_mode(&info->regs)
-	       || (long)info->regs.rip >= 0
-	       || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
-	       || (long)info->regs.rsp >= 0;
-}
-
-#else
-
 #define UNW_PC(frame) ((void)(frame), 0UL)
 #define UNW_SP(frame) ((void)(frame), 0UL)
 
@@ -103,6 +9,4 @@ static inline int arch_unw_user_mode(const void *info)
 	return 0;
 }
 
-#endif
-
 #endif /* _ASM_X86_64_UNWIND_H */
diff --git a/include/linux/unwind.h b/include/linux/unwind.h
index 749928c161fb..7760860fa170 100644
--- a/include/linux/unwind.h
+++ b/include/linux/unwind.h
@@ -14,63 +14,6 @@
 
 struct module;
 
-#ifdef CONFIG_STACK_UNWIND
-
-#include <asm/unwind.h>
-
-#ifndef ARCH_UNWIND_SECTION_NAME
-#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
-#endif
-
-/*
- * Initialize unwind support.
- */
-extern void unwind_init(void);
-extern void unwind_setup(void);
-
-#ifdef CONFIG_MODULES
-
-extern void *unwind_add_table(struct module *,
-                              const void *table_start,
-                              unsigned long table_size);
-
-extern void unwind_remove_table(void *handle, int init_only);
-
-#endif
-
-extern int unwind_init_frame_info(struct unwind_frame_info *,
-                                  struct task_struct *,
-                                  /*const*/ struct pt_regs *);
-
-/*
- * Prepare to unwind a blocked task.
- */
-extern int unwind_init_blocked(struct unwind_frame_info *,
-                               struct task_struct *);
-
-/*
- * Prepare to unwind the currently running thread.
- */
-extern int unwind_init_running(struct unwind_frame_info *,
-                               asmlinkage int (*callback)(struct unwind_frame_info *,
-                                                          void *arg),
-                               void *arg);
-
-/*
- * Unwind to previous to frame.  Returns 0 if successful, negative
- * number in case of an error.
- */
-extern int unwind(struct unwind_frame_info *);
-
-/*
- * Unwind until the return pointer is in user-land (or until an error
- * occurs).  Returns 0 if successful, negative number in case of
- * error.
- */
-extern int unwind_to_user(struct unwind_frame_info *);
-
-#else
-
 struct unwind_frame_info {};
 
 static inline void unwind_init(void) {}
@@ -85,12 +28,12 @@ static inline void *unwind_add_table(struct module *mod,
 	return NULL;
 }
 
-#endif
-
 static inline void unwind_remove_table(void *handle, int init_only)
 {
 }
 
+#endif
+
 static inline int unwind_init_frame_info(struct unwind_frame_info *info,
                                          struct task_struct *tsk,
                                          const struct pt_regs *regs)
@@ -122,6 +65,4 @@ static inline int unwind_to_user(struct unwind_frame_info *info)
 	return -ENOSYS;
 }
 
-#endif
-
 #endif /* _LINUX_UNWIND_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 5e3f3b75563a..14f4d45e0ae9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
-obj-$(CONFIG_STACK_UNWIND) += unwind.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/unwind.c b/kernel/unwind.c
deleted file mode 100644
index 09c261329249..000000000000
--- a/kernel/unwind.c
+++ /dev/null
@@ -1,1305 +0,0 @@
-/*
- * Copyright (C) 2002-2006 Novell, Inc.
- *	Jan Beulich <jbeulich@novell.com>
- * This code is released under version 2 of the GNU GPL.
- *
- * A simple API for unwinding kernel stacks.  This is used for
- * debugging and error reporting purposes.  The kernel doesn't need
- * full-blown stack unwinding with all the bells and whistles, so there
- * is not much point in implementing the full Dwarf2 unwind API.
- */
-
-#include <linux/unwind.h>
-#include <linux/module.h>
-#include <linux/bootmem.h>
-#include <linux/sort.h>
-#include <linux/stop_machine.h>
-#include <linux/uaccess.h>
-#include <asm/sections.h>
-#include <asm/uaccess.h>
-#include <asm/unaligned.h>
-
-extern const char __start_unwind[], __end_unwind[];
-extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
-
-#define MAX_STACK_DEPTH 8
-
-#define EXTRA_INFO(f) { \
-		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
-		                  % FIELD_SIZEOF(struct unwind_frame_info, f)) \
-		+ offsetof(struct unwind_frame_info, f) \
-		  / FIELD_SIZEOF(struct unwind_frame_info, f), \
-		FIELD_SIZEOF(struct unwind_frame_info, f) \
-	}
-#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
-
-static const struct {
-	unsigned offs:BITS_PER_LONG / 2;
-	unsigned width:BITS_PER_LONG / 2;
-} reg_info[] = {
-	UNW_REGISTER_INFO
-};
-
-#undef PTREGS_INFO
-#undef EXTRA_INFO
-
-#ifndef REG_INVALID
-#define REG_INVALID(r) (reg_info[r].width == 0)
-#endif
-
-#define DW_CFA_nop                          0x00
-#define DW_CFA_set_loc                      0x01
-#define DW_CFA_advance_loc1                 0x02
-#define DW_CFA_advance_loc2                 0x03
-#define DW_CFA_advance_loc4                 0x04
-#define DW_CFA_offset_extended              0x05
-#define DW_CFA_restore_extended             0x06
-#define DW_CFA_undefined                    0x07
-#define DW_CFA_same_value                   0x08
-#define DW_CFA_register                     0x09
-#define DW_CFA_remember_state               0x0a
-#define DW_CFA_restore_state                0x0b
-#define DW_CFA_def_cfa                      0x0c
-#define DW_CFA_def_cfa_register             0x0d
-#define DW_CFA_def_cfa_offset               0x0e
-#define DW_CFA_def_cfa_expression           0x0f
-#define DW_CFA_expression                   0x10
-#define DW_CFA_offset_extended_sf           0x11
-#define DW_CFA_def_cfa_sf                   0x12
-#define DW_CFA_def_cfa_offset_sf            0x13
-#define DW_CFA_val_offset                   0x14
-#define DW_CFA_val_offset_sf                0x15
-#define DW_CFA_val_expression               0x16
-#define DW_CFA_lo_user                      0x1c
-#define DW_CFA_GNU_window_save              0x2d
-#define DW_CFA_GNU_args_size                0x2e
-#define DW_CFA_GNU_negative_offset_extended 0x2f
-#define DW_CFA_hi_user                      0x3f
-
-#define DW_EH_PE_FORM     0x07
-#define DW_EH_PE_native   0x00
-#define DW_EH_PE_leb128   0x01
-#define DW_EH_PE_data2    0x02
-#define DW_EH_PE_data4    0x03
-#define DW_EH_PE_data8    0x04
-#define DW_EH_PE_signed   0x08
-#define DW_EH_PE_ADJUST   0x70
-#define DW_EH_PE_abs      0x00
-#define DW_EH_PE_pcrel    0x10
-#define DW_EH_PE_textrel  0x20
-#define DW_EH_PE_datarel  0x30
-#define DW_EH_PE_funcrel  0x40
-#define DW_EH_PE_aligned  0x50
-#define DW_EH_PE_indirect 0x80
-#define DW_EH_PE_omit     0xff
-
-typedef unsigned long uleb128_t;
-typedef   signed long sleb128_t;
-#define sleb128abs __builtin_labs
-
-static struct unwind_table {
-	struct {
-		unsigned long pc;
-		unsigned long range;
-	} core, init;
-	const void *address;
-	unsigned long size;
-	const unsigned char *header;
-	unsigned long hdrsz;
-	struct unwind_table *link;
-	const char *name;
-} root_table;
-
-struct unwind_item {
-	enum item_location {
-		Nowhere,
-		Memory,
-		Register,
-		Value
-	} where;
-	uleb128_t value;
-};
-
-struct unwind_state {
-	uleb128_t loc, org;
-	const u8 *cieStart, *cieEnd;
-	uleb128_t codeAlign;
-	sleb128_t dataAlign;
-	struct cfa {
-		uleb128_t reg, offs;
-	} cfa;
-	struct unwind_item regs[ARRAY_SIZE(reg_info)];
-	unsigned stackDepth:8;
-	unsigned version:8;
-	const u8 *label;
-	const u8 *stack[MAX_STACK_DEPTH];
-};
-
-static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
-
-static unsigned unwind_debug;
-static int __init unwind_debug_setup(char *s)
-{
-	unwind_debug = simple_strtoul(s, NULL, 0);
-	return 1;
-}
-__setup("unwind_debug=", unwind_debug_setup);
-#define dprintk(lvl, fmt, args...) \
-	((void)(lvl > unwind_debug \
-	 || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
-
-static struct unwind_table *find_table(unsigned long pc)
-{
-	struct unwind_table *table;
-
-	for (table = &root_table; table; table = table->link)
-		if ((pc >= table->core.pc
-		     && pc < table->core.pc + table->core.range)
-		    || (pc >= table->init.pc
-		        && pc < table->init.pc + table->init.range))
-			break;
-
-	return table;
-}
-
-static unsigned long read_pointer(const u8 **pLoc,
-                                  const void *end,
-                                  signed ptrType,
-                                  unsigned long text_base,
-                                  unsigned long data_base);
-
-static void init_unwind_table(struct unwind_table *table,
-                              const char *name,
-                              const void *core_start,
-                              unsigned long core_size,
-                              const void *init_start,
-                              unsigned long init_size,
-                              const void *table_start,
-                              unsigned long table_size,
-                              const u8 *header_start,
-                              unsigned long header_size)
-{
-	const u8 *ptr = header_start + 4;
-	const u8 *end = header_start + header_size;
-
-	table->core.pc = (unsigned long)core_start;
-	table->core.range = core_size;
-	table->init.pc = (unsigned long)init_start;
-	table->init.range = init_size;
-	table->address = table_start;
-	table->size = table_size;
-	/* See if the linker provided table looks valid. */
-	if (header_size <= 4
-	    || header_start[0] != 1
-	    || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
-	       != table_start
-	    || !read_pointer(&ptr, end, header_start[2], 0, 0)
-	    || !read_pointer(&ptr, end, header_start[3], 0,
-	                     (unsigned long)header_start)
-	    || !read_pointer(&ptr, end, header_start[3], 0,
-	                     (unsigned long)header_start))
-		header_start = NULL;
-	table->hdrsz = header_size;
-	smp_wmb();
-	table->header = header_start;
-	table->link = NULL;
-	table->name = name;
-}
-
-void __init unwind_init(void)
-{
-	init_unwind_table(&root_table, "kernel",
-	                  _text, _end - _text,
-	                  NULL, 0,
-	                  __start_unwind, __end_unwind - __start_unwind,
-	                  __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
-}
-
-static const u32 bad_cie, not_fde;
-static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
-static signed fde_pointer_type(const u32 *cie);
-
-struct eh_frame_hdr_table_entry {
-	unsigned long start, fde;
-};
-
-static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
-{
-	const struct eh_frame_hdr_table_entry *e1 = p1;
-	const struct eh_frame_hdr_table_entry *e2 = p2;
-
-	return (e1->start > e2->start) - (e1->start < e2->start);
-}
-
-static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
-{
-	struct eh_frame_hdr_table_entry *e1 = p1;
-	struct eh_frame_hdr_table_entry *e2 = p2;
-	unsigned long v;
-
-	v = e1->start;
-	e1->start = e2->start;
-	e2->start = v;
-	v = e1->fde;
-	e1->fde = e2->fde;
-	e2->fde = v;
-}
-
-static void __init setup_unwind_table(struct unwind_table *table,
-					void *(*alloc)(unsigned long))
-{
-	const u8 *ptr;
-	unsigned long tableSize = table->size, hdrSize;
-	unsigned n;
-	const u32 *fde;
-	struct {
-		u8 version;
-		u8 eh_frame_ptr_enc;
-		u8 fde_count_enc;
-		u8 table_enc;
-		unsigned long eh_frame_ptr;
-		unsigned int fde_count;
-		struct eh_frame_hdr_table_entry table[];
-	} __attribute__((__packed__)) *header;
-
-	if (table->header)
-		return;
-
-	if (table->hdrsz)
-		printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
-		       table->name);
-
-	if (tableSize & (sizeof(*fde) - 1))
-		return;
-
-	for (fde = table->address, n = 0;
-	     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
-	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
-		const u32 *cie = cie_for_fde(fde, table);
-		signed ptrType;
-
-		if (cie == &not_fde)
-			continue;
-		if (cie == NULL
-		    || cie == &bad_cie
-		    || (ptrType = fde_pointer_type(cie)) < 0)
-			return;
-		ptr = (const u8 *)(fde + 2);
-		if (!read_pointer(&ptr,
-		                  (const u8 *)(fde + 1) + *fde,
-		                  ptrType, 0, 0))
-			return;
-		++n;
-	}
-
-	if (tableSize || !n)
-		return;
-
-	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
-	        + 2 * n * sizeof(unsigned long);
-	dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
-	header = alloc(hdrSize);
-	if (!header)
-		return;
-	header->version          = 1;
-	header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
-	header->fde_count_enc    = DW_EH_PE_abs|DW_EH_PE_data4;
-	header->table_enc        = DW_EH_PE_abs|DW_EH_PE_native;
-	put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
-	BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
-	             % __alignof(typeof(header->fde_count)));
-	header->fde_count        = n;
-
-	BUILD_BUG_ON(offsetof(typeof(*header), table)
-	             % __alignof(typeof(*header->table)));
-	for (fde = table->address, tableSize = table->size, n = 0;
-	     tableSize;
-	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
-		const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
-
-		if (!fde[1])
-			continue; /* this is a CIE */
-		ptr = (const u8 *)(fde + 2);
-		header->table[n].start = read_pointer(&ptr,
-		                                      (const u8 *)(fde + 1) + *fde,
-		                                      fde_pointer_type(cie), 0, 0);
-		header->table[n].fde = (unsigned long)fde;
-		++n;
-	}
-	WARN_ON(n != header->fde_count);
-
-	sort(header->table,
-	     n,
-	     sizeof(*header->table),
-	     cmp_eh_frame_hdr_table_entries,
-	     swap_eh_frame_hdr_table_entries);
-
-	table->hdrsz = hdrSize;
-	smp_wmb();
-	table->header = (const void *)header;
-}
-
-static void *__init balloc(unsigned long sz)
-{
-	return __alloc_bootmem_nopanic(sz,
-	                               sizeof(unsigned int),
-	                               __pa(MAX_DMA_ADDRESS));
-}
-
-void __init unwind_setup(void)
-{
-	setup_unwind_table(&root_table, balloc);
-}
-
-#ifdef CONFIG_MODULES
-
-static struct unwind_table *last_table;
-
-/* Must be called with module_mutex held. */
-void *unwind_add_table(struct module *module,
-                       const void *table_start,
-                       unsigned long table_size)
-{
-	struct unwind_table *table;
-
-	if (table_size <= 0)
-		return NULL;
-
-	table = kmalloc(sizeof(*table), GFP_KERNEL);
-	if (!table)
-		return NULL;
-
-	init_unwind_table(table, module->name,
-	                  module->module_core, module->core_size,
-	                  module->module_init, module->init_size,
-	                  table_start, table_size,
-	                  NULL, 0);
-
-	if (last_table)
-		last_table->link = table;
-	else
-		root_table.link = table;
-	last_table = table;
-
-	return table;
-}
-
-struct unlink_table_info
-{
-	struct unwind_table *table;
-	int init_only;
-};
-
-static int unlink_table(void *arg)
-{
-	struct unlink_table_info *info = arg;
-	struct unwind_table *table = info->table, *prev;
-
-	for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
-		;
-
-	if (prev->link) {
-		if (info->init_only) {
-			table->init.pc = 0;
-			table->init.range = 0;
-			info->table = NULL;
-		} else {
-			prev->link = table->link;
-			if (!prev->link)
-				last_table = prev;
-		}
-	} else
-		info->table = NULL;
-
-	return 0;
-}
-
-/* Must be called with module_mutex held. */
-void unwind_remove_table(void *handle, int init_only)
-{
-	struct unwind_table *table = handle;
-	struct unlink_table_info info;
-
-	if (!table || table == &root_table)
-		return;
-
-	if (init_only && table == last_table) {
-		table->init.pc = 0;
-		table->init.range = 0;
-		return;
-	}
-
-	info.table = table;
-	info.init_only = init_only;
-	stop_machine_run(unlink_table, &info, NR_CPUS);
-
-	if (info.table)
-		kfree(table);
-}
-
-#endif /* CONFIG_MODULES */
-
-static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
-{
-	const u8 *cur = *pcur;
-	uleb128_t value;
-	unsigned shift;
-
-	for (shift = 0, value = 0; cur < end; shift += 7) {
-		if (shift + 7 > 8 * sizeof(value)
-		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
-			cur = end + 1;
-			break;
-		}
-		value |= (uleb128_t)(*cur & 0x7f) << shift;
-		if (!(*cur++ & 0x80))
-			break;
-	}
-	*pcur = cur;
-
-	return value;
-}
-
-static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
-{
-	const u8 *cur = *pcur;
-	sleb128_t value;
-	unsigned shift;
-
-	for (shift = 0, value = 0; cur < end; shift += 7) {
-		if (shift + 7 > 8 * sizeof(value)
-		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
-			cur = end + 1;
-			break;
-		}
-		value |= (sleb128_t)(*cur & 0x7f) << shift;
-		if (!(*cur & 0x80)) {
-			value |= -(*cur++ & 0x40) << shift;
-			break;
-		}
-	}
-	*pcur = cur;
-
-	return value;
-}
-
-static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
-{
-	const u32 *cie;
-
-	if (!*fde || (*fde & (sizeof(*fde) - 1)))
-		return &bad_cie;
-	if (!fde[1])
-		return &not_fde; /* this is a CIE */
-	if ((fde[1] & (sizeof(*fde) - 1))
-	    || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
-		return NULL; /* this is not a valid FDE */
-	cie = fde + 1 - fde[1] / sizeof(*fde);
-	if (*cie <= sizeof(*cie) + 4
-	    || *cie >= fde[1] - sizeof(*fde)
-	    || (*cie & (sizeof(*cie) - 1))
-	    || cie[1])
-		return NULL; /* this is not a (valid) CIE */
-	return cie;
-}
-
-static unsigned long read_pointer(const u8 **pLoc,
-                                  const void *end,
-                                  signed ptrType,
-                                  unsigned long text_base,
-                                  unsigned long data_base)
-{
-	unsigned long value = 0;
-	union {
-		const u8 *p8;
-		const u16 *p16u;
-		const s16 *p16s;
-		const u32 *p32u;
-		const s32 *p32s;
-		const unsigned long *pul;
-	} ptr;
-
-	if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
-		dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
-		return 0;
-	}
-	ptr.p8 = *pLoc;
-	switch(ptrType & DW_EH_PE_FORM) {
-	case DW_EH_PE_data2:
-		if (end < (const void *)(ptr.p16u + 1)) {
-			dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
-			return 0;
-		}
-		if(ptrType & DW_EH_PE_signed)
-			value = get_unaligned(ptr.p16s++);
-		else
-			value = get_unaligned(ptr.p16u++);
-		break;
-	case DW_EH_PE_data4:
-#ifdef CONFIG_64BIT
-		if (end < (const void *)(ptr.p32u + 1)) {
-			dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
-			return 0;
-		}
-		if(ptrType & DW_EH_PE_signed)
-			value = get_unaligned(ptr.p32s++);
-		else
-			value = get_unaligned(ptr.p32u++);
-		break;
-	case DW_EH_PE_data8:
-		BUILD_BUG_ON(sizeof(u64) != sizeof(value));
-#else
-		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
-#endif
-	case DW_EH_PE_native:
-		if (end < (const void *)(ptr.pul + 1)) {
-			dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
-			return 0;
-		}
-		value = get_unaligned(ptr.pul++);
-		break;
-	case DW_EH_PE_leb128:
-		BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
-		value = ptrType & DW_EH_PE_signed
-		        ? get_sleb128(&ptr.p8, end)
-		        : get_uleb128(&ptr.p8, end);
-		if ((const void *)ptr.p8 > end) {
-			dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
-			return 0;
-		}
-		break;
-	default:
-		dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
-		        ptrType, ptr.p8, end);
-		return 0;
-	}
-	switch(ptrType & DW_EH_PE_ADJUST) {
-	case DW_EH_PE_abs:
-		break;
-	case DW_EH_PE_pcrel:
-		value += (unsigned long)*pLoc;
-		break;
-	case DW_EH_PE_textrel:
-		if (likely(text_base)) {
-			value += text_base;
-			break;
-		}
-		dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
-		        ptrType, *pLoc, end);
-		return 0;
-	case DW_EH_PE_datarel:
-		if (likely(data_base)) {
-			value += data_base;
-			break;
-		}
-		dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
-		        ptrType, *pLoc, end);
-		return 0;
-	default:
-		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
-		        ptrType, *pLoc, end);
-		return 0;
-	}
-	if ((ptrType & DW_EH_PE_indirect)
-	    && probe_kernel_address((unsigned long *)value, value)) {
-		dprintk(1, "Cannot read indirect value %lx (%p,%p).",
-		        value, *pLoc, end);
-		return 0;
-	}
-	*pLoc = ptr.p8;
-
-	return value;
-}
-
-static signed fde_pointer_type(const u32 *cie)
-{
-	const u8 *ptr = (const u8 *)(cie + 2);
-	unsigned version = *ptr;
-
-	if (version != 1)
-		return -1; /* unsupported */
-	if (*++ptr) {
-		const char *aug;
-		const u8 *end = (const u8 *)(cie + 1) + *cie;
-		uleb128_t len;
-
-		/* check if augmentation size is first (and thus present) */
-		if (*ptr != 'z')
-			return -1;
-		/* check if augmentation string is nul-terminated */
-		if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
-			return -1;
-		++ptr; /* skip terminator */
-		get_uleb128(&ptr, end); /* skip code alignment */
-		get_sleb128(&ptr, end); /* skip data alignment */
-		/* skip return address column */
-		version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
-		len = get_uleb128(&ptr, end); /* augmentation length */
-		if (ptr + len < ptr || ptr + len > end)
-			return -1;
-		end = ptr + len;
-		while (*++aug) {
-			if (ptr >= end)
-				return -1;
-			switch(*aug) {
-			case 'L':
-				++ptr;
-				break;
-			case 'P': {
-					signed ptrType = *ptr++;
-
-					if (!read_pointer(&ptr, end, ptrType, 0, 0)
-					    || ptr > end)
-						return -1;
-				}
-				break;
-			case 'R':
-				return *ptr;
-			default:
-				return -1;
-			}
-		}
-	}
-	return DW_EH_PE_native|DW_EH_PE_abs;
-}
-
-static int advance_loc(unsigned long delta, struct unwind_state *state)
-{
-	state->loc += delta * state->codeAlign;
-
-	return delta > 0;
-}
-
-static void set_rule(uleb128_t reg,
-                     enum item_location where,
-                     uleb128_t value,
-                     struct unwind_state *state)
-{
-	if (reg < ARRAY_SIZE(state->regs)) {
-		state->regs[reg].where = where;
-		state->regs[reg].value = value;
-	}
-}
-
-static int processCFI(const u8 *start,
-                      const u8 *end,
-                      unsigned long targetLoc,
-                      signed ptrType,
-                      struct unwind_state *state)
-{
-	union {
-		const u8 *p8;
-		const u16 *p16;
-		const u32 *p32;
-	} ptr;
-	int result = 1;
-
-	if (start != state->cieStart) {
-		state->loc = state->org;
-		result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
-		if (targetLoc == 0 && state->label == NULL)
-			return result;
-	}
-	for (ptr.p8 = start; result && ptr.p8 < end; ) {
-		switch(*ptr.p8 >> 6) {
-			uleb128_t value;
-
-		case 0:
-			switch(*ptr.p8++) {
-			case DW_CFA_nop:
-				break;
-			case DW_CFA_set_loc:
-				state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
-				if (state->loc == 0)
-					result = 0;
-				break;
-			case DW_CFA_advance_loc1:
-				result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
-				break;
-			case DW_CFA_advance_loc2:
-				result = ptr.p8 <= end + 2
-				         && advance_loc(*ptr.p16++, state);
-				break;
-			case DW_CFA_advance_loc4:
-				result = ptr.p8 <= end + 4
-				         && advance_loc(*ptr.p32++, state);
-				break;
-			case DW_CFA_offset_extended:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_val_offset:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_offset_extended_sf:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_val_offset_sf:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_restore_extended:
-			case DW_CFA_undefined:
-			case DW_CFA_same_value:
-				set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
-				break;
-			case DW_CFA_register:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value,
-				         Register,
-				         get_uleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_remember_state:
-				if (ptr.p8 == state->label) {
-					state->label = NULL;
-					return 1;
-				}
-				if (state->stackDepth >= MAX_STACK_DEPTH) {
-					dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
-					return 0;
-				}
-				state->stack[state->stackDepth++] = ptr.p8;
-				break;
-			case DW_CFA_restore_state:
-				if (state->stackDepth) {
-					const uleb128_t loc = state->loc;
-					const u8 *label = state->label;
-
-					state->label = state->stack[state->stackDepth - 1];
-					memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
-					memset(state->regs, 0, sizeof(state->regs));
-					state->stackDepth = 0;
-					result = processCFI(start, end, 0, ptrType, state);
-					state->loc = loc;
-					state->label = label;
-				} else {
-					dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
-					return 0;
-				}
-				break;
-			case DW_CFA_def_cfa:
-				state->cfa.reg = get_uleb128(&ptr.p8, end);
-				/*nobreak*/
-			case DW_CFA_def_cfa_offset:
-				state->cfa.offs = get_uleb128(&ptr.p8, end);
-				break;
-			case DW_CFA_def_cfa_sf:
-				state->cfa.reg = get_uleb128(&ptr.p8, end);
-				/*nobreak*/
-			case DW_CFA_def_cfa_offset_sf:
-				state->cfa.offs = get_sleb128(&ptr.p8, end)
-				                  * state->dataAlign;
-				break;
-			case DW_CFA_def_cfa_register:
-				state->cfa.reg = get_uleb128(&ptr.p8, end);
-				break;
-			/*todo case DW_CFA_def_cfa_expression: */
-			/*todo case DW_CFA_expression: */
-			/*todo case DW_CFA_val_expression: */
-			case DW_CFA_GNU_args_size:
-				get_uleb128(&ptr.p8, end);
-				break;
-			case DW_CFA_GNU_negative_offset_extended:
-				value = get_uleb128(&ptr.p8, end);
-				set_rule(value,
-				         Memory,
-				         (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
-				break;
-			case DW_CFA_GNU_window_save:
-			default:
-				dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
-				result = 0;
-				break;
-			}
-			break;
-		case 1:
-			result = advance_loc(*ptr.p8++ & 0x3f, state);
-			break;
-		case 2:
-			value = *ptr.p8++ & 0x3f;
-			set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
-			break;
-		case 3:
-			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
-			break;
-		}
-		if (ptr.p8 > end) {
-			dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
-			result = 0;
-		}
-		if (result && targetLoc != 0 && targetLoc < state->loc)
-			return 1;
-	}
-
-	if (result && ptr.p8 < end)
-		dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
-
-	return result
-	   && ptr.p8 == end
-	   && (targetLoc == 0
-	    || (/*todo While in theory this should apply, gcc in practice omits
-	          everything past the function prolog, and hence the location
-	          never reaches the end of the function.
-	        targetLoc < state->loc &&*/ state->label == NULL));
-}
-
-/* Unwind to previous to frame.  Returns 0 if successful, negative
- * number in case of an error. */
-int unwind(struct unwind_frame_info *frame)
-{
-#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
-	const u32 *fde = NULL, *cie = NULL;
-	const u8 *ptr = NULL, *end = NULL;
-	unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
-	unsigned long startLoc = 0, endLoc = 0, cfa;
-	unsigned i;
-	signed ptrType = -1;
-	uleb128_t retAddrReg = 0;
-	const struct unwind_table *table;
-	struct unwind_state state;
-
-	if (UNW_PC(frame) == 0)
-		return -EINVAL;
-	if ((table = find_table(pc)) != NULL
-	    && !(table->size & (sizeof(*fde) - 1))) {
-		const u8 *hdr = table->header;
-		unsigned long tableSize;
-
-		smp_rmb();
-		if (hdr && hdr[0] == 1) {
-			switch(hdr[3] & DW_EH_PE_FORM) {
-			case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
-			case DW_EH_PE_data2: tableSize = 2; break;
-			case DW_EH_PE_data4: tableSize = 4; break;
-			case DW_EH_PE_data8: tableSize = 8; break;
-			default: tableSize = 0; break;
-			}
-			ptr = hdr + 4;
-			end = hdr + table->hdrsz;
-			if (tableSize
-			    && read_pointer(&ptr, end, hdr[1], 0, 0)
-			       == (unsigned long)table->address
-			    && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
-			    && i == (end - ptr) / (2 * tableSize)
-			    && !((end - ptr) % (2 * tableSize))) {
-				do {
-					const u8 *cur = ptr + (i / 2) * (2 * tableSize);
-
-					startLoc = read_pointer(&cur,
-					                        cur + tableSize,
-					                        hdr[3], 0,
-					                        (unsigned long)hdr);
-					if (pc < startLoc)
-						i /= 2;
-					else {
-						ptr = cur - tableSize;
-						i = (i + 1) / 2;
-					}
-				} while (startLoc && i > 1);
-				if (i == 1
-				    && (startLoc = read_pointer(&ptr,
-				                                ptr + tableSize,
-				                                hdr[3], 0,
-				                                (unsigned long)hdr)) != 0
-				    && pc >= startLoc)
-					fde = (void *)read_pointer(&ptr,
-					                           ptr + tableSize,
-					                           hdr[3], 0,
-					                           (unsigned long)hdr);
-			}
-		}
-		if(hdr && !fde)
-			dprintk(3, "Binary lookup for %lx failed.", pc);
-
-		if (fde != NULL) {
-			cie = cie_for_fde(fde, table);
-			ptr = (const u8 *)(fde + 2);
-			if(cie != NULL
-			   && cie != &bad_cie
-			   && cie != &not_fde
-			   && (ptrType = fde_pointer_type(cie)) >= 0
-			   && read_pointer(&ptr,
-			                   (const u8 *)(fde + 1) + *fde,
-			                   ptrType, 0, 0) == startLoc) {
-				if (!(ptrType & DW_EH_PE_indirect))
-					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
-				endLoc = startLoc
-				         + read_pointer(&ptr,
-				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType, 0, 0);
-				if(pc >= endLoc)
-					fde = NULL;
-			} else
-				fde = NULL;
-			if(!fde)
-				dprintk(1, "Binary lookup result for %lx discarded.", pc);
-		}
-		if (fde == NULL) {
-			for (fde = table->address, tableSize = table->size;
-			     cie = NULL, tableSize > sizeof(*fde)
-			     && tableSize - sizeof(*fde) >= *fde;
-			     tableSize -= sizeof(*fde) + *fde,
-			     fde += 1 + *fde / sizeof(*fde)) {
-				cie = cie_for_fde(fde, table);
-				if (cie == &bad_cie) {
-					cie = NULL;
-					break;
-				}
-				if (cie == NULL
-				    || cie == &not_fde
-				    || (ptrType = fde_pointer_type(cie)) < 0)
-					continue;
-				ptr = (const u8 *)(fde + 2);
-				startLoc = read_pointer(&ptr,
-				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType, 0, 0);
-				if (!startLoc)
-					continue;
-				if (!(ptrType & DW_EH_PE_indirect))
-					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
-				endLoc = startLoc
-				         + read_pointer(&ptr,
-				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType, 0, 0);
-				if (pc >= startLoc && pc < endLoc)
-					break;
-			}
-			if(!fde)
-				dprintk(3, "Linear lookup for %lx failed.", pc);
-		}
-	}
-	if (cie != NULL) {
-		memset(&state, 0, sizeof(state));
-		state.cieEnd = ptr; /* keep here temporarily */
-		ptr = (const u8 *)(cie + 2);
-		end = (const u8 *)(cie + 1) + *cie;
-		frame->call_frame = 1;
-		if ((state.version = *ptr) != 1)
-			cie = NULL; /* unsupported version */
-		else if (*++ptr) {
-			/* check if augmentation size is first (and thus present) */
-			if (*ptr == 'z') {
-				while (++ptr < end && *ptr) {
-					switch(*ptr) {
-					/* check for ignorable (or already handled)
-					 * nul-terminated augmentation string */
-					case 'L':
-					case 'P':
-					case 'R':
-						continue;
-					case 'S':
-						frame->call_frame = 0;
-						continue;
-					default:
-						break;
-					}
-					break;
-				}
-			}
-			if (ptr >= end || *ptr)
-				cie = NULL;
-		}
-		if(!cie)
-			dprintk(1, "CIE unusable (%p,%p).", ptr, end);
-		++ptr;
-	}
-	if (cie != NULL) {
-		/* get code aligment factor */
-		state.codeAlign = get_uleb128(&ptr, end);
-		/* get data aligment factor */
-		state.dataAlign = get_sleb128(&ptr, end);
-		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
-			cie = NULL;
-		else if (UNW_PC(frame) % state.codeAlign
-		         || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
-			dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
-			        UNW_PC(frame), UNW_SP(frame));
-			return -EPERM;
-		} else {
-			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
-			/* skip augmentation */
-			if (((const char *)(cie + 2))[1] == 'z') {
-				uleb128_t augSize = get_uleb128(&ptr, end);
-
-				ptr += augSize;
-			}
-			if (ptr > end
-			   || retAddrReg >= ARRAY_SIZE(reg_info)
-			   || REG_INVALID(retAddrReg)
-			   || reg_info[retAddrReg].width != sizeof(unsigned long))
-				cie = NULL;
-		}
-		if(!cie)
-			dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
-	}
-	if (cie != NULL) {
-		state.cieStart = ptr;
-		ptr = state.cieEnd;
-		state.cieEnd = end;
-		end = (const u8 *)(fde + 1) + *fde;
-		/* skip augmentation */
-		if (((const char *)(cie + 2))[1] == 'z') {
-			uleb128_t augSize = get_uleb128(&ptr, end);
-
-			if ((ptr += augSize) > end)
-				fde = NULL;
-		}
-		if(!fde)
-			dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
-	}
-	if (cie == NULL || fde == NULL) {
-#ifdef CONFIG_FRAME_POINTER
-		unsigned long top, bottom;
-
-		if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
-			return -EPERM;
-		top = STACK_TOP(frame->task);
-		bottom = STACK_BOTTOM(frame->task);
-# if FRAME_RETADDR_OFFSET < 0
-		if (UNW_SP(frame) < top
-		    && UNW_FP(frame) <= UNW_SP(frame)
-		    && bottom < UNW_FP(frame)
-# else
-		if (UNW_SP(frame) > top
-		    && UNW_FP(frame) >= UNW_SP(frame)
-		    && bottom > UNW_FP(frame)
-# endif
-		   && !((UNW_SP(frame) | UNW_FP(frame))
-		        & (sizeof(unsigned long) - 1))) {
-			unsigned long link;
-
-			if (!probe_kernel_address(
-			                (unsigned long *)(UNW_FP(frame)
-			                                  + FRAME_LINK_OFFSET),
-						  link)
-# if FRAME_RETADDR_OFFSET < 0
-			   && link > bottom && link < UNW_FP(frame)
-# else
-			   && link > UNW_FP(frame) && link < bottom
-# endif
-			   && !(link & (sizeof(link) - 1))
-			   && !probe_kernel_address(
-			                  (unsigned long *)(UNW_FP(frame)
-			                                    + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
-				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
-# if FRAME_RETADDR_OFFSET < 0
-					-
-# else
-					+
-# endif
-					  sizeof(UNW_PC(frame));
-				UNW_FP(frame) = link;
-				return 0;
-			}
-		}
-#endif
-		return -ENXIO;
-	}
-	state.org = startLoc;
-	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
-	/* process instructions */
-	if (!processCFI(ptr, end, pc, ptrType, &state)
-	   || state.loc > endLoc
-	   || state.regs[retAddrReg].where == Nowhere
-	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
-	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
-	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
-	   || state.cfa.offs % sizeof(unsigned long)) {
-		dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
-		return -EIO;
-	}
-	/* update frame */
-#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
-	if(frame->call_frame
-	   && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
-		frame->call_frame = 0;
-#endif
-	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
-	startLoc = min((unsigned long)UNW_SP(frame), cfa);
-	endLoc = max((unsigned long)UNW_SP(frame), cfa);
-	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
-		startLoc = min(STACK_LIMIT(cfa), cfa);
-		endLoc = max(STACK_LIMIT(cfa), cfa);
-	}
-#ifndef CONFIG_64BIT
-# define CASES CASE(8); CASE(16); CASE(32)
-#else
-# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
-#endif
-	pc = UNW_PC(frame);
-	sp = UNW_SP(frame);
-	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
-		if (REG_INVALID(i)) {
-			if (state.regs[i].where == Nowhere)
-				continue;
-			dprintk(1, "Cannot restore register %u (%d).",
-			        i, state.regs[i].where);
-			return -EIO;
-		}
-		switch(state.regs[i].where) {
-		default:
-			break;
-		case Register:
-			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
-			   || REG_INVALID(state.regs[i].value)
-			   || reg_info[i].width > reg_info[state.regs[i].value].width) {
-				dprintk(1, "Cannot restore register %u from register %lu.",
-				        i, state.regs[i].value);
-				return -EIO;
-			}
-			switch(reg_info[state.regs[i].value].width) {
-#define CASE(n) \
-			case sizeof(u##n): \
-				state.regs[i].value = FRAME_REG(state.regs[i].value, \
-				                                const u##n); \
-				break
-			CASES;
-#undef CASE
-			default:
-				dprintk(1, "Unsupported register size %u (%lu).",
-				        reg_info[state.regs[i].value].width,
-				        state.regs[i].value);
-				return -EIO;
-			}
-			break;
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
-		if (REG_INVALID(i))
-			continue;
-		switch(state.regs[i].where) {
-		case Nowhere:
-			if (reg_info[i].width != sizeof(UNW_SP(frame))
-			   || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
-			      != &UNW_SP(frame))
-				continue;
-			UNW_SP(frame) = cfa;
-			break;
-		case Register:
-			switch(reg_info[i].width) {
-#define CASE(n) case sizeof(u##n): \
-				FRAME_REG(i, u##n) = state.regs[i].value; \
-				break
-			CASES;
-#undef CASE
-			default:
-				dprintk(1, "Unsupported register size %u (%u).",
-				        reg_info[i].width, i);
-				return -EIO;
-			}
-			break;
-		case Value:
-			if (reg_info[i].width != sizeof(unsigned long)) {
-				dprintk(1, "Unsupported value size %u (%u).",
-				        reg_info[i].width, i);
-				return -EIO;
-			}
-			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
-			                                    * state.dataAlign;
-			break;
-		case Memory: {
-				unsigned long addr = cfa + state.regs[i].value
-				                           * state.dataAlign;
-
-				if ((state.regs[i].value * state.dataAlign)
-				    % sizeof(unsigned long)
-				    || addr < startLoc
-				    || addr + sizeof(unsigned long) < addr
-				    || addr + sizeof(unsigned long) > endLoc) {
-					dprintk(1, "Bad memory location %lx (%lx).",
-					        addr, state.regs[i].value);
-					return -EIO;
-				}
-				switch(reg_info[i].width) {
-#define CASE(n)     case sizeof(u##n): \
-					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
-					break
-				CASES;
-#undef CASE
-				default:
-					dprintk(1, "Unsupported memory size %u (%u).",
-					        reg_info[i].width, i);
-					return -EIO;
-				}
-			}
-			break;
-		}
-	}
-
-	if (UNW_PC(frame) % state.codeAlign
-	    || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
-		dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
-		        UNW_PC(frame), UNW_SP(frame));
-		return -EIO;
-	}
-	if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
-		dprintk(1, "No progress (%lx,%lx).", pc, sp);
-		return -EIO;
-	}
-
-	return 0;
-#undef CASES
-#undef FRAME_REG
-}
-EXPORT_SYMBOL(unwind);
-
-int unwind_init_frame_info(struct unwind_frame_info *info,
-                           struct task_struct *tsk,
-                           /*const*/ struct pt_regs *regs)
-{
-	info->task = tsk;
-	info->call_frame = 0;
-	arch_unw_init_frame_info(info, regs);
-
-	return 0;
-}
-EXPORT_SYMBOL(unwind_init_frame_info);
-
-/*
- * Prepare to unwind a blocked task.
- */
-int unwind_init_blocked(struct unwind_frame_info *info,
-                        struct task_struct *tsk)
-{
-	info->task = tsk;
-	info->call_frame = 0;
-	arch_unw_init_blocked(info);
-
-	return 0;
-}
-EXPORT_SYMBOL(unwind_init_blocked);
-
-/*
- * Prepare to unwind the currently running thread.
- */
-int unwind_init_running(struct unwind_frame_info *info,
-                        asmlinkage int (*callback)(struct unwind_frame_info *,
-                                                   void *arg),
-                        void *arg)
-{
-	info->task = current;
-	info->call_frame = 0;
-
-	return arch_unwind_init_running(info, callback, arg);
-}
-EXPORT_SYMBOL(unwind_init_running);
-
-/*
- * Unwind until the return pointer is in user-land (or until an error
- * occurs).  Returns 0 if successful, negative number in case of
- * error.
- */
-int unwind_to_user(struct unwind_frame_info *info)
-{
-	while (!arch_unw_user_mode(info)) {
-		int err = unwind(info);
-
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(unwind_to_user);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 818e4589f718..5c2681875b9a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -354,24 +354,6 @@ config FRAME_POINTER
 	  some architectures or if you use external debuggers.
 	  If you don't debug the kernel, you can say N.
 
-config UNWIND_INFO
-	bool "Compile the kernel with frame unwind information"
-	depends on !IA64 && !PARISC && !ARM
-	depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
-	help
-	  If you say Y here the resulting kernel image will be slightly larger
-	  but not slower, and it will give very useful debugging information.
-	  If you don't debug the kernel, you can say N, but we may not be able
-	  to solve problems without frame unwind information or frame pointers.
-
-config STACK_UNWIND
-	bool "Stack unwind support"
-	depends on UNWIND_INFO
-	depends on X86
-	help
-	  This enables more precise stack traces, omitting all unrelated
-	  occurrences of pointers into kernel code from the dump.
-
 config FORCED_INLINING
 	bool "Force gcc to inline functions marked 'inline'"
 	depends on DEBUG_KERNEL
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index d143c0faf248..b5a90fc056d3 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -55,37 +55,7 @@ static bool fail_task(struct fault_attr *attr, struct task_struct *task)
 
 #define MAX_STACK_TRACE_DEPTH 32
 
-#ifdef CONFIG_STACK_UNWIND
-
-static asmlinkage int fail_stacktrace_callback(struct unwind_frame_info *info,
-						void *arg)
-{
-	int depth;
-	struct fault_attr *attr = arg;
-	bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX);
-
-	for (depth = 0; depth < attr->stacktrace_depth
-			&& unwind(info) == 0 && UNW_PC(info); depth++) {
-		if (arch_unw_user_mode(info))
-			break;
-		if (attr->reject_start <= UNW_PC(info) &&
-			       UNW_PC(info) < attr->reject_end)
-			return false;
-		if (attr->require_start <= UNW_PC(info) &&
-			       UNW_PC(info) < attr->require_end)
-			found = true;
-	}
-	return found;
-}
-
-static bool fail_stacktrace(struct fault_attr *attr)
-{
-	struct unwind_frame_info info;
-
-	return unwind_init_running(&info, fail_stacktrace_callback, attr);
-}
-
-#elif defined(CONFIG_STACKTRACE)
+#if defined(CONFIG_STACKTRACE)
 
 static bool fail_stacktrace(struct fault_attr *attr)
 {
-- 
cgit v1.2.3


From a08727bae727fc2ca3a6ee9506d77786b71070b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.osdl.org>
Date: Sat, 16 Dec 2006 09:53:50 -0800
Subject: Make workqueue bit operations work on "atomic_long_t"

On architectures where the atomicity of the bit operations is handled by
external means (ie a separate spinlock to protect concurrent accesses),
just doing a direct assignment on the workqueue data field (as done by
commit 4594bf159f1962cec3b727954b7c598b07e2e737) can cause the
assignment to be lost due to lack of serialization with the bitops on
the same word.

So we need to serialize the assignment with the locks on those
architectures (notably older ARM chips, PA-RISC and sparc32).

So rather than using an "unsigned long", let's use "atomic_long_t",
which already has a safe assignment operation (atomic_long_set()) on
such architectures.

This requires that the atomic operations use the same atomicity locks as
the bit operations do, but that is largely the case anyway.  Sparc32
will probably need fixing.

Architectures (including modern ARM with LL/SC) that implement sane
atomic operations for SMP won't see any of this matter.

Cc: Russell King <rmk+lkml@arm.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: David Miller <davem@davemloft.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Linux Arch Maintainers <linux-arch@vger.kernel.org>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/workqueue.h | 32 ++++++++++++++++++++++----------
 kernel/workqueue.c        | 16 ++++++++--------
 2 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 5b13dcf02714..2a7b38d87018 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -8,16 +8,21 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <asm/atomic.h>
 
 struct workqueue_struct;
 
 struct work_struct;
 typedef void (*work_func_t)(struct work_struct *work);
 
+/*
+ * The first word is the work queue pointer and the flags rolled into
+ * one
+ */
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
 struct work_struct {
-	/* the first word is the work queue pointer and the flags rolled into
-	 * one */
-	unsigned long management;
+	atomic_long_t data;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
 #define WORK_STRUCT_NOAUTOREL 1		/* F if work item automatically released on exec */
 #define WORK_STRUCT_FLAG_MASK (3UL)
@@ -26,6 +31,9 @@ struct work_struct {
 	work_func_t func;
 };
 
+#define WORK_DATA_INIT(autorelease) \
+	ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL)
+
 struct delayed_work {
 	struct work_struct work;
 	struct timer_list timer;
@@ -36,13 +44,13 @@ struct execute_work {
 };
 
 #define __WORK_INITIALIZER(n, f) {				\
-	.management = 0,					\
+	.data = WORK_DATA_INIT(0),				\
         .entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	}
 
 #define __WORK_INITIALIZER_NAR(n, f) {				\
-	.management = (1 << WORK_STRUCT_NOAUTOREL),		\
+	.data = WORK_DATA_INIT(1),				\
         .entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	}
@@ -82,17 +90,21 @@ struct execute_work {
 
 /*
  * initialize all of a work item in one go
+ *
+ * NOTE! No point in using "atomic_long_set()": useing a direct
+ * assignment of the work data initializer allows the compiler
+ * to generate better code.
  */
 #define INIT_WORK(_work, _func)					\
 	do {							\
-		(_work)->management = 0;			\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT(0);	\
 		INIT_LIST_HEAD(&(_work)->entry);		\
 		PREPARE_WORK((_work), (_func));			\
 	} while (0)
 
 #define INIT_WORK_NAR(_work, _func)					\
 	do {								\
-		(_work)->management = (1 << WORK_STRUCT_NOAUTOREL);	\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT(1);	\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
@@ -114,7 +126,7 @@ struct execute_work {
  * @work: The work item in question
  */
 #define work_pending(work) \
-	test_bit(WORK_STRUCT_PENDING, &(work)->management)
+	test_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 /**
  * delayed_work_pending - Find out whether a delayable work item is currently
@@ -143,7 +155,7 @@ struct execute_work {
  * This should also be used to release a delayed work item.
  */
 #define work_release(work) \
-	clear_bit(WORK_STRUCT_PENDING, &(work)->management)
+	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
@@ -188,7 +200,7 @@ static inline int cancel_delayed_work(struct delayed_work *work)
 
 	ret = del_timer_sync(&work->timer);
 	if (ret)
-		clear_bit(WORK_STRUCT_PENDING, &work->work.management);
+		work_release(&work->work);
 	return ret;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index db49886bfae1..742cbbe49bdc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -96,13 +96,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq)
 	BUG_ON(!work_pending(work));
 
 	new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
-	new |= work->management & WORK_STRUCT_FLAG_MASK;
-	work->management = new;
+	new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
+	atomic_long_set(&work->data, new);
 }
 
 static inline void *get_wq_data(struct work_struct *work)
 {
-	return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
+	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
 static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
@@ -133,7 +133,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work
 		list_del_init(&work->entry);
 		spin_unlock_irqrestore(&cwq->lock, flags);
 
-		if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+		if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
 			work_release(work);
 		f(work);
 
@@ -206,7 +206,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
 	int ret = 0, cpu = get_cpu();
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		if (unlikely(is_single_threaded(wq)))
 			cpu = singlethread_cpu;
 		BUG_ON(!list_empty(&work->entry));
@@ -248,7 +248,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 	if (delay == 0)
 		return queue_work(wq, work);
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
@@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
 
-	if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(timer_pending(timer));
 		BUG_ON(!list_empty(&work->entry));
 
@@ -321,7 +321,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		spin_unlock_irqrestore(&cwq->lock, flags);
 
 		BUG_ON(get_wq_data(work) != cwq);
-		if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+		if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
 			work_release(work);
 		f(work);
 
-- 
cgit v1.2.3


From b039db8eeab0b3cee66dcf9820526dd9cfb04f6b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 20 Dec 2006 15:59:48 +0100
Subject: [PATCH] __set_irq_handler bogus space

__set_irq_handler: Kill a bogus space

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/irq/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ebfd24a41858..1c1b7125b8ac 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -520,7 +520,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	if (desc->chip == &no_irq_chip) {
 		printk(KERN_WARNING "Trying to install %sinterrupt handler "
-		       "for IRQ%d\n", is_chained ? "chained " : " ", irq);
+		       "for IRQ%d\n", is_chained ? "chained " : "", irq);
 		/*
 		 * Some ARM implementations install a handler for really dumb
 		 * interrupt hardware without setting an irq_chip. This worked
-- 
cgit v1.2.3


From bc947631d1d532c758f8fcbdeb1f7fc2f4c863f8 Mon Sep 17 00:00:00 2001
From: Peter Williams <pwil3058@bigpond.net.au>
Date: Tue, 19 Dec 2006 12:48:50 +1000
Subject: [PATCH] sched: improve efficiency of sched_fork()

Problem:
  sched_fork() has always called scheduler_tick() in some (unlikely)
  circumstances in order to update the current task in light of those
  circumstances.  It has always been the case that the work done by
  scheduler_tick() was more than was required to handle the problem in
  hand but no harm was done except for the waste of a few CPU cycles.

  However, the splitting of scheduler_tick() into two procedures in
  2.6.20-rc1 enables the wasted cycles to be saved as the new procedure
  task_running_tick() does all the work that is required to rectify the
  problem being handled.

Solution:
  Replace the call to scheduler_tick() in sched_fork() with a call to
  task_running_tick().

Signed-off-by: Peter Williams <pwil3058@bigpond.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 5cd833bc2173..b81ed8703f70 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1567,6 +1567,7 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 	return try_to_wake_up(p, state, 0);
 }
 
+static void task_running_tick(struct rq *rq, struct task_struct *p);
 /*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
@@ -1627,7 +1628,7 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 		 * runqueue lock is not a problem.
 		 */
 		current->time_slice = 1;
-		scheduler_tick();
+		task_running_tick(cpu_rq(cpu), current);
 	}
 	local_irq_enable();
 	put_cpu();
-- 
cgit v1.2.3


From 9bfb18392ef586467277fa25d8f3a7a93611f6df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 18 Dec 2006 20:05:09 +0100
Subject: [PATCH] workqueue: fix schedule_on_each_cpu()

fix the schedule_on_each_cpu() implementation: __queue_work() is now
stricter, hence set the work-pending bit before passing in the new work.

(found in the -rt tree, using Peter Zijlstra's files-lock scalability
patchset)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/workqueue.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 742cbbe49bdc..180a8ce11535 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -637,9 +637,11 @@ int schedule_on_each_cpu(work_func_t func)
 
 	mutex_lock(&workqueue_mutex);
 	for_each_online_cpu(cpu) {
-		INIT_WORK(per_cpu_ptr(works, cpu), func);
-		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
-				per_cpu_ptr(works, cpu));
+		struct work_struct *work = per_cpu_ptr(works, cpu);
+
+		INIT_WORK(work, func);
+		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
+		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
 	}
 	mutex_unlock(&workqueue_mutex);
 	flush_workqueue(keventd_wq);
-- 
cgit v1.2.3


From ba0084048ab785c2cb1d6cc2cccabe642a5b799a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 22 Dec 2006 01:06:15 -0800
Subject: [PATCH] Conditionally check expected_preempt_count in
 __resched_legal()

Commit 2d7d253548cffdce80f4e03664686e9ccb1b0ed7 ("fix cond_resched() fix")
introduced an 'expected_preempt_count' parameter to __resched_legal() to
fix a bug where it was returning a false negative when called from
cond_resched_lock() and preemption was enabled.

Unfortunately this broke things for when preemption is disabled.
preempt_count() will always return zero, thus failing the check against any
value of expected_preempt_count not equal to zero.  cond_resched_lock() for
example, passes an expected_preempt_count value of 1.

So fix the fix for the cond_resched() fix by skipping the check of
preempt_count() against expected_preempt_count when preemption is disabled.

Credit should go to Sunil Mushran for spotting the bug during testing.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index b81ed8703f70..850bde4b31a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4619,8 +4619,10 @@ asmlinkage long sys_sched_yield(void)
 
 static inline int __resched_legal(int expected_preempt_count)
 {
+#ifdef CONFIG_PREEMPT
 	if (unlikely(preempt_count() != expected_preempt_count))
 		return 0;
+#endif
 	if (unlikely(system_state != SYSTEM_RUNNING))
 		return 0;
 	return 1;
-- 
cgit v1.2.3


From af9997e426f9ddfe7a84cb4cd3c7ff938fabd41a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 22 Dec 2006 01:06:52 -0800
Subject: [PATCH] fix kernel-doc warnings in 2.6.20-rc1

Fix kernel-doc warnings in 2.6.20-rc1.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 block/ll_rw_blk.c              | 1 +
 drivers/base/firmware_class.c  | 1 +
 drivers/message/i2o/exec-osm.c | 2 +-
 kernel/relay.c                 | 2 +-
 kernel/workqueue.c             | 4 ++--
 mm/slab.c                      | 1 +
 6 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e07c079e07e6..fb6789725e1b 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2453,6 +2453,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
  * @rq:		request to map data to
  * @iov:	pointer to the iovec
  * @iov_count:	number of elements in the iovec
+ * @len:	I/O byte count
  *
  * Description:
  *    Data will be mapped directly for zero copy io, if possible. Otherwise
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 4bad2870c485..64558f45e6bc 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -127,6 +127,7 @@ static ssize_t firmware_loading_show(struct device *dev,
 /**
  * firmware_loading_store - set value in the 'loading' control file
  * @dev: device pointer
+ * @attr: device attribute pointer
  * @buf: buffer to scan for loading control value
  * @count: number of bytes in @buf
  *
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index a539d3b61e76..5278aad92bc4 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -367,7 +367,7 @@ static int i2o_exec_remove(struct device *dev)
 
 /**
  *	i2o_exec_lct_modified - Called on LCT NOTIFY reply
- *	@work: work struct for a specific controller
+ *	@_work: work struct for a specific controller
  *
  *	This function handles asynchronus LCT NOTIFY replies. It parses the
  *	new LCT and if the buffer for the LCT was to small sends a LCT NOTIFY
diff --git a/kernel/relay.c b/kernel/relay.c
index a4701e7ba7d0..3e076f2acd31 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -302,7 +302,7 @@ static struct rchan_callbacks default_channel_callbacks = {
 
 /**
  *	wakeup_readers - wake up readers waiting on a channel
- *	@private: the channel buffer
+ *	@work: work struct that contains the the channel buffer
  *
  *	This is the work function used to defer reader waking.  The
  *	reason waking is deferred is that calling directly from write
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 180a8ce11535..a3da07c5af28 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -233,7 +233,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 /**
  * queue_delayed_work - queue work on a workqueue after delay
  * @wq: workqueue to use
- * @work: delayable work to queue
+ * @dwork: delayable work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
@@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * queue_delayed_work_on - queue work on specific CPU after delay
  * @cpu: CPU number to execute work on
  * @wq: workqueue to use
- * @work: work to queue
+ * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
  * Returns 0 if @work was already on a queue, non-zero otherwise.
diff --git a/mm/slab.c b/mm/slab.c
index 176037bcc66a..0d4e57431de4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3587,6 +3587,7 @@ out:
  * @cachep: The cache to allocate from.
  * @flags: See kmalloc().
  * @nodeid: node number of the target node.
+ * @caller: return address of caller, used for debug information
  *
  * Identical to kmem_cache_alloc but it will allocate memory on the given
  * node, which can improve the performance for cpu bound structures.
-- 
cgit v1.2.3


From 99eea6a105106a94758724ccce996607f60bc0f2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 22 Dec 2006 01:07:00 -0800
Subject: [PATCH] make kernel/printk.c:ignore_loglevel_setup() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 185bb45eacf7..c770e1a4e882 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -335,7 +335,7 @@ static void __call_console_drivers(unsigned long start, unsigned long end)
 
 static int __read_mostly ignore_loglevel;
 
-int __init ignore_loglevel_setup(char *str)
+static int __init ignore_loglevel_setup(char *str)
 {
 	ignore_loglevel = 1;
 	printk(KERN_INFO "debug: ignoring loglevel setting.\n");
-- 
cgit v1.2.3


From 67af63a6ab4ce064f807bdce614fe0fa2bcea252 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Fri, 22 Dec 2006 01:07:50 -0800
Subject: [PATCH] sched: remove __cpuinitdata anotation to cpu_isolated_map

The structure cpu_isolated_map is used not only during initialization.
Multi-core scheduler configuration changes and exclusive cpusets
use this during run time.  During setting of sched_mc_power_savings
 policy, this structure is accessed to update sched_domains.

Signed-off-by: Tim Chen <tim.c.chen@intel.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 850bde4b31a2..b515e3caad7f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5610,7 +5610,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 }
 
 /* cpus with isolated domains */
-static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
-- 
cgit v1.2.3


From 9d7ac8be4b48737ad1cebd94ed754a269f360708 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 22 Dec 2006 01:08:14 -0800
Subject: [PATCH] genirq: fix irq flow handler uninstall

The sanity check for no_irq_chip in __set_irq_hander() is unconditional on
both install and uninstall of an handler.  This triggers false warnings and
replaces no_irq_chip by dummy_irq_chip in the uninstall case.

Check only, when a real handler is installed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Sylvain Munaut <tnt@246tNt.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/irq/chip.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 1c1b7125b8ac..d27b25855743 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -517,8 +517,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	if (!handle)
 		handle = handle_bad_irq;
-
-	if (desc->chip == &no_irq_chip) {
+	else if (desc->chip == &no_irq_chip) {
 		printk(KERN_WARNING "Trying to install %sinterrupt handler "
 		       "for IRQ%d\n", is_chained ? "chained " : "", irq);
 		/*
-- 
cgit v1.2.3


From 3e1fbd12c958591695f89b11f9c6ec08d002e358 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 22 Dec 2006 01:10:02 -0800
Subject: [PATCH] audit: fix kstrdup() error check

kstrdup() returns NULL on error.

Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/auditfilter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 2e896f8ae29e..9c8c23227c7f 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -800,8 +800,8 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
 
 	/* our own copy of se_str */
 	se_str = kstrdup(sf->se_str, GFP_KERNEL);
-	if (unlikely(IS_ERR(se_str)))
-	    return -ENOMEM;
+	if (unlikely(!se_str))
+		return -ENOMEM;
 	df->se_str = se_str;
 
 	/* our own (refreshed) copy of se_rule */
-- 
cgit v1.2.3


From 5b149bcc230e4696a1d893504bed38aeb3832314 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 22 Dec 2006 01:10:14 -0800
Subject: [PATCH] schedule_timeout(): improve warning message

Kyle is hitting this warning, and we don't have a clue what it's caused by.
Add the obligatory dump_stack().

Cc: kyle <kylewong@southa.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/timer.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/timer.c b/kernel/timer.c
index feddf817baa5..c2a8ccfc2882 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1344,11 +1344,10 @@ fastcall signed long __sched schedule_timeout(signed long timeout)
 		 * should never happens anyway). You just have the printk()
 		 * that will tell you if something is gone wrong and where.
 		 */
-		if (timeout < 0)
-		{
+		if (timeout < 0) {
 			printk(KERN_ERR "schedule_timeout: wrong timeout "
-				"value %lx from %p\n", timeout,
-				__builtin_return_address(0));
+				"value %lx\n", timeout);
+			dump_stack();
 			current->state = TASK_RUNNING;
 			goto out;
 		}
-- 
cgit v1.2.3


From 01b2d93ca4c495f056471189ac6c4e6ac4cbbccb Mon Sep 17 00:00:00 2001
From: Vadim Lobanov <vlobanov@speakeasy.net>
Date: Fri, 22 Dec 2006 01:10:43 -0800
Subject: [PATCH] fdtable: Provide free_fdtable() wrapper

Christoph Hellwig has expressed concerns that the recent fdtable changes
expose the details of the RCU methodology used to release no-longer-used
fdtable structures to the rest of the kernel.  The trivial patch below
addresses these concerns by introducing the appropriate free_fdtable()
calls, which simply wrap the release RCU usage.  Since free_fdtable() is a
one-liner, it makes sense to promote it to an inline helper.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c            | 2 +-
 include/linux/file.h | 5 +++++
 kernel/exit.c        | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/fs/file.c b/fs/file.c
index 857fa49e984c..c5575de01113 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -206,7 +206,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
 		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
-			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
+			free_fdtable(cur_fdt);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		free_fdarr(new_fdt);
diff --git a/include/linux/file.h b/include/linux/file.h
index edca361f2ab4..a59001e9ea58 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -80,6 +80,11 @@ extern int expand_files(struct files_struct *, int nr);
 extern void free_fdtable_rcu(struct rcu_head *rcu);
 extern void __init files_defer_init(void);
 
+static inline void free_fdtable(struct fdtable *fdt)
+{
+	call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
 {
 	struct file * file = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 122fadb972fc..85917c2bf065 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -468,7 +468,7 @@ void fastcall put_files_struct(struct files_struct *files)
 		fdt = files_fdtable(files);
 		if (fdt != &files->fdtab)
 			kmem_cache_free(files_cachep, files);
-		call_rcu(&fdt->rcu, free_fdtable_rcu);
+		free_fdtable(fdt);
 	}
 }
 
-- 
cgit v1.2.3


From 192636ad9097b13d58310a6358fd512d3084c09a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 22 Dec 2006 01:11:30 -0800
Subject: [PATCH] relay: remove inlining

          text    data     bss     dec     hex filename
before:   4036      44       0    4080     ff0 kernel/relay.o
after:    3727      44       0    3771     ebb kernel/relay.o

Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Tom Zanussi <zanussi@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/relay.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/relay.c b/kernel/relay.c
index 3e076f2acd31..284e2e8b4eed 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -322,7 +322,7 @@ static void wakeup_readers(struct work_struct *work)
  *
  *	See relay_reset for description of effect.
  */
-static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
+static void __relay_reset(struct rchan_buf *buf, unsigned int init)
 {
 	size_t i;
 
@@ -418,7 +418,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan,
  *	The channel buffer and channel buffer data structure are then freed
  *	automatically when the last reference is given up.
  */
-static inline void relay_close_buf(struct rchan_buf *buf)
+static void relay_close_buf(struct rchan_buf *buf)
 {
 	buf->finalized = 1;
 	cancel_delayed_work(&buf->wake_readers);
@@ -426,7 +426,7 @@ static inline void relay_close_buf(struct rchan_buf *buf)
 	kref_put(&buf->kref, relay_remove_buf);
 }
 
-static inline void setup_callbacks(struct rchan *chan,
+static void setup_callbacks(struct rchan *chan,
 				   struct rchan_callbacks *cb)
 {
 	if (!cb) {
@@ -946,11 +946,10 @@ typedef int (*subbuf_actor_t) (size_t read_start,
 /*
  *	relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
  */
-static inline ssize_t relay_file_read_subbufs(struct file *filp,
-					      loff_t *ppos,
-					      subbuf_actor_t subbuf_actor,
-					      read_actor_t actor,
-					      read_descriptor_t *desc)
+static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
+					subbuf_actor_t subbuf_actor,
+					read_actor_t actor,
+					read_descriptor_t *desc)
 {
 	struct rchan_buf *buf = filp->private_data;
 	size_t read_start, avail;
-- 
cgit v1.2.3


From b2b2cbc4b2a2f389442549399a993a8306420baf Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 21 Dec 2006 21:28:40 -0700
Subject: [PATCH] Fix reparenting to the same thread group. (take 2)

This patch fixes the case when we reparent to a different thread in the
same thread group.  This modifies the code so that we do not send
signals and do not change the signal to send to SIGCHLD unless we have
change the thread group of our parents.  It also suppresses sending
pdeath_sig in this cas as well since the result of geppid doesn't
change.

Thanks to Oleg for spotting my bug of only fixing this for non-ptraced
tasks.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Albert Cahalan <acahalan@gmail.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Coywolf Qi Hunt <qiyong@fc-cn.com>
Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 85917c2bf065..46cf6b681460 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -597,14 +597,6 @@ choose_new_parent(struct task_struct *p, struct task_struct *reaper)
 static void
 reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 {
-	/* We don't want people slaying init.  */
-	if (p->exit_signal != -1)
-		p->exit_signal = SIGCHLD;
-
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here.  */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
 	/* Move the child from its dying parent to the new one.  */
 	if (unlikely(traced)) {
 		/* Preserve ptrace links if someone else is tracing this child.  */
@@ -620,13 +612,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 		p->parent = p->real_parent;
 		add_parent(p);
 
-		/* If we'd notified the old parent about this child's death,
-		 * also notify the new parent.
-		 */
-		if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
-		    thread_group_empty(p))
-			do_notify_parent(p, p->exit_signal);
-		else if (p->state == TASK_TRACED) {
+		if (p->state == TASK_TRACED) {
 			/*
 			 * If it was at a trace stop, turn it into
 			 * a normal stop since it's no longer being
@@ -636,6 +622,27 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 		}
 	}
 
+	/* If this is a threaded reparent there is no need to
+	 * notify anyone anything has happened.
+	 */
+	if (p->real_parent->group_leader == father->group_leader)
+		return;
+
+	/* We don't want people slaying init.  */
+	if (p->exit_signal != -1)
+		p->exit_signal = SIGCHLD;
+		
+	if (p->pdeath_signal)
+		/* We already hold the tasklist_lock here.  */
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+	/* If we'd notified the old parent about this child's death,
+	 * also notify the new parent.
+	 */
+	if (!traced && p->exit_state == EXIT_ZOMBIE &&
+	    p->exit_signal != -1 && thread_group_empty(p))
+		do_notify_parent(p, p->exit_signal);
+
 	/*
 	 * process group orphan check
 	 * Case ii: Our child is in a different pgrp
-- 
cgit v1.2.3


From e1d9fd2e3d33b2fec3207171ec8ca6e71d5c81c7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 23 Dec 2006 16:55:29 +0100
Subject: [PATCH] suspend: fix suspend on single-CPU systems

Clark Williams reported that suspend doesnt work on his laptop on
2.6.20-rc1-rt kernels. The bug was introduced by the following cleanup
commit:

 commit 112cecb2cc0e7341db92281ba04b26c41bb8146d
 Author: Siddha, Suresh B <suresh.b.siddha@intel.com>
 Date:   Wed Dec 6 20:34:31 2006 -0800

    [PATCH] suspend: don't change cpus_allowed for task initiating the suspend

because with this change 'error' is not initialized to 0 anymore, if
there are no other online CPUs. (i.e. if the system is single-CPU).

the fix is the initialize it to 0. The really weird thing is that my
version of gcc does not warn about this non-initialized variable
situation ...

(also fix the kernel printk in the error branch, it was missing a
 newline)

Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9124669f4586..241064a32241 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -258,7 +258,7 @@ static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
 {
-	int cpu, first_cpu, error;
+	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
 	first_cpu = first_cpu(cpu_present_map);
@@ -294,7 +294,7 @@ int disable_nonboot_cpus(void)
 		/* Make sure the CPUs won't be enabled by someone else */
 		cpu_hotplug_disabled = 1;
 	} else {
-		printk(KERN_ERR "Non-boot CPUs are not disabled");
+		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);
-- 
cgit v1.2.3


From e4e6bdbb426d1ecd9e4587f22115f8d0d426d21f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 Dec 2006 16:47:14 -0800
Subject: [PATCH] rcu: rcutorture suspend fix

Fix suspend hang: rcutorture threads need to be nofreeze.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcutorture.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c52f981ea008..482b11ff65cb 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -522,6 +522,7 @@ rcu_torture_writer(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		schedule_timeout_uninterruptible(1);
@@ -561,6 +562,7 @@ rcu_torture_fakewriter(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
@@ -591,6 +593,7 @@ rcu_torture_reader(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		idx = cur_ops->readlock();
-- 
cgit v1.2.3


From 9414232fa0cc28e2f51b8c76d260f2748f7953fc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 Dec 2006 16:48:13 -0800
Subject: [PATCH] sched: fix cond_resched_softirq() offset

Remove the __resched_legal() check: it is conceptually broken.  The biggest
problem it had is that it can mask buggy cond_resched() calls.  A
cond_resched() call is only legal if we are not in an atomic context, with
two narrow exceptions:

 - if the system is booting
 - a reacquire_kernel_lock() down() done while PREEMPT_ACTIVE is set

But __resched_legal() hid this and just silently returned whenever
these primitives were called from invalid contexts. (Same goes for
cond_resched_locked() and cond_resched_softirq()).

Furthermore, the __legal_resched(0) call was buggy in that it caused
unnecessarily long softirq latencies via cond_resched_softirq().  (which is
only called from softirq-off sections, hence the code did nothing.)

The fix is to resurrect the efficiency of the might_sleep checks and to
only allow the narrow exceptions.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index b515e3caad7f..3df33da0dafc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4617,17 +4617,6 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-static inline int __resched_legal(int expected_preempt_count)
-{
-#ifdef CONFIG_PREEMPT
-	if (unlikely(preempt_count() != expected_preempt_count))
-		return 0;
-#endif
-	if (unlikely(system_state != SYSTEM_RUNNING))
-		return 0;
-	return 1;
-}
-
 static void __cond_resched(void)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -4647,7 +4636,8 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-	if (need_resched() && __resched_legal(0)) {
+	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
+					system_state == SYSTEM_RUNNING) {
 		__cond_resched();
 		return 1;
 	}
@@ -4673,7 +4663,7 @@ int cond_resched_lock(spinlock_t *lock)
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched() && __resched_legal(1)) {
+	if (need_resched() && system_state == SYSTEM_RUNNING) {
 		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
@@ -4689,7 +4679,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && __resched_legal(0)) {
+	if (need_resched() && system_state == SYSTEM_RUNNING) {
 		raw_local_irq_disable();
 		_local_bh_enable();
 		raw_local_irq_enable();
-- 
cgit v1.2.3


From a3f99f8ba8cbd2d4e231b767b3d6236a555da38c Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 29 Dec 2006 16:48:20 -0800
Subject: [PATCH] module: fix mod_sysfs_setup() return value

mod_sysfs_setup() doesn't return error when kobject_add_dir() failed.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/module.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index b565eaeff7e6..dbce132b354c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1132,8 +1132,10 @@ static int mod_sysfs_setup(struct module *mod,
 		goto out;
 
 	mod->drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
-	if (!mod->drivers_dir)
+	if (!mod->drivers_dir) {
+		err = -ENOMEM;
 		goto out_unreg;
+	}
 
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
-- 
cgit v1.2.3


From 089e34b60033863549fbe561d31ac8c778a20e7f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 29 Dec 2006 16:49:04 -0800
Subject: [PATCH] cpuset procfs warning fix

fs/proc/base.c:1869: warning: initialization discards qualifiers from pointer target type
fs/proc/base.c:2150: warning: initialization discards qualifiers from pointer target type

Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cpuset.h | 2 +-
 kernel/cpuset.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 826b15e914e2..fd404416f31c 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -55,7 +55,7 @@ extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
 extern int cpuset_memory_pressure_enabled;
 extern void __cpuset_memory_pressure_bump(void);
 
-extern const struct file_operations proc_cpuset_operations;
+extern struct file_operations proc_cpuset_operations;
 extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 
 extern void cpuset_lock(void);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 232aed2b10f9..6b05dc69c959 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2656,7 +2656,7 @@ static int cpuset_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_cpuset_show, pid);
 }
 
-const struct file_operations proc_cpuset_operations = {
+struct file_operations proc_cpuset_operations = {
 	.open		= cpuset_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-- 
cgit v1.2.3


From 755cd90029b61b96816b8ac0ab6a33a197f842d0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 29 Dec 2006 16:49:14 -0800
Subject: [PATCH] lockdep: printk warning fix

kernel/lockdep.c: In function `lookup_chain_cache':
kernel/lockdep.c:1339: warning: long long unsigned int format, u64 arg (arg 2)
kernel/lockdep.c:1344: warning: long long unsigned int format, u64 arg (arg 2)

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/lockdep.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 01e750559034..509efd49540f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1318,12 +1318,16 @@ static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class)
 cache_hit:
 			debug_atomic_inc(&chain_lookup_hits);
 			if (very_verbose(class))
-				printk("\nhash chain already cached, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name);
+				printk("\nhash chain already cached, key: "
+					"%016Lx tail class: [%p] %s\n",
+					(unsigned long long)chain_key,
+					class->key, class->name);
 			return 0;
 		}
 	}
 	if (very_verbose(class))
-		printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", chain_key, class->key, class->name);
+		printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
+			(unsigned long long)chain_key, class->key, class->name);
 	/*
 	 * Allocate a new chain entry from the static array, and add
 	 * it to the hash:
-- 
cgit v1.2.3


From 241ceee0b442c69226fb882d61d9b9785743898f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 24 Dec 2006 23:30:44 +0300
Subject: [PATCH] restore ->pdeath_signal behaviour

Commit b2b2cbc4b2a2f389442549399a993a8306420baf introduced a user-
visible change: ->pdeath_signal is sent only when the entire thread
group exits.

While this change is imho good, it may break things.  So restore the
old behaviour for now.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
To: Albert Cahalan <acahalan@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Linus Torvalds <torvalds@osdl.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Qi Yong <qiyong@fc-cn.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 46cf6b681460..35401720635b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -597,6 +597,10 @@ choose_new_parent(struct task_struct *p, struct task_struct *reaper)
 static void
 reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 {
+	if (p->pdeath_signal)
+		/* We already hold the tasklist_lock here.  */
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
 	/* Move the child from its dying parent to the new one.  */
 	if (unlikely(traced)) {
 		/* Preserve ptrace links if someone else is tracing this child.  */
@@ -631,10 +635,6 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	/* We don't want people slaying init.  */
 	if (p->exit_signal != -1)
 		p->exit_signal = SIGCHLD;
-		
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here.  */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
 
 	/* If we'd notified the old parent about this child's death,
 	 * also notify the new parent.
-- 
cgit v1.2.3


From 7f422e2e84307bdd9b51c19a602e1601d160aebc Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 20 Dec 2006 10:39:33 +0100
Subject: [PATCH] Driver core: Fix prefix driver links in /sys/module by
 bus-name

Modules may have drivers with the same name on different buses.
This patch fixes this problem.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/module.c | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index dbce132b354c..d0f2260a0210 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1148,10 +1148,10 @@ static int mod_sysfs_setup(struct module *mod,
 	kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
 	return 0;
 
-out_unreg_drivers:
-	kobject_unregister(mod->drivers_dir);
 out_unreg_param:
 	module_param_sysfs_remove(mod);
+out_unreg_drivers:
+	kobject_unregister(mod->drivers_dir);
 out_unreg:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
@@ -2327,8 +2327,22 @@ void print_modules(void)
 	printk("\n");
 }
 
+static char *make_driver_name(struct device_driver *drv)
+{
+	char *driver_name;
+
+	driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2,
+			      GFP_KERNEL);
+	if (!driver_name)
+		return NULL;
+
+	sprintf(driver_name, "%s:%s", drv->bus->name, drv->name);
+	return driver_name;
+}
+
 void module_add_driver(struct module *mod, struct device_driver *drv)
 {
+	char *driver_name;
 	int no_warn;
 
 	if (!mod || !drv)
@@ -2336,17 +2350,31 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 
 	/* Don't check return codes; these calls are idempotent */
 	no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
-	no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, drv->name);
+	driver_name = make_driver_name(drv);
+	if (driver_name) {
+		no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj,
+					    driver_name);
+		kfree(driver_name);
+	}
 }
 EXPORT_SYMBOL(module_add_driver);
 
 void module_remove_driver(struct device_driver *drv)
 {
+	char *driver_name;
+
 	if (!drv)
 		return;
+
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->drivers_dir)
-		sysfs_remove_link(drv->owner->drivers_dir, drv->name);
+	if (drv->owner && drv->owner->drivers_dir) {
+		driver_name = make_driver_name(drv);
+		if (driver_name) {
+			sysfs_remove_link(drv->owner->drivers_dir,
+					  driver_name);
+			kfree(driver_name);
+		}
+	}
 }
 EXPORT_SYMBOL(module_remove_driver);
 
-- 
cgit v1.2.3


From a416aba637dcb4127595c02a59041cd278422f7e Mon Sep 17 00:00:00 2001
From: Ard van Breemen <ard@telegraafnet.nl>
Date: Fri, 5 Jan 2007 16:36:20 -0800
Subject: [PATCH] kernelparams: detect if and which parameter parsing enabled
 irq's

The parsing of some kernel parameters seem to enable irq's at a stage that
irq's are not supposed to be enabled (Particularly the ide kernel parameters).

Having irq's enabled before the irq controller is initialized might lead to a
kernel panic.  This patch only detects this behaviour and warns about wich
parameter caused it.

[akpm@osdl.org: cleanups]
Signed-off-by: Ard van Breemen <ard@telegraafnet.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/params.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index f406655d6653..718945da8f58 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -143,9 +143,15 @@ int parse_args(const char *name,
 
 	while (*args) {
 		int ret;
+		int irq_was_disabled;
 
 		args = next_arg(args, &param, &val);
+		irq_was_disabled = irqs_disabled();
 		ret = parse_one(param, val, params, num, unknown);
+		if (irq_was_disabled && !irqs_disabled()) {
+			printk(KERN_WARNING "parse_args(): option '%s' enabled "
+					"irq's!\n", param);
+		}
 		switch (ret) {
 		case -ENOENT:
 			printk(KERN_ERR "%s: Unknown parameter `%s'\n",
-- 
cgit v1.2.3


From 7bf236874292fd073c6bdd27f89c3d9e81a79cbc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 5 Jan 2007 16:36:28 -0800
Subject: [PATCH] swsusp: Do not fail if resume device is not set

In the kernels later than 2.6.19 there is a regression that makes swsusp
fail if the resume device is not explicitly specified.

It can be fixed by adding an additional parameter to
mm/swapfile.c:swap_type_of() allowing us to pass the (struct block_device
*) corresponding to the first available swap back to the caller.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 2 +-
 kernel/power/swap.c  | 9 +++++----
 kernel/power/user.c  | 7 ++++---
 mm/swapfile.c        | 8 +++++++-
 4 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index add51cebc8d9..5423559a44a6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -245,7 +245,7 @@ extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
 extern void free_swap_and_cache(swp_entry_t);
-extern int swap_type_of(dev_t, sector_t);
+extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern sector_t swapdev_block(int, pgoff_t);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f133d4a6d817..3581f8f86acd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -165,14 +165,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int res;
 
-	res = swap_type_of(swsusp_resume_device, swsusp_resume_block);
+	res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
+			&resume_bdev);
 	if (res < 0)
 		return res;
 
 	root_swap = res;
-	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE);
-	if (IS_ERR(resume_bdev))
-		return PTR_ERR(resume_bdev);
+	res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
+	if (res)
+		return res;
 
 	res = set_blocksize(resume_bdev, PAGE_SIZE);
 	if (res < 0)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 89443b85163b..f7b7a785a5c6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -57,7 +57,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	memset(&data->handle, 0, sizeof(struct snapshot_handle));
 	if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
 		data->swap = swsusp_resume_device ?
-				swap_type_of(swsusp_resume_device, 0) : -1;
+			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
 	} else {
 		data->swap = -1;
@@ -268,7 +268,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			 * so we need to recode them
 			 */
 			if (old_decode_dev(arg)) {
-				data->swap = swap_type_of(old_decode_dev(arg), 0);
+				data->swap = swap_type_of(old_decode_dev(arg),
+							0, NULL);
 				if (data->swap < 0)
 					error = -ENODEV;
 			} else {
@@ -365,7 +366,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			swdev = old_decode_dev(swap_area.dev);
 			if (swdev) {
 				offset = swap_area.offset;
-				data->swap = swap_type_of(swdev, offset);
+				data->swap = swap_type_of(swdev, offset, NULL);
 				if (data->swap < 0)
 					error = -ENODEV;
 			} else {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b9fc0e5de6d5..a2d9bb4e80df 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry)
  *
  * This is needed for the suspend to disk (aka swsusp).
  */
-int swap_type_of(dev_t device, sector_t offset)
+int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 {
 	struct block_device *bdev = NULL;
 	int i;
@@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset)
 			continue;
 
 		if (!bdev) {
+			if (bdev_p)
+				*bdev_p = sis->bdev;
+
 			spin_unlock(&swap_lock);
 			return i;
 		}
@@ -459,6 +462,9 @@ int swap_type_of(dev_t device, sector_t offset)
 			se = list_entry(sis->extent_list.next,
 					struct swap_extent, list);
 			if (se->start_block == offset) {
+				if (bdev_p)
+					*bdev_p = sis->bdev;
+
 				spin_unlock(&swap_lock);
 				bdput(bdev);
 				return i;
-- 
cgit v1.2.3


From a75acf850ca80136a4f845cf9a7cd26e7465c1f4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jan 2007 16:36:29 -0800
Subject: [PATCH] profiling: fix sched profiling typo

Fix sched profiling typo, introduced by the sleep profiling patch.  This
bug caused profile=sched to not work.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/profile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/profile.c b/kernel/profile.c
index fb5e03d57e9d..11550b2290b6 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -63,7 +63,7 @@ static int __init profile_setup(char * str)
 		printk(KERN_INFO
 			"kernel sleep profiling enabled (shift: %ld)\n",
 			prof_shift);
-	} else if (!strncmp(str, sleepstr, strlen(sleepstr))) {
+	} else if (!strncmp(str, schedstr, strlen(schedstr))) {
 		prof_on = SCHED_PROFILING;
 		if (str[strlen(schedstr)] == ',')
 			str += strlen(schedstr) + 1;
-- 
cgit v1.2.3


From 343cde51b3b856470eea24a89f00166b8e2d7272 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Thu, 11 Jan 2007 01:52:44 +0100
Subject: [PATCH] x86-64: Make noirqdebug_setup function non init to fix
 modpost warning

o noirqdebug_setup() is __init but it is being called by
  quirk_intel_irqbalance() which if of type __devinit. If CONFIG_HOTPLUG=y,
  quirk_intel_irqbalance() is put into text section and it is wrong to
  call a function in __init section.

o MODPOST flags this on i386 if CONFIG_RELOCATABLE=y

WARNING: vmlinux - Section mismatch: reference to .init.text:noirqdebug_setup from .text between 'quirk_intel_irqbalance' (at offset 0xc010969e) and 'i8237A_suspend'

o Make noirqdebug_setup() non-init.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 kernel/irq/spurious.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 543ea2e5ad93..9d8c79b48823 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -176,7 +176,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 
 int noirqdebug __read_mostly;
 
-int __init noirqdebug_setup(char *str)
+int noirqdebug_setup(char *str)
 {
 	noirqdebug = 1;
 	printk(KERN_INFO "IRQ lockup detection disabled\n");
-- 
cgit v1.2.3


From e5e5673f828623e58a401862b33173591faaeaff Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Wed, 10 Jan 2007 23:15:28 -0800
Subject: [PATCH] sched: tasks cannot run on cpus onlined after boot

Commit 5c1e176781f43bc902a51e5832f789756bff911b ("sched: force /sbin/init
off isolated cpus") sets init's cpus_allowed to a subset of cpu_online_map
at boot time, which means that tasks won't be scheduled on cpus that are
added to the system later.

Make init's cpus_allowed a subset of cpu_possible_map instead.  This should
still preserve the behavior that Nick's change intended.

Thanks to Giuliano Pochini for reporting this and testing the fix:

http://ozlabs.org/pipermail/linuxppc-dev/2006-December/029397.html

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 3df33da0dafc..cca93cc0dd7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6865,7 +6865,7 @@ void __init sched_init_smp(void)
 
 	lock_cpu_hotplug();
 	arch_init_sched_domains(&cpu_online_map);
-	cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
+	cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
 	if (cpus_empty(non_isolated_cpus))
 		cpu_set(smp_processor_id(), non_isolated_cpus);
 	unlock_cpu_hotplug();
-- 
cgit v1.2.3


From b282b6f8a8d1cf3e132ce3769d7d1cac81d9dd2d Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Wed, 10 Jan 2007 23:15:34 -0800
Subject: [PATCH] Change cpu_up and co from __devinit to __cpuinit

Compiling the kernel with CONFIG_HOTPLUG = y and CONFIG_HOTPLUG_CPU = n
with CONFIG_RELOCATABLE = y generates the following modpost warnings

WARNING: vmlinux - Section mismatch: reference to .init.data: from
.text between '_cpu_up' (at offset 0xc0141b7d) and 'cpu_up'
WARNING: vmlinux - Section mismatch: reference to .init.data: from
.text between '_cpu_up' (at offset 0xc0141b9c) and 'cpu_up'
WARNING: vmlinux - Section mismatch: reference to .init.text:__cpu_up
from .text between '_cpu_up' (at offset 0xc0141bd8) and 'cpu_up'
WARNING: vmlinux - Section mismatch: reference to .init.data: from
.text between '_cpu_up' (at offset 0xc0141c05) and 'cpu_up'
WARNING: vmlinux - Section mismatch: reference to .init.data: from
.text between '_cpu_up' (at offset 0xc0141c26) and 'cpu_up'
WARNING: vmlinux - Section mismatch: reference to .init.data: from
.text between '_cpu_up' (at offset 0xc0141c37) and 'cpu_up'

This is because cpu_up, _cpu_up and __cpu_up (in some architectures) are
defined as __devinit
AND
__cpu_up calls some __cpuinit functions.

Since __cpuinit would map to __init with this kind of a configuration,
we get a .text refering .init.data warning.

This patch solves the problem by converting all of __cpu_up, _cpu_up
and cpu_up from __devinit to __cpuinit. The approach is justified since
the callers of cpu_up are either dependent on CONFIG_HOTPLUG_CPU or
are of __init type.

Thus when CONFIG_HOTPLUG_CPU=y, all these cpu up functions would land up
in .text section, and when CONFIG_HOTPLUG_CPU=n, all these functions would
land up in .init section.

Tested on a i386 SMP machine running linux-2.6.20-rc3-mm1.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/cris/arch-v32/kernel/smp.c | 2 +-
 arch/m32r/kernel/smpboot.c      | 2 +-
 arch/mips/kernel/smp.c          | 2 +-
 arch/parisc/kernel/smp.c        | 2 +-
 arch/powerpc/kernel/smp.c       | 2 +-
 arch/sparc64/kernel/smp.c       | 2 +-
 kernel/cpu.c                    | 4 ++--
 7 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 2d0023f2d49b..77e655f26560 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -195,7 +195,7 @@ int setup_profiling_timer(unsigned int multiplier)
  */
 unsigned long cache_decay_ticks = 1;
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 	smp_boot_one_cpu(cpu);
 	return cpu_online(cpu) ? 0 : -ENOSYS;
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fa7865609495..48d376f47e1a 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -351,7 +351,7 @@ static void __init do_boot_cpu(int phys_id)
 	}
 }
 
-int __devinit __cpu_up(unsigned int cpu_id)
+int __cpuinit __cpu_up(unsigned int cpu_id)
 {
 	int timeout;
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f2a8701e414d..0555fc554f65 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -271,7 +271,7 @@ void __devinit smp_prepare_boot_cpu(void)
  * and keep control until "cpu_online(cpu)" is set.  Note: cpu is
  * physical, not logical.
  */
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 	struct task_struct *idle;
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 4a23a97b06cd..12cc019307ad 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -608,7 +608,7 @@ void smp_cpus_done(unsigned int cpu_max)
 }
 
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 	if (cpu != 0 && cpu < parisc_max_cpus)
 		smp_boot_one_cpu(cpu);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9b28c238b6c0..0e8beca460af 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -468,7 +468,7 @@ static int __devinit cpu_enable(unsigned int cpu)
 	return -ENOSYS;
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int c;
 
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 0a4958536bcd..fc99f7b8012f 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1388,7 +1388,7 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int ret = smp_boot_one_cpu(cpu);
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 241064a32241..7406fe6966f9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -204,7 +204,7 @@ int cpu_down(unsigned int cpu)
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /* Requires cpu_add_remove_lock to be held */
-static int __devinit _cpu_up(unsigned int cpu)
+static int __cpuinit _cpu_up(unsigned int cpu)
 {
 	int ret;
 	void *hcpu = (void *)(long)cpu;
@@ -239,7 +239,7 @@ out_notify:
 	return ret;
 }
 
-int __devinit cpu_up(unsigned int cpu)
+int __cpuinit cpu_up(unsigned int cpu)
 {
 	int err = 0;
 
-- 
cgit v1.2.3


From 07031e14c1127fc7e1a5b98dfcc59f434e025104 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 10 Jan 2007 23:15:38 -0800
Subject: [PATCH] KVM: add VM-exit profiling

This adds the profile=kvm boot option, which enables KVM to profile VM
exits.

Use: "readprofile -m ./System.map | sort -n" to see the resulting
output:

   [...]
   18246 serial_out                               148.3415
   18945 native_flush_tlb                         378.9000
   23618 serial_in                                212.7748
   29279 __spin_unlock_irq                        622.9574
   43447 native_apic_write                        2068.9048
   52702 enable_8259A_irq                         742.2817
   54250 vgacon_scroll                             89.3740
   67394 ide_inb                                  6126.7273
   79514 copy_page_range                           98.1654
   84868 do_wp_page                                86.6000
  140266 pit_read                                 783.6089
  151436 ide_outb                                 25239.3333
  152668 native_io_delay                          21809.7143
  174783 mask_and_ack_8259A                       783.7803
  362404 native_set_pte_at                        36240.4000
 1688747 total                                      0.5009

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Avi Kivity <avi@qumranet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/kvm/svm.c       |  8 ++++++++
 drivers/kvm/vmx.c       |  7 +++++++
 include/linux/profile.h |  1 +
 kernel/profile.c        | 14 ++++++++++++++
 4 files changed, 30 insertions(+)

(limited to 'kernel')

diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index ccc06b1b91b5..714f6a7841cd 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/profile.h>
 #include <asm/desc.h>
 
 #include "kvm_svm.h"
@@ -1558,6 +1559,13 @@ again:
 
 	reload_tss(vcpu);
 
+	/*
+	 * Profile KVM exit RIPs:
+	 */
+	if (unlikely(prof_on == KVM_PROFILING))
+		profile_hit(KVM_PROFILING,
+			(void *)(unsigned long)vcpu->svm->vmcb->save.rip);
+
 	stgi();
 
 	kvm_reput_irq(vcpu);
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index d4701cb4c654..ce219e3f557f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/profile.h>
 #include <asm/io.h>
 #include <asm/desc.h>
 
@@ -1859,6 +1860,12 @@ again:
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 #endif
 
+	/*
+	 * Profile KVM exit RIPs:
+	 */
+	if (unlikely(prof_on == KVM_PROFILING))
+		profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
+
 	kvm_run->exit_type = 0;
 	if (fail) {
 		kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 5670b340c4ef..eec48f5f9348 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -15,6 +15,7 @@ extern int prof_on __read_mostly;
 #define CPU_PROFILING	1
 #define SCHED_PROFILING	2
 #define SLEEP_PROFILING	3
+#define KVM_PROFILING	4
 
 struct proc_dir_entry;
 struct pt_regs;
diff --git a/kernel/profile.c b/kernel/profile.c
index 11550b2290b6..a6574a18514e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -40,7 +40,10 @@ int (*timer_hook)(struct pt_regs *) __read_mostly;
 
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
+
 int prof_on __read_mostly;
+EXPORT_SYMBOL_GPL(prof_on);
+
 static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
@@ -52,6 +55,7 @@ static int __init profile_setup(char * str)
 {
 	static char __initdata schedstr[] = "schedule";
 	static char __initdata sleepstr[] = "sleep";
+	static char __initdata kvmstr[] = "kvm";
 	int par;
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
@@ -72,6 +76,15 @@ static int __init profile_setup(char * str)
 		printk(KERN_INFO
 			"kernel schedule profiling enabled (shift: %ld)\n",
 			prof_shift);
+	} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
+		prof_on = KVM_PROFILING;
+		if (str[strlen(kvmstr)] == ',')
+			str += strlen(kvmstr) + 1;
+		if (get_option(&str, &par))
+			prof_shift = par;
+		printk(KERN_INFO
+			"kernel KVM profiling enabled (shift: %ld)\n",
+			prof_shift);
 	} else if (get_option(&str, &par)) {
 		prof_shift = par;
 		prof_on = CPU_PROFILING;
@@ -318,6 +331,7 @@ out:
 	local_irq_restore(flags);
 	put_cpu();
 }
+EXPORT_SYMBOL_GPL(profile_hits);
 
 static int __devinit profile_cpu_callback(struct notifier_block *info,
 					unsigned long action, void *__cpu)
-- 
cgit v1.2.3