From b00c1a99e7758f794923c61e5cd55268d61c9469 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 29 Sep 2008 15:44:46 +0200
Subject: hrtimer: mark migration state

Impact: during migration active hrtimers can be seen as inactive

The migration code removes the hrtimers from the queues of the dead
CPU and sets the state temporary to INACTIVE. The enqueue code sets it
to ACTIVE/PENDING again.

Prevent that the wrong state can be seen by using a separate migration
state bit.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6d93dce61cbb..bdd88df1b4e5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -67,9 +67,10 @@ enum hrtimer_cb_mode {
  * 0x02		callback function running
  * 0x04		callback pending (high resolution mode)
  *
- * Special case:
+ * Special cases:
  * 0x03		callback function running and enqueued
  *		(was requeued on another CPU)
+ * 0x09		timer was migrated on CPU hotunplug
  * The "callback function running and enqueued" status is only possible on
  * SMP. It happens for example when a posix timer expired and the callback
  * queued a signal. Between dropping the lock which protects the posix timer
@@ -87,6 +88,7 @@ enum hrtimer_cb_mode {
 #define HRTIMER_STATE_ENQUEUED	0x01
 #define HRTIMER_STATE_CALLBACK	0x02
 #define HRTIMER_STATE_PENDING	0x04
+#define HRTIMER_STATE_MIGRATE	0x08
 
 /**
  * struct hrtimer - the basic hrtimer structure
-- 
cgit v1.2.3


From ccc7dadf736639da86f3e0c86832c11a66fc8221 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 29 Sep 2008 15:47:42 +0200
Subject: hrtimer: prevent migration of per CPU hrtimers

Impact: per CPU hrtimers can be migrated from a dead CPU

The hrtimer code has no knowledge about per CPU timers, but we need to
prevent the migration of such timers and warn when such a timer is
active at migration time.

Explicitely mark the timers as per CPU and use a more understandable
mode descriptor for the interrupts safe unlocked callback mode, which
is used by hrtimer_sleeper and the scheduler code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h      | 14 +++++++++++---
 kernel/hrtimer.c             | 37 +++++++++++++++++++++++++------------
 kernel/sched.c               |  4 ++--
 kernel/time/tick-sched.c     |  2 +-
 kernel/trace/trace_sysprof.c |  2 +-
 5 files changed, 40 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bdd88df1b4e5..2f245fe63bda 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -47,14 +47,22 @@ enum hrtimer_restart {
  *	HRTIMER_CB_IRQSAFE:		Callback may run in hardirq context
  *	HRTIMER_CB_IRQSAFE_NO_RESTART:	Callback may run in hardirq context and
  *					does not restart the timer
- *	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:	Callback must run in hardirq context
- *					Special mode for tick emultation
+ *	HRTIMER_CB_IRQSAFE_PERCPU:	Callback must run in hardirq context
+ *					Special mode for tick emulation and
+ *					scheduler timer. Such timers are per
+ *					cpu and not allowed to be migrated on
+ *					cpu unplug.
+ *	HRTIMER_CB_IRQSAFE_UNLOCKED:	Callback should run in hardirq context
+ *					with timer->base lock unlocked
+ *					used for timers which call wakeup to
+ *					avoid lock order problems with rq->lock
  */
 enum hrtimer_cb_mode {
 	HRTIMER_CB_SOFTIRQ,
 	HRTIMER_CB_IRQSAFE,
 	HRTIMER_CB_IRQSAFE_NO_RESTART,
-	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
+	HRTIMER_CB_IRQSAFE_PERCPU,
+	HRTIMER_CB_IRQSAFE_UNLOCKED,
 };
 
 /*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ace723dd1e52..cdec83e722fa 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 			 */
 			BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
 			return 1;
-		case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+		case HRTIMER_CB_IRQSAFE_PERCPU:
+		case HRTIMER_CB_IRQSAFE_UNLOCKED:
 			/*
 			 * This is solely for the sched tick emulation with
 			 * dynamic tick support to ensure that we do not
 			 * restart the tick right on the edge and end up with
 			 * the tick timer in the softirq ! The calling site
-			 * takes care of this.
+			 * takes care of this. Also used for hrtimer sleeper !
 			 */
 			debug_hrtimer_deactivate(timer);
 			return 1;
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
 	timer_stats_account_hrtimer(timer);
 
 	fn = timer->function;
-	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+	    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
 		/*
 		 * Used for scheduler timers, avoid lock inversion with
 		 * rq->lock and tasklist_lock.
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = task;
 #ifdef CONFIG_HIGH_RES_TIMERS
-	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 #endif
 }
 
@@ -1592,7 +1594,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 #ifdef CONFIG_HOTPLUG_CPU
 
 static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-				struct hrtimer_clock_base *new_base)
+				struct hrtimer_clock_base *new_base, int dcpu)
 {
 	struct hrtimer *timer;
 	struct rb_node *node;
@@ -1603,6 +1605,18 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		BUG_ON(hrtimer_callback_running(timer));
 		debug_hrtimer_deactivate(timer);
 
+		/*
+		 * Should not happen. Per CPU timers should be
+		 * canceled _before_ the migration code is called
+		 */
+		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+			__remove_hrtimer(timer, old_base,
+					 HRTIMER_STATE_INACTIVE, 0);
+			WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+			     timer, timer->function, dcpu);
+			continue;
+		}
+
 		/*
 		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
 		 * timer could be seen as !active and just vanish away
@@ -1619,12 +1633,11 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		/*
 		 * Happens with high res enabled when the timer was
 		 * already expired and the callback mode is
-		 * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
-		 * (hrtimer_sleeper). The enqueue code does not move
-		 * them to the soft irq pending list for
-		 * performance/latency reasons, but in the migration
-		 * state, we need to do that otherwise we end up with
-		 * a stale timer.
+		 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+		 * enqueue code does not move them to the soft irq
+		 * pending list for performance/latency reasons, but
+		 * in the migration state, we need to do that
+		 * otherwise we end up with a stale timer.
 		 */
 		if (timer->state == HRTIMER_STATE_MIGRATE) {
 			timer->state = HRTIMER_STATE_PENDING;
@@ -1682,7 +1695,7 @@ static void migrate_hrtimers(int cpu)
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		if (migrate_hrtimer_list(&old_base->clock_base[i],
-					 &new_base->clock_base[i]))
+					 &new_base->clock_base[i], cpu))
 			raise = 1;
 	}
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 13dd2db9fb2d..ad1962dc0aa2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 39019b3f7621..cb02324bdb88 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -625,7 +625,7 @@ void tick_setup_sched_timer(void)
 	 */
 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	ts->sched_timer.function = tick_sched_timer;
-	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	/* Get the next period (per cpu) */
 	ts->sched_timer.expires = tick_init_jiffy_update();
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index bb948e52ce20..db58fb66a135 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
-	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
 }
-- 
cgit v1.2.3


From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 30 Sep 2008 05:32:24 -0700
Subject: sctp: Fix kernel panic while process protocol violation parameter

Since call to function sctp_sf_abort_violation() need paramter 'arg' with
'struct sctp_chunk' type, it will read the chunk type and chunk length from
the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen()
always with 'struct sctp_paramhdr' type's parameter, it will be passed to
sctp_sf_abort_violation(). This may cause kernel panic.

   sctp_sf_violation_paramlen()
     |-- sctp_sf_abort_violation()
        |-- sctp_make_abort_violation()

This patch fixed this problem. This patch also fix two place which called
sctp_sf_violation_paramlen() with wrong paramter type.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    |  3 +++
 net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++-------------
 net/sctp/sm_statefuns.c  | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 64 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 24811732bdb2..029a54a02396 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -227,6 +227,9 @@ struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
 				   const struct sctp_chunk *,
 				   const __u8 *,
 				   const size_t );
+struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *,
+				   const struct sctp_chunk *,
+				   struct sctp_paramhdr *);
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *,
 				  const struct sctp_transport *,
 				  const void *payload,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b599cbba4fbe..d68869f966c3 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1012,6 +1012,29 @@ end:
 	return retval;
 }
 
+struct sctp_chunk *sctp_make_violation_paramlen(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk,
+	struct sctp_paramhdr *param)
+{
+	struct sctp_chunk *retval;
+	static const char error[] = "The following parameter had invalid length:";
+	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+				sizeof(sctp_paramhdr_t);
+
+	retval = sctp_make_abort(asoc, chunk, payload_len);
+	if (!retval)
+		goto nodata;
+
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
+			sizeof(error) + sizeof(sctp_paramhdr_t));
+	sctp_addto_chunk(retval, sizeof(error), error);
+	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+
+nodata:
+	return retval;
+}
+
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 				  const struct sctp_transport *transport,
@@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 					const struct sctp_chunk *chunk,
 					struct sctp_chunk **errp)
 {
-	static const char error[] = "The following parameter had invalid length:";
-	size_t		payload_len = WORD_ROUND(sizeof(error)) +
-						sizeof(sctp_paramhdr_t);
-
-
 	/* This is a fatal error.  Any accumulated non-fatal errors are
 	 * not reported.
 	 */
@@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 		sctp_chunk_free(*errp);
 
 	/* Create an error chunk and fill it in with our payload. */
-	*errp = sctp_make_op_error_space(asoc, chunk, payload_len);
-
-	if (*errp) {
-		sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION,
-				sizeof(error) + sizeof(sctp_paramhdr_t));
-		sctp_addto_chunk(*errp, sizeof(error), error);
-		sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param);
-	}
+	*errp = sctp_make_violation_paramlen(asoc, chunk, param);
 
 	return 0;
 }
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8848d329aa2c..7c622af2ce55 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
+				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_ctsn(
@@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	addr_param = (union sctp_addr_param *)hdr->params;
 	length = ntohs(addr_param->p.length);
 	if (length < sizeof(sctp_paramhdr_t))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
 			   (void *)addr_param, commands);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 			    (sctp_paramhdr_t *)((void *)addr_param + length),
 			    (void *)chunk->chunk_end,
 			    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-						  (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+						  (void *)err_param, commands);
 
 	/* ADDIP 5.2 E1) Compare the value of the serial number to the value
 	 * the endpoint stored in a new association variable
@@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	    (sctp_paramhdr_t *)addip_hdr->params,
 	    (void *)asconf_ack->chunk_end,
 	    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-			   (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+			   (void *)err_param, commands);
 
 	if (last_asconf) {
 		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
@@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands) {
-	static const char err_str[] = "The following parameter had invalid length:";
+				     void *arg, void *ext,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk =  arg;
+	struct sctp_paramhdr *param = ext;
+	struct sctp_chunk *abort = NULL;
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
+		goto discard;
+
+	/* Make the abort chunk. */
+	abort = sctp_make_violation_paramlen(asoc, chunk, param);
+	if (!abort)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
+			SCTP_ERROR(ECONNABORTED));
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+			SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+discard:
+	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+
+	return SCTP_DISPOSITION_ABORT;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
 }
 
 /* Handle a protocol violation when the peer trying to advance the
-- 
cgit v1.2.3


From 16dbc6c9616363fe53811abcbd935336dc0a0f01 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 2 Oct 2008 14:50:12 -0700
Subject: inotify: fix lock ordering wrt do_page_fault's mmap_sem

Fix inotify lock order reversal with mmap_sem due to holding locks over
copy_to_user.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Reported-by: "Daniel J Blueman" <daniel.blueman@gmail.com>
Tested-by: "Daniel J Blueman" <daniel.blueman@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inotify_user.c            | 27 ++++++++++++++++++++-------
 include/asm-x86/uaccess_64.h |  1 +
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 60249429a253..d85c7d931cdf 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -323,7 +323,7 @@ out:
 }
 
 /*
- * remove_kevent - cleans up and ultimately frees the given kevent
+ * remove_kevent - cleans up the given kevent
  *
  * Caller must hold dev->ev_mutex.
  */
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev,
 
 	dev->event_count--;
 	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
+}
 
+/*
+ * free_kevent - frees the given kevent.
+ */
+static void free_kevent(struct inotify_kernel_event *kevent)
+{
 	kfree(kevent->name);
 	kmem_cache_free(event_cachep, kevent);
 }
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
 		struct inotify_kernel_event *kevent;
 		kevent = inotify_dev_get_event(dev);
 		remove_kevent(dev, kevent);
+		free_kevent(kevent);
 	}
 }
 
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	dev = file->private_data;
 
 	while (1) {
-		int events;
 
 		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
 
 		mutex_lock(&dev->ev_mutex);
-		events = !list_empty(&dev->events);
-		mutex_unlock(&dev->ev_mutex);
-		if (events) {
+		if (!list_empty(&dev->events)) {
 			ret = 0;
 			break;
 		}
+		mutex_unlock(&dev->ev_mutex);
 
 		if (file->f_flags & O_NONBLOCK) {
 			ret = -EAGAIN;
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 
-	mutex_lock(&dev->ev_mutex);
 	while (1) {
 		struct inotify_kernel_event *kevent;
 
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 			}
 			break;
 		}
+		remove_kevent(dev, kevent);
+
+		/*
+		 * Must perform the copy_to_user outside the mutex in order
+		 * to avoid a lock order reversal with mmap_sem.
+		 */
+		mutex_unlock(&dev->ev_mutex);
 
 		if (copy_to_user(buf, &kevent->event, event_size)) {
 			ret = -EFAULT;
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 			count -= kevent->event.len;
 		}
 
-		remove_kevent(dev, kevent);
+		free_kevent(kevent);
+
+		mutex_lock(&dev->ev_mutex);
 	}
 	mutex_unlock(&dev->ev_mutex);
 
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 515d4dce96b5..45806d60bcbe 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -7,6 +7,7 @@
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/prefetch.h>
+#include <linux/lockdep.h>
 #include <asm/page.h>
 
 /*
-- 
cgit v1.2.3


From 4b19de6d1cb07c8bcb6778e771f9cfd5bcfdfd3e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 2 Oct 2008 14:50:16 -0700
Subject: mm: tiny-shmem nommu fix

The previous patch db203d53d474aa068984e409d807628f5841da1b ("mm:
tiny-shmem fix lock ordering: mmap_sem vs i_mutex") to fix the lock
ordering in tiny-shmem breaks shared anonymous and IPC memory on NOMMU
architectures because it was using the expanding truncate to signal ramfs
to allocate a physically contiguous RAM backing the inode (otherwise it is
unusable for "memory mapping" it to userspace).

However do_truncate is what caused the lock ordering error, due to it
taking i_mutex.  In this case, we can actually just call ramfs directly to
allocate memory for the mapping, rather than go via truncate.

Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ramfs/file-nommu.c | 2 +-
 include/linux/ramfs.h | 1 +
 mm/tiny-shmem.c       | 6 ++++++
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 52312ec93ff4..5145cb9125af 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = {
  * size 0 on the assumption that it's going to be used for an mmap of shared
  * memory
  */
-static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
+int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 {
 	struct pagevec lru_pvec;
 	unsigned long npages, xpages, loop, limit;
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index b160fb18e8d6..37aaf2b39863 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -6,6 +6,7 @@ extern int ramfs_get_sb(struct file_system_type *fs_type,
 	 int flags, const char *dev_name, void *data, struct vfsmount *mnt);
 
 #ifndef CONFIG_MMU
+extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize);
 extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 						   unsigned long addr,
 						   unsigned long len,
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index d17cb6f6ab10..8d7a27a6335c 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -80,6 +80,12 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 	inode->i_nlink = 0;	/* It is unlinked */
 	init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
 			&ramfs_file_operations);
+
+#ifndef CONFIG_MMU
+	error = ramfs_nommu_expand_for_mapping(inode, size);
+	if (error)
+		goto close_file;
+#endif
 	return file;
 
 close_file:
-- 
cgit v1.2.3


From b7e4226e4f427b59dc8e9c45a2a1a1ed1353a140 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 1 Oct 2008 21:52:41 +0100
Subject: [MIPS] Build fix: Fix irq flags type

Though from a hardware perspective it would be sensible to use only a
32-bit unsigned int type Linux defines interrupt flags to be stored in
an unsigned long and nothing else.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c   | 2 +-
 arch/mips/kernel/smtc.c     | 6 +++---
 include/asm-mips/mipsregs.h | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 35234b92b9a5..96ffc9c6d194 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
 #ifdef CONFIG_MIPS_MT_SMTC
-			unsigned int irqflags;
+			unsigned long irqflags;
 			unsigned int mtflags;
 #endif /* CONFIG_MIPS_MT_SMTC */
 
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index a516286532ab..05f2708a9029 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -556,7 +556,7 @@ void mipsmt_prepare_cpus(void)
 void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 {
 	extern u32 kernelsp[NR_CPUS];
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	LOCK_MT_PRA();
@@ -753,7 +753,7 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 {
 	int tcstatus;
 	struct smtc_ipi *pipi;
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	if (cpu == smp_processor_id()) {
@@ -975,7 +975,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm)
 	struct smtc_ipi *pipi;
 	unsigned long tcstatus;
 	int sent;
-	long flags;
+	unsigned long flags;
 	unsigned int mtflags;
 	unsigned int vpflags;
 
diff --git a/include/asm-mips/mipsregs.h b/include/asm-mips/mipsregs.h
index a46f8e258e6b..979866000da4 100644
--- a/include/asm-mips/mipsregs.h
+++ b/include/asm-mips/mipsregs.h
@@ -1462,7 +1462,7 @@ set_c0_##name(unsigned int set)					\
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1480,7 +1480,7 @@ clear_c0_##name(unsigned int clear)				\
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1498,7 +1498,7 @@ change_c0_##name(unsigned int change, unsigned int new)		\
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 								\
-- 
cgit v1.2.3


From d2bb01b042a38219fbddaafc214c5beb96248d2f Mon Sep 17 00:00:00 2001
From: "Kevin D. Kissell" <kevink@paralogos.com>
Date: Tue, 9 Sep 2008 21:35:01 +0200
Subject: [MIPS] SMTC: Close tiny holes in the SMTC IPI replay system.

Signed-off-by: Kevin D. Kissell <kevink@paralogos.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/entry.S      | 10 +++---
 arch/mips/kernel/smtc.c       |  2 +-
 include/asm-mips/stackframe.h | 72 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 67 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e29598ae939d..ffa331029e08 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,11 +79,6 @@ FEXPORT(syscall_exit)
 
 FEXPORT(restore_all)			# restore full frame
 #ifdef CONFIG_MIPS_MT_SMTC
-/* Detect and execute deferred IPI "interrupts" */
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	jal	deferred_smtc_ipi
-	LONG_S	s0, TI_REGS($28)
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 /* Re-arm any temporarily masked interrupts not explicitly "acked" */
 	mfc0	v0, CP0_TCSTATUS
@@ -112,6 +107,11 @@ FEXPORT(restore_all)			# restore full frame
 	xor	t0, t0, t3
 	mtc0	t0, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
+/* Detect and execute deferred IPI "interrupts" */
+	LONG_L	s0, TI_REGS($28)
+	LONG_S	sp, TI_REGS($28)
+	jal	deferred_smtc_ipi
+	LONG_S	s0, TI_REGS($28)
 #endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 05f2708a9029..39b491b9ad87 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -70,7 +70,7 @@ static atomic_t ipi_timer_latch[NR_CPUS];
 
 #define IPIBUF_PER_CPU 4
 
-static struct smtc_ipi_q IPIQ[NR_CPUS];
+struct smtc_ipi_q IPIQ[NR_CPUS];
 static struct smtc_ipi_q freeIPIq;
 
 
diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h
index 051e1af0bb95..4c37c4e5f72e 100644
--- a/include/asm-mips/stackframe.h
+++ b/include/asm-mips/stackframe.h
@@ -297,14 +297,31 @@
 #ifdef CONFIG_MIPS_MT_SMTC
 		.set	mips32r2
 		/*
-		 * This may not really be necessary if ints are already
-		 * inhibited here.
+		 * We need to make sure the read-modify-write
+		 * of Status below isn't perturbed by an interrupt
+		 * or cross-TC access, so we need to do at least a DMT,
+		 * protected by an interrupt-inhibit. But setting IXMT
+		 * also creates a few-cycle window where an IPI could
+		 * be queued and not be detected before potentially
+		 * returning to a WAIT or user-mode loop. It must be
+		 * replayed.
+		 *
+		 * We're in the middle of a context switch, and
+		 * we can't dispatch it directly without trashing
+		 * some registers, so we'll try to detect this unlikely
+		 * case and program a software interrupt in the VPE,
+		 * as would be done for a cross-VPE IPI.  To accomodate
+		 * the handling of that case, we're doing a DVPE instead
+		 * of just a DMT here to protect against other threads.
+		 * This is a lot of cruft to cover a tiny window.
+		 * If you can find a better design, implement it!
+		 *
 		 */
 		mfc0	v0, CP0_TCSTATUS
 		ori	v0, TCSTATUS_IXMT
 		mtc0	v0, CP0_TCSTATUS
 		_ehb
-		DMT	5				# dmt a1
+		DVPE	5				# dvpe a1
 		jal	mips_ihb
 #endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	a0, CP0_STATUS
@@ -325,17 +342,50 @@
  */
 		LONG_L	v1, PT_TCSTATUS(sp)
 		_ehb
-		mfc0	v0, CP0_TCSTATUS
+		mfc0	a0, CP0_TCSTATUS
 		andi	v1, TCSTATUS_IXMT
-		/* We know that TCStatua.IXMT should be set from above */
-		xori	v0, v0, TCSTATUS_IXMT
-		or	v0, v0, v1
-		mtc0	v0, CP0_TCSTATUS
-		_ehb
-		andi	a1, a1, VPECONTROL_TE
+		bnez	v1, 0f
+
+/*
+ * We'd like to detect any IPIs queued in the tiny window
+ * above and request an software interrupt to service them
+ * when we ERET.
+ *
+ * Computing the offset into the IPIQ array of the executing
+ * TC's IPI queue in-line would be tedious.  We use part of
+ * the TCContext register to hold 16 bits of offset that we
+ * can add in-line to find the queue head.
+ */
+		mfc0	v0, CP0_TCCONTEXT
+		la	a2, IPIQ
+		srl	v0, v0, 16
+		addu	a2, a2, v0
+		LONG_L	v0, 0(a2)
+		beqz	v0, 0f
+/*
+ * If we have a queue, provoke dispatch within the VPE by setting C_SW1
+ */
+		mfc0	v0, CP0_CAUSE
+		ori	v0, v0, C_SW1
+		mtc0	v0, CP0_CAUSE
+0:
+		/*
+		 * This test should really never branch but
+		 * let's be prudent here.  Having atomized
+		 * the shared register modifications, we can
+		 * now EVPE, and must do so before interrupts
+		 * are potentially re-enabled.
+		 */
+		andi	a1, a1, MVPCONTROL_EVP
 		beqz	a1, 1f
-		emt
+		evpe
 1:
+		/* We know that TCStatua.IXMT should be set from above */
+		xori	a0, a0, TCSTATUS_IXMT
+		or	a0, a0, v1
+		mtc0	a0, CP0_TCSTATUS
+		_ehb
+
 		.set	mips0
 #endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_L	v1, PT_EPC(sp)
-- 
cgit v1.2.3


From 8531a35e5e275b17c57c39b7911bc2b37025f28c Mon Sep 17 00:00:00 2001
From: "Kevin D. Kissell" <kevink@paralogos.com>
Date: Tue, 9 Sep 2008 21:48:52 +0200
Subject: [MIPS] SMTC: Fix SMTC dyntick support.

Rework of SMTC support to make it work with the new clock event system,
allowing "tickless" operation, and to make it compatible with the use of
the "wait_irqoff" idle loop.  The new clocking scheme means that the
previously optional IPI instant replay mechanism is now required, and has
been made more robust.

Signed-off-by: Kevin D. Kissell <kevink@paralogos.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                |  26 +---
 arch/mips/kernel/Makefile        |   1 +
 arch/mips/kernel/cevt-r4k.c      | 173 ++++++---------------
 arch/mips/kernel/cevt-smtc.c     | 321 +++++++++++++++++++++++++++++++++++++++
 arch/mips/kernel/cpu-probe.c     |  10 +-
 arch/mips/kernel/genex.S         |   4 +-
 arch/mips/kernel/smtc.c          | 252 +++++++++++++++---------------
 arch/mips/mti-malta/malta-smtc.c |   9 +-
 include/asm-mips/cevt-r4k.h      |  46 ++++++
 include/asm-mips/irqflags.h      |  26 +++-
 include/asm-mips/smtc.h          |   8 +-
 11 files changed, 597 insertions(+), 279 deletions(-)
 create mode 100644 arch/mips/kernel/cevt-smtc.c
 create mode 100644 include/asm-mips/cevt-r4k.h

(limited to 'include')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 49896a2a1d72..c930b8ceb418 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1403,7 +1403,6 @@ config MIPS_MT_SMTC
 	depends on CPU_MIPS32_R2
 	#depends on CPU_MIPS64_R2		# once there is hardware ...
 	depends on SYS_SUPPORTS_MULTITHREADING
-	select GENERIC_CLOCKEVENTS_BROADCAST
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select MIPS_MT
@@ -1451,32 +1450,17 @@ config MIPS_VPE_LOADER
 	  Includes a loader for loading an elf relocatable object
 	  onto another VPE and running it.
 
-config MIPS_MT_SMTC_INSTANT_REPLAY
-	bool "Low-latency Dispatch of Deferred SMTC IPIs"
-	depends on MIPS_MT_SMTC && !PREEMPT
-	default y
-	help
-	  SMTC pseudo-interrupts between TCs are deferred and queued
-	  if the target TC is interrupt-inhibited (IXMT). In the first
-	  SMTC prototypes, these queued IPIs were serviced on return
-	  to user mode, or on entry into the kernel idle loop. The
-	  INSTANT_REPLAY option dispatches them as part of local_irq_restore()
-	  processing, which adds runtime overhead (hence the option to turn
-	  it off), but ensures that IPIs are handled promptly even under
-	  heavy I/O interrupt load.
-
 config MIPS_MT_SMTC_IM_BACKSTOP
 	bool "Use per-TC register bits as backstop for inhibited IM bits"
 	depends on MIPS_MT_SMTC
-	default y
+	default n
 	help
 	  To support multiple TC microthreads acting as "CPUs" within
 	  a VPE, VPE-wide interrupt mask bits must be specially manipulated
 	  during interrupt handling. To support legacy drivers and interrupt
 	  controller management code, SMTC has a "backstop" to track and
 	  if necessary restore the interrupt mask. This has some performance
-	  impact on interrupt service overhead. Disable it only if you know
-	  what you are doing.
+	  impact on interrupt service overhead.
 
 config MIPS_MT_SMTC_IRQAFF
 	bool "Support IRQ affinity API"
@@ -1486,10 +1470,8 @@ config MIPS_MT_SMTC_IRQAFF
 	  Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
 	  for SMTC Linux kernel. Requires platform support, of which
 	  an example can be found in the MIPS kernel i8259 and Malta
-	  platform code.  It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY
-	  be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to
-	  interrupt dispatch, and should be used only if you know what
-	  you are doing.
+	  platform code.  Adds some overhead to interrupt dispatch, and
+	  should be used only if you know what you are doing.
 
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 706f93974797..25775cb54000 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= cevt-smtc.o
 obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
 obj-$(CONFIG_CEVT_SB1250)	+= cevt-sb1250.o
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 24a2d907aa0d..4a4c59f2737a 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -12,6 +12,14 @@
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * The SMTC Kernel for the 34K, 1004K, et. al. replaces several
+ * of these routines with SMTC-specific variants.
+ */
+
+#ifndef CONFIG_MIPS_MT_SMTC
 
 static int mips_next_event(unsigned long delta,
                            struct clock_event_device *evt)
@@ -19,60 +27,27 @@ static int mips_next_event(unsigned long delta,
 	unsigned int cnt;
 	int res;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	{
-	unsigned long flags, vpflags;
-	local_irq_save(flags);
-	vpflags = dvpe();
-#endif
 	cnt = read_c0_count();
 	cnt += delta;
 	write_c0_compare(cnt);
 	res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
-#ifdef CONFIG_MIPS_MT_SMTC
-	evpe(vpflags);
-	local_irq_restore(flags);
-	}
-#endif
 	return res;
 }
 
-static void mips_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
+#endif /* CONFIG_MIPS_MT_SMTC */
+
+void mips_set_clock_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
 {
 	/* Nothing to do ...  */
 }
 
-static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
-static int cp0_timer_irq_installed;
+DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+int cp0_timer_irq_installed;
 
-/*
- * Timer ack for an R4k-compatible timer of a known frequency.
- */
-static void c0_timer_ack(void)
-{
-	write_c0_compare(read_c0_compare());
-}
+#ifndef CONFIG_MIPS_MT_SMTC
 
-/*
- * Possibly handle a performance counter interrupt.
- * Return true if the timer interrupt should not be checked
- */
-static inline int handle_perf_irq(int r2)
-{
-	/*
-	 * The performance counter overflow interrupt may be shared with the
-	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
-	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
-	 * and we can't reliably determine if a counter interrupt has also
-	 * happened (!r2) then don't check for a timer interrupt.
-	 */
-	return (cp0_perfcount_irq < 0) &&
-		perf_irq() == IRQ_HANDLED &&
-		!r2;
-}
-
-static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2;
 	struct clock_event_device *cd;
@@ -93,12 +68,8 @@ static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 	 * interrupt.  Being the paranoiacs we are we check anyway.
 	 */
 	if (!r2 || (read_c0_cause() & (1 << 30))) {
-		c0_timer_ack();
-#ifdef CONFIG_MIPS_MT_SMTC
-		if (cpu_data[cpu].vpe_id)
-			goto out;
-		cpu = 0;
-#endif
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
 		cd->event_handler(cd);
 	}
@@ -107,65 +78,16 @@ out:
 	return IRQ_HANDLED;
 }
 
-static struct irqaction c0_compare_irqaction = {
+#endif /* Not CONFIG_MIPS_MT_SMTC */
+
+struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
-#ifdef CONFIG_MIPS_MT_SMTC
-	.flags = IRQF_DISABLED,
-#else
 	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#endif
 	.name = "timer",
 };
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
-
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
-}
-
-static void mips_broadcast(cpumask_t mask)
-{
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-}
-
-static void setup_smtc_dummy_clockevent_device(void)
-{
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
 
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
-}
-#endif
-
-static void mips_event_handler(struct clock_event_device *dev)
+void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
@@ -177,7 +99,23 @@ static int c0_compare_int_pending(void)
 	return (read_c0_cause() >> cp0_compare_irq) & 0x100;
 }
 
-static int c0_compare_int_usable(void)
+/*
+ * Compare interrupt can be routed and latched outside the core,
+ * so a single execution hazard barrier may not be enough to give
+ * it time to clear as seen in the Cause register.  4 time the
+ * pipeline depth seems reasonably conservative, and empirically
+ * works better in configurations with high CPU/bus clock ratios.
+ */
+
+#define compare_change_hazard() \
+	do { \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+	} while (0)
+
+int c0_compare_int_usable(void)
 {
 	unsigned int delta;
 	unsigned int cnt;
@@ -187,7 +125,7 @@ static int c0_compare_int_usable(void)
 	 */
 	if (c0_compare_int_pending()) {
 		write_c0_compare(read_c0_count());
-		irq_disable_hazard();
+		compare_change_hazard();
 		if (c0_compare_int_pending())
 			return 0;
 	}
@@ -196,7 +134,7 @@ static int c0_compare_int_usable(void)
 		cnt = read_c0_count();
 		cnt += delta;
 		write_c0_compare(cnt);
-		irq_disable_hazard();
+		compare_change_hazard();
 		if ((int)(read_c0_count() - cnt) < 0)
 		    break;
 		/* increase delta if the timer was already expired */
@@ -205,11 +143,12 @@ static int c0_compare_int_usable(void)
 	while ((int)(read_c0_count() - cnt) <= 0)
 		;	/* Wait for expiry  */
 
+	compare_change_hazard();
 	if (!c0_compare_int_pending())
 		return 0;
 
 	write_c0_compare(read_c0_count());
-	irq_disable_hazard();
+	compare_change_hazard();
 	if (c0_compare_int_pending())
 		return 0;
 
@@ -219,6 +158,8 @@ static int c0_compare_int_usable(void)
 	return 1;
 }
 
+#ifndef CONFIG_MIPS_MT_SMTC
+
 int __cpuinit mips_clockevent_init(void)
 {
 	uint64_t mips_freq = mips_hpt_frequency;
@@ -229,17 +170,6 @@ int __cpuinit mips_clockevent_init(void)
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	setup_smtc_dummy_clockevent_device();
-
-	/*
-	 * On SMTC we only register VPE0's compare interrupt as clockevent
-	 * device.
-	 */
-	if (cpu)
-		return 0;
-#endif
-
 	if (!c0_compare_int_usable())
 		return -ENXIO;
 
@@ -265,13 +195,9 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-#ifdef CONFIG_MIPS_MT_SMTC
-	cd->cpumask		= CPU_MASK_ALL;
-#else
 	cd->cpumask		= cpumask_of_cpu(cpu);
-#endif
 	cd->set_next_event	= mips_next_event;
-	cd->set_mode		= mips_set_mode;
+	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
 
 	clockevents_register_device(cd);
@@ -281,12 +207,9 @@ int __cpuinit mips_clockevent_init(void)
 
 	cp0_timer_irq_installed = 1;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq)
-	setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT);
-#else
 	setup_irq(irq, &c0_compare_irqaction);
-#endif
 
 	return 0;
 }
+
+#endif /* Not CONFIG_MIPS_MT_SMTC */
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
new file mode 100644
index 000000000000..5162fe4b5952
--- /dev/null
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -0,0 +1,321 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
+ */
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+
+#include <asm/smtc_ipi.h>
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * Variant clock event timer support for SMTC on MIPS 34K, 1004K
+ * or other MIPS MT cores.
+ *
+ * Notes on SMTC Support:
+ *
+ * SMTC has multiple microthread TCs pretending to be Linux CPUs.
+ * But there's only one Count/Compare pair per VPE, and Compare
+ * interrupts are taken opportunisitically by available TCs
+ * bound to the VPE with the Count register.  The new timer
+ * framework provides for global broadcasts, but we really
+ * want VPE-level multicasts for best behavior. So instead
+ * of invoking the high-level clock-event broadcast code,
+ * this version of SMTC support uses the historical SMTC
+ * multicast mechanisms "under the hood", appearing to the
+ * generic clock layer as if the interrupts are per-CPU.
+ *
+ * The approach taken here is to maintain a set of NR_CPUS
+ * virtual timers, and track which "CPU" needs to be alerted
+ * at each event.
+ *
+ * It's unlikely that we'll see a MIPS MT core with more than
+ * 2 VPEs, but we *know* that we won't need to handle more
+ * VPEs than we have "CPUs".  So NCPUs arrays of NCPUs elements
+ * is always going to be overkill, but always going to be enough.
+ */
+
+unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
+static int smtc_nextinvpe[NR_CPUS];
+
+/*
+ * Timestamps stored are absolute values to be programmed
+ * into Count register.  Valid timestamps will never be zero.
+ * If a Zero Count value is actually calculated, it is converted
+ * to be a 1, which will introduce 1 or two CPU cycles of error
+ * roughly once every four billion events, which at 1000 HZ means
+ * about once every 50 days.  If that's actually a problem, one
+ * could alternate squashing 0 to 1 and to -1.
+ */
+
+#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
+#define ISVALID(x) ((x) != 0L)
+
+/*
+ * Time comparison is subtle, as it's really truncated
+ * modular arithmetic.
+ */
+
+#define IS_SOONER(a, b, reference) \
+    (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
+
+/*
+ * CATCHUP_INCREMENT, used when the function falls behind the counter.
+ * Could be an increasing function instead of a constant;
+ */
+
+#define CATCHUP_INCREMENT 64
+
+static int mips_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	unsigned long timestamp, reference, previous;
+	unsigned long nextcomp = 0L;
+	int vpe = current_cpu_data.vpe_id;
+	int cpu = smp_processor_id();
+	local_irq_save(flags);
+	mtflags = dmt();
+
+	/*
+	 * Maintain the per-TC virtual timer
+	 * and program the per-VPE shared Count register
+	 * as appropriate here...
+	 */
+	reference = (unsigned long)read_c0_count();
+	timestamp = MAKEVALID(reference + delta);
+	/*
+	 * To really model the clock, we have to catch the case
+	 * where the current next-in-VPE timestamp is the old
+	 * timestamp for the calling CPE, but the new value is
+	 * in fact later.  In that case, we have to do a full
+	 * scan and discover the new next-in-VPE CPU id and
+	 * timestamp.
+	 */
+	previous = smtc_nexttime[vpe][cpu];
+	if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
+	    && IS_SOONER(previous, timestamp, reference)) {
+		int i;
+		int soonest = cpu;
+
+		/*
+		 * Update timestamp array here, so that new
+		 * value gets considered along with those of
+		 * other virtual CPUs on the VPE.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+		for_each_online_cpu(i) {
+			if (ISVALID(smtc_nexttime[vpe][i])
+			    && IS_SOONER(smtc_nexttime[vpe][i],
+				smtc_nexttime[vpe][soonest], reference)) {
+				    soonest = i;
+			}
+		}
+		smtc_nextinvpe[vpe] = soonest;
+		nextcomp = smtc_nexttime[vpe][soonest];
+	/*
+	 * Otherwise, we don't have to process the whole array rank,
+	 * we just have to see if the event horizon has gotten closer.
+	 */
+	} else {
+		if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
+		    IS_SOONER(timestamp,
+			smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
+			    smtc_nextinvpe[vpe] = cpu;
+			    nextcomp = timestamp;
+		}
+		/*
+		 * Since next-in-VPE may me the same as the executing
+		 * virtual CPU, we update the array *after* checking
+		 * its value.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+	}
+
+	/*
+	 * It may be that, in fact, we don't need to update Compare,
+	 * but if we do, we want to make sure we didn't fall into
+	 * a crack just behind Count.
+	 */
+	if (ISVALID(nextcomp)) {
+		write_c0_compare(nextcomp);
+		ehb();
+		/*
+		 * We never return an error, we just make sure
+		 * that we trigger the handlers as quickly as
+		 * we can if we fell behind.
+		 */
+		while ((nextcomp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX) {
+			nextcomp += CATCHUP_INCREMENT;
+			write_c0_compare(nextcomp);
+			ehb();
+		}
+	}
+	emt(mtflags);
+	local_irq_restore(flags);
+	return 0;
+}
+
+
+void smtc_distribute_timer(int vpe)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	int cpu;
+	struct clock_event_device *cd;
+	unsigned long nextstamp = 0L;
+	unsigned long reference;
+
+
+repeat:
+	for_each_online_cpu(cpu) {
+	    /*
+	     * Find virtual CPUs within the current VPE who have
+	     * unserviced timer requests whose time is now past.
+	     */
+	    local_irq_save(flags);
+	    mtflags = dmt();
+	    if (cpu_data[cpu].vpe_id == vpe &&
+		ISVALID(smtc_nexttime[vpe][cpu])) {
+		reference = (unsigned long)read_c0_count();
+		if ((smtc_nexttime[vpe][cpu] - reference)
+			 > (unsigned long)LONG_MAX) {
+			    smtc_nexttime[vpe][cpu] = 0L;
+			    emt(mtflags);
+			    local_irq_restore(flags);
+			    /*
+			     * We don't send IPIs to ourself.
+			     */
+			    if (cpu != smp_processor_id()) {
+				smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
+			    } else {
+				cd = &per_cpu(mips_clockevent_device, cpu);
+				cd->event_handler(cd);
+			    }
+		} else {
+			/* Local to VPE but Valid Time not yet reached. */
+			if (!ISVALID(nextstamp) ||
+			    IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
+			    reference)) {
+				smtc_nextinvpe[vpe] = cpu;
+				nextstamp = smtc_nexttime[vpe][cpu];
+			}
+			emt(mtflags);
+			local_irq_restore(flags);
+		}
+	    } else {
+		emt(mtflags);
+		local_irq_restore(flags);
+
+	    }
+	}
+	/* Reprogram for interrupt at next soonest timestamp for VPE */
+	if (ISVALID(nextstamp)) {
+		write_c0_compare(nextstamp);
+		ehb();
+		if ((nextstamp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX)
+				goto repeat;
+	}
+}
+
+
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	/* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
+	handle_perf_irq(1);
+
+	if (read_c0_cause() & (1 << 30)) {
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
+		smtc_distribute_timer(cpu_data[cpu].vpe_id);
+	}
+	return IRQ_HANDLED;
+}
+
+
+int __cpuinit mips_clockevent_init(void)
+{
+	uint64_t mips_freq = mips_hpt_frequency;
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd;
+	unsigned int irq;
+	int i;
+	int j;
+
+	if (!cpu_has_counter || !mips_hpt_frequency)
+		return -ENXIO;
+	if (cpu == 0) {
+		for (i = 0; i < num_possible_cpus(); i++) {
+			smtc_nextinvpe[i] = 0;
+			for (j = 0; j < num_possible_cpus(); j++)
+				smtc_nexttime[i][j] = 0L;
+		}
+		/*
+		 * SMTC also can't have the usablility test
+		 * run by secondary TCs once Compare is in use.
+		 */
+		if (!c0_compare_int_usable())
+			return -ENXIO;
+	}
+
+	/*
+	 * With vectored interrupts things are getting platform specific.
+	 * get_c0_compare_int is a hook to allow a platform to return the
+	 * interrupt number of it's liking.
+	 */
+	irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
+	if (get_c0_compare_int)
+		irq = get_c0_compare_int();
+
+	cd = &per_cpu(mips_clockevent_device, cpu);
+
+	cd->name		= "MIPS";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+
+	/* Calculate the min / max delta */
+	cd->mult	= div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
+	cd->shift		= 32;
+	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+
+	cd->rating		= 300;
+	cd->irq			= irq;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= mips_next_event;
+	cd->set_mode		= mips_set_clock_mode;
+	cd->event_handler	= mips_event_handler;
+
+	clockevents_register_device(cd);
+
+	/*
+	 * On SMTC we only want to do the data structure
+	 * initialization and IRQ setup once.
+	 */
+	if (cpu)
+		return 0;
+	/*
+	 * And we need the hwmask associated with the c0_compare
+	 * vector to be initialized.
+	 */
+	irq_hwmask[irq] = (0x100 << cp0_compare_irq);
+	if (cp0_timer_irq_installed)
+		return 0;
+
+	cp0_timer_irq_installed = 1;
+
+	setup_irq(irq, &c0_compare_irqaction);
+
+	return 0;
+}
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 11c92dc53791..e621fda8ab37 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -54,14 +54,18 @@ extern void r4k_wait(void);
  * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes
  * using this version a gamble.
  */
-static void r4k_wait_irqoff(void)
+void r4k_wait_irqoff(void)
 {
 	local_irq_disable();
 	if (!need_resched())
-		__asm__("	.set	mips3		\n"
+		__asm__("	.set	push		\n"
+			"	.set	mips3		\n"
 			"	wait			\n"
-			"	.set	mips0		\n");
+			"	.set	pop		\n");
 	local_irq_enable();
+	__asm__(" 	.globl __pastwait	\n"
+		"__pastwait:			\n");
+	return;
 }
 
 /*
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index f886dd7f708e..01dcbe38fa01 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -282,8 +282,8 @@ NESTED(except_vec_vi_handler, 0, sp)
 	and	t0, a0, t1
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 	mfc0	t2, CP0_TCCONTEXT
-	or	t0, t0, t2
-	mtc0	t0, CP0_TCCONTEXT
+	or	t2, t0, t2
+	mtc0	t2, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
 	xor	t1, t1, t0
 	mtc0	t1, CP0_STATUS
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 39b491b9ad87..897fb2b4751c 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -1,4 +1,21 @@
-/* Copyright (C) 2004 Mips Technologies, Inc */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) 2004 Mips Technologies, Inc
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
 
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
@@ -21,7 +38,6 @@
 #include <asm/time.h>
 #include <asm/addrspace.h>
 #include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
 #include <asm/smtc_proc.h>
 
 /*
@@ -58,11 +74,6 @@ unsigned long irq_hwmask[NR_IRQS];
 
 asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
 
-/*
- * Clock interrupt "latch" buffers, per "CPU"
- */
-
-static atomic_t ipi_timer_latch[NR_CPUS];
 
 /*
  * Number of InterProcessor Interrupt (IPI) message buffers to allocate
@@ -282,7 +293,7 @@ static void smtc_configure_tlb(void)
  * phys_cpu_present_map and the logical/physical mappings.
  */
 
-int __init mipsmt_build_cpu_map(int start_cpu_slot)
+int __init smtc_build_cpu_map(int start_cpu_slot)
 {
 	int i, ntcs;
 
@@ -325,7 +336,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
 			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
 			| TCSTATUS_A);
-	write_tc_c0_tccontext(0);
+	/*
+	 * TCContext gets an offset from the base of the IPIQ array
+	 * to be used in low-level code to detect the presence of
+	 * an active IPI queue
+	 */
+	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
 	/* In general, all TCs should have the same cpu_data indications */
@@ -336,10 +352,18 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
 	cpu_data[cpu].vpe_id = vpe;
 	cpu_data[cpu].tc_id = tc;
+	/* Multi-core SMTC hasn't been tested, but be prepared */
+	cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
 }
 
+/*
+ * Tweak to get Count registes in as close a sync as possible.
+ * Value seems good for 34K-class cores.
+ */
+
+#define CP0_SKEW 8
 
-void mipsmt_prepare_cpus(void)
+void smtc_prepare_cpus(int cpus)
 {
 	int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
 	unsigned long flags;
@@ -363,13 +387,13 @@ void mipsmt_prepare_cpus(void)
 		IPIQ[i].head = IPIQ[i].tail = NULL;
 		spin_lock_init(&IPIQ[i].lock);
 		IPIQ[i].depth = 0;
-		atomic_set(&ipi_timer_latch[i], 0);
 	}
 
 	/* cpu_data index starts at zero */
 	cpu = 0;
 	cpu_data[cpu].vpe_id = 0;
 	cpu_data[cpu].tc_id = 0;
+	cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
 	cpu++;
 
 	/* Report on boot-time options */
@@ -484,7 +508,8 @@ void mipsmt_prepare_cpus(void)
 			write_vpe_c0_compare(0);
 			/* Propagate Config7 */
 			write_vpe_c0_config7(read_c0_config7());
-			write_vpe_c0_count(read_c0_count());
+			write_vpe_c0_count(read_c0_count() + CP0_SKEW);
+			ehb();
 		}
 		/* enable multi-threading within VPE */
 		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
@@ -585,24 +610,22 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 
 void smtc_init_secondary(void)
 {
-	/*
-	 * Start timer on secondary VPEs if necessary.
-	 * plat_timer_setup has already have been invoked by init/main
-	 * on "boot" TC.  Like per_cpu_trap_init() hack, this assumes that
-	 * SMTC init code assigns TCs consdecutively and in ascending order
-	 * to across available VPEs.
-	 */
-	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
-	    ((read_c0_tcbind() & TCBIND_CURVPE)
-	    != cpu_data[smp_processor_id() - 1].vpe_id)){
-		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
-	}
-
 	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
 {
+	int cpu = smp_processor_id();
+
+	/*
+	 * Lowest-numbered CPU per VPE starts a clock tick.
+	 * Like per_cpu_trap_init() hack, this assumes that
+	 * SMTC init code assigns TCs consdecutively and
+	 * in ascending order across available VPEs.
+	 */
+	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
+		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
@@ -755,6 +778,8 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 	struct smtc_ipi *pipi;
 	unsigned long flags;
 	int mtflags;
+	unsigned long tcrestart;
+	extern void r4k_wait_irqoff(void), __pastwait(void);
 
 	if (cpu == smp_processor_id()) {
 		printk("Cannot Send IPI to self!\n");
@@ -771,8 +796,6 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 	pipi->arg = (void *)action;
 	pipi->dest = cpu;
 	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		if (type == SMTC_CLOCK_TICK)
-			atomic_inc(&ipi_timer_latch[cpu]);
 		/* If not on same VPE, enqueue and send cross-VPE interrupt */
 		smtc_ipi_nq(&IPIQ[cpu], pipi);
 		LOCK_CORE_PRA();
@@ -800,22 +823,29 @@ void smtc_send_ipi(int cpu, int type, unsigned int action)
 
 		if ((tcstatus & TCSTATUS_IXMT) != 0) {
 			/*
-			 * Spin-waiting here can deadlock,
-			 * so we queue the message for the target TC.
+			 * If we're in the the irq-off version of the wait
+			 * loop, we need to force exit from the wait and
+			 * do a direct post of the IPI.
+			 */
+			if (cpu_wait == r4k_wait_irqoff) {
+				tcrestart = read_tc_c0_tcrestart();
+				if (tcrestart >= (unsigned long)r4k_wait_irqoff
+				    && tcrestart < (unsigned long)__pastwait) {
+					write_tc_c0_tcrestart(__pastwait);
+					tcstatus &= ~TCSTATUS_IXMT;
+					write_tc_c0_tcstatus(tcstatus);
+					goto postdirect;
+				}
+			}
+			/*
+			 * Otherwise we queue the message for the target TC
+			 * to pick up when he does a local_irq_restore()
 			 */
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
-			/* Try to reduce redundant timer interrupt messages */
-			if (type == SMTC_CLOCK_TICK) {
-			    if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){
-				smtc_ipi_nq(&freeIPIq, pipi);
-				return;
-			    }
-			}
 			smtc_ipi_nq(&IPIQ[cpu], pipi);
 		} else {
-			if (type == SMTC_CLOCK_TICK)
-				atomic_inc(&ipi_timer_latch[cpu]);
+postdirect:
 			post_direct_ipi(cpu, pipi);
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
@@ -883,7 +913,7 @@ static void ipi_call_interrupt(void)
 	smp_call_function_interrupt();
 }
 
-DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void ipi_decode(struct smtc_ipi *pipi)
 {
@@ -891,20 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi)
 	struct clock_event_device *cd;
 	void *arg_copy = pipi->arg;
 	int type_copy = pipi->type;
-	int ticks;
-
 	smtc_ipi_nq(&freeIPIq, pipi);
 	switch (type_copy) {
 	case SMTC_CLOCK_TICK:
 		irq_enter();
 		kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
-		cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-		ticks = atomic_read(&ipi_timer_latch[cpu]);
-		atomic_sub(ticks, &ipi_timer_latch[cpu]);
-		while (ticks) {
-			cd->event_handler(cd);
-			ticks--;
-		}
+		cd = &per_cpu(mips_clockevent_device, cpu);
+		cd->event_handler(cd);
 		irq_exit();
 		break;
 
@@ -937,24 +960,48 @@ void ipi_decode(struct smtc_ipi *pipi)
 	}
 }
 
+/*
+ * Similar to smtc_ipi_replay(), but invoked from context restore,
+ * so it reuses the current exception frame rather than set up a
+ * new one with self_ipi.
+ */
+
 void deferred_smtc_ipi(void)
 {
-	struct smtc_ipi *pipi;
-	unsigned long flags;
-/* DEBUG */
-	int q = smp_processor_id();
+	int cpu = smp_processor_id();
 
 	/*
 	 * Test is not atomic, but much faster than a dequeue,
 	 * and the vast majority of invocations will have a null queue.
+	 * If irq_disabled when this was called, then any IPIs queued
+	 * after we test last will be taken on the next irq_enable/restore.
+	 * If interrupts were enabled, then any IPIs added after the
+	 * last test will be taken directly.
 	 */
-	if (IPIQ[q].head != NULL) {
-		while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) {
-			/* ipi_decode() should be called with interrupts off */
-			local_irq_save(flags);
+
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
+
+		/*
+		 * It may be possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		if (pipi != NULL)
 			ipi_decode(pipi);
-			local_irq_restore(flags);
-		}
+		/*
+		 * The use of the __raw_local restore isn't
+		 * as obviously necessary here as in smtc_ipi_replay(),
+		 * but it's more efficient, given that we're already
+		 * running down the IPI queue.
+		 */
+		__raw_local_irq_restore(flags);
 	}
 }
 
@@ -1066,55 +1113,53 @@ static void setup_cross_vpe_interrupts(unsigned int nvpe)
 
 /*
  * SMTC-specific hacks invoked from elsewhere in the kernel.
- *
- * smtc_ipi_replay is called from raw_local_irq_restore which is only ever
- * called with interrupts disabled.  We do rely on interrupts being disabled
- * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would
- * result in a recursive call to raw_local_irq_restore().
  */
 
-static void __smtc_ipi_replay(void)
+ /*
+  * smtc_ipi_replay is called from raw_local_irq_restore
+  */
+
+void smtc_ipi_replay(void)
 {
 	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * To the extent that we've ever turned interrupts off,
 	 * we may have accumulated deferred IPIs.  This is subtle.
-	 * If we use the smtc_ipi_qdepth() macro, we'll get an
-	 * exact number - but we'll also disable interrupts
-	 * and create a window of failure where a new IPI gets
-	 * queued after we test the depth but before we re-enable
-	 * interrupts. So long as IXMT never gets set, however,
 	 * we should be OK:  If we pick up something and dispatch
 	 * it here, that's great. If we see nothing, but concurrent
 	 * with this operation, another TC sends us an IPI, IXMT
 	 * is clear, and we'll handle it as a real pseudo-interrupt
-	 * and not a pseudo-pseudo interrupt.
+	 * and not a pseudo-pseudo interrupt.  The important thing
+	 * is to do the last check for queued message *after* the
+	 * re-enabling of interrupts.
 	 */
-	if (IPIQ[cpu].depth > 0) {
-		while (1) {
-			struct smtc_ipi_q *q = &IPIQ[cpu];
-			struct smtc_ipi *pipi;
-			extern void self_ipi(struct smtc_ipi *);
-
-			spin_lock(&q->lock);
-			pipi = __smtc_ipi_dq(q);
-			spin_unlock(&q->lock);
-			if (!pipi)
-				break;
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
 
+		/*
+		 * It's just possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		/*
+		 ** But use a raw restore here to avoid recursion.
+		 */
+		__raw_local_irq_restore(flags);
+
+		if (pipi) {
 			self_ipi(pipi);
 			smtc_cpu_stats[cpu].selfipis++;
 		}
 	}
 }
 
-void smtc_ipi_replay(void)
-{
-	raw_local_irq_disable();
-	__smtc_ipi_replay();
-}
-
 EXPORT_SYMBOL(smtc_ipi_replay);
 
 void smtc_idle_loop_hook(void)
@@ -1193,40 +1238,13 @@ void smtc_idle_loop_hook(void)
 		}
 	}
 
-	/*
-	 * Now that we limit outstanding timer IPIs, check for hung TC
-	 */
-	for (tc = 0; tc < NR_CPUS; tc++) {
-		/* Don't check ourself - we'll dequeue IPIs just below */
-		if ((tc != smp_processor_id()) &&
-		    atomic_read(&ipi_timer_latch[tc]) > timerq_limit) {
-		    if (clock_hang_reported[tc] == 0) {
-			pdb_msg += sprintf(pdb_msg,
-				"TC %d looks hung with timer latch at %d\n",
-				tc, atomic_read(&ipi_timer_latch[tc]));
-			clock_hang_reported[tc]++;
-			}
-		}
-	}
 	emt(mtflags);
 	local_irq_restore(flags);
 	if (pdb_msg != &id_ho_db_msg[0])
 		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
 #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
 
-	/*
-	 * Replay any accumulated deferred IPIs. If "Instant Replay"
-	 * is in use, there should never be any.
-	 */
-#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
-	{
-		unsigned long flags;
-
-		local_irq_save(flags);
-		__smtc_ipi_replay();
-		local_irq_restore(flags);
-	}
-#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */
+	smtc_ipi_replay();
 }
 
 void smtc_soft_dump(void)
@@ -1242,10 +1260,6 @@ void smtc_soft_dump(void)
 		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
 	}
 	smtc_ipi_qdump();
-	printk("Timer IPI Backlogs:\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i]));
-	}
 	printk("%d Recoveries of \"stolen\" FPU\n",
 	       atomic_read(&smtc_fpu_recoveries));
 }
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ea705e49454..f84a46a8ae6e 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -84,12 +84,17 @@ static void msmtc_cpus_done(void)
 
 static void __init msmtc_smp_setup(void)
 {
-	mipsmt_build_cpu_map(0);
+	/*
+	 * we won't get the definitive value until
+	 * we've run smtc_prepare_cpus later, but
+	 * we would appear to need an upper bound now.
+	 */
+	smp_num_siblings = smtc_build_cpu_map(0);
 }
 
 static void __init msmtc_prepare_cpus(unsigned int max_cpus)
 {
-	mipsmt_prepare_cpus();
+	smtc_prepare_cpus(max_cpus);
 }
 
 struct plat_smp_ops msmtc_smp_ops = {
diff --git a/include/asm-mips/cevt-r4k.h b/include/asm-mips/cevt-r4k.h
new file mode 100644
index 000000000000..fa4328f9124f
--- /dev/null
+++ b/include/asm-mips/cevt-r4k.h
@@ -0,0 +1,46 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
+
+/*
+ * Definitions used for common event timer implementation
+ * for MIPS 4K-type processors and their MIPS MT variants.
+ * Avoids unsightly extern declarations in C files.
+ */
+#ifndef __ASM_CEVT_R4K_H
+#define __ASM_CEVT_R4K_H
+
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+
+void mips_event_handler(struct clock_event_device *dev);
+int c0_compare_int_usable(void);
+void mips_set_clock_mode(enum clock_event_mode, struct clock_event_device *);
+irqreturn_t c0_compare_interrupt(int, void *);
+
+extern struct irqaction c0_compare_irqaction;
+extern int cp0_timer_irq_installed;
+
+/*
+ * Possibly handle a performance counter interrupt.
+ * Return true if the timer interrupt should not be checked
+ */
+
+static inline int handle_perf_irq(int r2)
+{
+	/*
+	 * The performance counter overflow interrupt may be shared with the
+	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
+	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
+	 * and we can't reliably determine if a counter interrupt has also
+	 * happened (!r2) then don't check for a timer interrupt.
+	 */
+	return (cp0_perfcount_irq < 0) &&
+		perf_irq() == IRQ_HANDLED &&
+		!r2;
+}
+
+#endif /* __ASM_CEVT_R4K_H */
diff --git a/include/asm-mips/irqflags.h b/include/asm-mips/irqflags.h
index 881e8866501d..701ec0ba8fa9 100644
--- a/include/asm-mips/irqflags.h
+++ b/include/asm-mips/irqflags.h
@@ -38,8 +38,17 @@ __asm__(
 	"	.set	pop						\n"
 	"	.endm");
 
+extern void smtc_ipi_replay(void);
+
 static inline void raw_local_irq_enable(void)
 {
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC kernel needs to do a software replay of queued
+	 * IPIs, at the cost of call overhead on each local_irq_enable()
+	 */
+	smtc_ipi_replay();
+#endif
 	__asm__ __volatile__(
 		"raw_local_irq_enable"
 		: /* no outputs */
@@ -47,6 +56,7 @@ static inline void raw_local_irq_enable(void)
 		: "memory");
 }
 
+
 /*
  * For cli() we have to insert nops to make sure that the new value
  * has actually arrived in the status register before the end of this
@@ -185,15 +195,14 @@ __asm__(
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-extern void smtc_ipi_replay(void);
 
 static inline void raw_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
-#ifdef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
+#ifdef CONFIG_MIPS_MT_SMTC
 	/*
-	 * CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY does prompt replay of deferred
+	 * SMTC kernel needs to do a software replay of queued
 	 * IPIs, at the cost of branch and call overhead on each
 	 * local_irq_restore()
 	 */
@@ -208,6 +217,17 @@ static inline void raw_local_irq_restore(unsigned long flags)
 		: "memory");
 }
 
+static inline void __raw_local_irq_restore(unsigned long flags)
+{
+	unsigned long __tmp1;
+
+	__asm__ __volatile__(
+		"raw_local_irq_restore\t%0"
+		: "=r" (__tmp1)
+		: "0" (flags)
+		: "memory");
+}
+
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
 #ifdef CONFIG_MIPS_MT_SMTC
diff --git a/include/asm-mips/smtc.h b/include/asm-mips/smtc.h
index 3639b28f80db..ea60bf08dcb0 100644
--- a/include/asm-mips/smtc.h
+++ b/include/asm-mips/smtc.h
@@ -6,6 +6,7 @@
  */
 
 #include <asm/mips_mt.h>
+#include <asm/smtc_ipi.h>
 
 /*
  * System-wide SMTC status information
@@ -38,14 +39,15 @@ struct mm_struct;
 struct task_struct;
 
 void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu);
-
+void self_ipi(struct smtc_ipi *);
 void smtc_flush_tlb_asid(unsigned long asid);
-extern int mipsmt_build_cpu_map(int startslot);
-extern void mipsmt_prepare_cpus(void);
+extern int smtc_build_cpu_map(int startslot);
+extern void smtc_prepare_cpus(int cpus);
 extern void smtc_smp_finish(void);
 extern void smtc_boot_secondary(int cpu, struct task_struct *t);
 extern void smtc_cpus_done(void);
 
+
 /*
  * Sharing the TLB between multiple VPEs means that the
  * "random" index selection function is not allowed to
-- 
cgit v1.2.3


From 897312bd240357c88ce906633703c324c6f0a5cd Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 3 Oct 2008 15:23:41 -0700
Subject: include/linux/stacktrace.h: declare struct task_struct

include/linux/stacktrace.h:13: warning:
 'struct task_struct' declared inside parameter list

(This might be a hard error on sparc64, which uses this header and has
-Werror)

Reported-by: "Randy.Dunlap" <rdunlap@xenotime.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stacktrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 5da9794b2d78..b106fd8e0d5c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_STACKTRACE_H
 #define __LINUX_STACKTRACE_H
 
+struct task_struct;
+
 #ifdef CONFIG_STACKTRACE
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
-- 
cgit v1.2.3


From f20f258603ebc5da91e76884cf0c0d7ac9804b1c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Sun, 5 Oct 2008 18:23:27 +0200
Subject: ide-cd: temporary tray close fix

This one fixes http://bugzilla.kernel.org/show_bug.cgi?id=11602.

A more generic fix for drives which cannot autoclose tray will follow.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
[bart: add an extra parentheses for consistency with the rest of kernel code]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 5 ++++-
 include/linux/ide.h  | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 49a8c589e346..e1cd7d520fc0 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1661,7 +1661,9 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 		cdi->mask &= ~CDC_PLAY_AUDIO;
 
 	mechtype = buf[8 + 6] >> 5;
-	if (mechtype == mechtype_caddy || mechtype == mechtype_popup)
+	if (mechtype == mechtype_caddy ||
+	    mechtype == mechtype_popup ||
+	    (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE))
 		cdi->mask |= CDC_CLOSE_TRAY;
 
 	if (cdi->sanyo_slot > 0) {
@@ -1859,6 +1861,7 @@ static const struct cd_list_entry ide_cd_quirks_list[] = {
 	{ "MATSHITADVD-ROM SR-8176", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "MATSHITADVD-ROM SR-8174", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "Optiarc DVD RW AD-5200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
+	{ "Optiarc DVD RW AD-7543A", NULL,   IDE_AFLAG_NO_AUTOCLOSE	     },
 	{ NULL, NULL, 0 }
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1524829f73f2..6514db8fd2e4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -366,7 +366,9 @@ enum {
 	/* Currently on a filemark */
 	IDE_AFLAG_FILEMARK		= (1 << 25),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26)
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26),
+
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 27),
 };
 
 struct ide_drive_s {
-- 
cgit v1.2.3


From fd3d2764ee5aad862e51c21b8239561acdea8c2f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 3 Oct 2008 22:43:38 +0100
Subject: [MIPS] IP27: Fix build errors if CONFIG_MAPPED_KERNEL=y

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/head.S             | 1 +
 include/asm-mips/sn/mapped_kernel.h | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 361364501d34..492a0a8d70fb 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -22,6 +22,7 @@
 #include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/page.h>
+#include <asm/pgtable-bits.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
 
diff --git a/include/asm-mips/sn/mapped_kernel.h b/include/asm-mips/sn/mapped_kernel.h
index c3dd5d0d525f..721496a0bb92 100644
--- a/include/asm-mips/sn/mapped_kernel.h
+++ b/include/asm-mips/sn/mapped_kernel.h
@@ -5,6 +5,8 @@
 #ifndef __ASM_SN_MAPPED_KERNEL_H
 #define __ASM_SN_MAPPED_KERNEL_H
 
+#include <linux/mmzone.h>
+
 /*
  * Note on how mapped kernels work: the text and data section is
  * compiled at cksseg segment (LOADADDR = 0xc001c000), and the
@@ -29,10 +31,8 @@
 #define MAPPED_ADDR_RO_TO_PHYS(x)	(x - REP_BASE)
 #define MAPPED_ADDR_RW_TO_PHYS(x)	(x - REP_BASE - 16777216)
 
-#define MAPPED_KERN_RO_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_ro_baseaddr)
-#define MAPPED_KERN_RW_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_rw_baseaddr)
+#define MAPPED_KERN_RO_PHYSBASE(n) (hub_data(n)->kern_vars.kv_ro_baseaddr)
+#define MAPPED_KERN_RW_PHYSBASE(n) (hub_data(n)->kern_vars.kv_rw_baseaddr)
 
 #define MAPPED_KERN_RO_TO_PHYS(x) \
 				((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \
-- 
cgit v1.2.3