Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/acpi.c | 28
-rw-r--r-- | arch/ia64/kernel/asm-offsets.c | 35
-rw-r--r-- | arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 8
-rw-r--r-- | arch/ia64/kernel/cyclone.c | 60
-rw-r--r-- | arch/ia64/kernel/entry.S | 2
-rw-r--r-- | arch/ia64/kernel/fsys.S | 179
-rw-r--r-- | arch/ia64/kernel/fsyscall_gtod_data.h | 23
-rw-r--r-- | arch/ia64/kernel/head.S | 4
-rw-r--r-- | arch/ia64/kernel/init_task.c | 1
-rw-r--r-- | arch/ia64/kernel/iosapic.c | 655
-rw-r--r-- | arch/ia64/kernel/irq.c | 7
-rw-r--r-- | arch/ia64/kernel/irq_ia64.c | 335
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 7
-rw-r--r-- | arch/ia64/kernel/machvec.c | 27
-rw-r--r-- | arch/ia64/kernel/mca.c | 51
-rw-r--r-- | arch/ia64/kernel/msi_ia64.c | 23
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 4
-rw-r--r-- | arch/ia64/kernel/process.c | 17
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 11
-rw-r--r-- | arch/ia64/kernel/setup.c | 29
-rw-r--r-- | arch/ia64/kernel/smp.c | 12
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 16
-rw-r--r-- | arch/ia64/kernel/time.c | 112
-rw-r--r-- | arch/ia64/kernel/vmlinux.lds.S | 15 |
24 files changed, 1047 insertions, 614 deletions
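The common thread of this series is the replacement of the ia64 time_interpolator machinery with the generic clocksource framework (see the cyclone.c conversion below, plus the fsys.S and asm-offsets.c hunks that teach the fsyscall gettimeofday path about it). As a sketch only, not part of the commit, a minimal MMIO clocksource following the same 2.6.23-era pattern might look like the following; the "example" name, the 100 MHz frequency, and the example_mc mapping are hypothetical stand-ins, while the argument-less read() signature, the fsys_mmio field, clocksource_hz2mult() and clocksource_register() all match what the cyclone hunk itself uses:

#include <linux/clocksource.h>
#include <linux/init.h>
#include <asm/io.h>

#define EXAMPLE_TIMER_FREQ	100000000	/* hypothetical 100 MHz counter */

static void __iomem *example_mc;	/* counter register, ioremapped during platform setup */

/* 2.6.23-era read hook: takes no argument and returns a cycle_t. */
static cycle_t read_example(void)
{
	return (cycle_t)readq(example_mc);
}

static struct clocksource clocksource_example = {
	.name	= "example",
	.rating	= 300,			/* preferred over a jittery ITC */
	.read	= read_example,
	.mask	= (1LL << 40) - 1,	/* 40-bit counter, as for cyclone */
	.shift	= 16,
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init example_clocksource_init(void)
{
	if (!example_mc)
		return -ENODEV;
	/* fsys_mmio (added to struct clocksource by this series) lets the
	 * fsys_gettimeofday fast path read the counter from user mode. */
	clocksource_example.fsys_mmio = example_mc;
	clocksource_example.mult = clocksource_hz2mult(EXAMPLE_TIMER_FREQ,
						       clocksource_example.shift);
	return clocksource_register(&clocksource_example);
}

As in the cyclone hunk, mult is computed at registration time from the counter frequency and shift rather than hardcoded, and setting fsys_mmio is what allows the rewritten fsys_gettimeofday in fsys.S to read the counter without entering the kernel.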
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 103dd8edda71..3d45d24a9d61 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -67,6 +67,8 @@ EXPORT_SYMBOL(pm_power_off); unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; +unsigned long acpi_wakeup_address = 0; + const char __init * acpi_get_sysname(void) { @@ -739,16 +741,15 @@ int __init acpi_boot_init(void) int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { - int vector; + int tmp; if (has_8259 && gsi < 16) *irq = isa_irq_to_vector(gsi); else { - vector = gsi_to_vector(gsi); - if (vector == -1) + tmp = gsi_to_irq(gsi); + if (tmp == -1) return -1; - - *irq = vector; + *irq = tmp; } return 0; } @@ -986,4 +987,21 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) EXPORT_SYMBOL(acpi_unregister_ioapic); +/* + * acpi_save_state_mem() - save kernel state + * + * TBD when when IA64 starts to support suspend... + */ +int acpi_save_state_mem(void) { return 0; } + +/* + * acpi_restore_state() + */ +void acpi_restore_state_mem(void) {} + +/* + * do_suspend_lowlevel() + */ +void do_suspend_lowlevel(void) {} + #endif /* CONFIG_ACPI */ diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 2236fabbb3c6..0aebc6f79e95 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C 1 #include <linux/sched.h> +#include <linux/clocksource.h> #include <asm-ia64/processor.h> #include <asm-ia64/ptrace.h> @@ -15,6 +16,7 @@ #include <asm-ia64/mca.h> #include "../kernel/sigframe.h" +#include "../kernel/fsyscall_gtod_data.h" #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -256,17 +258,24 @@ void foo(void) BLANK(); /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ - DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); - DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); - DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); - DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); - DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); - DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter)); - DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); - DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); - DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); - DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); - DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); - DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); + DEFINE(IA64_GTOD_LOCK_OFFSET, + offsetof (struct fsyscall_gtod_data_t, lock)); + DEFINE(IA64_GTOD_WALL_TIME_OFFSET, + offsetof (struct fsyscall_gtod_data_t, wall_time)); + DEFINE(IA64_GTOD_MONO_TIME_OFFSET, + offsetof (struct fsyscall_gtod_data_t, monotonic_time)); + DEFINE(IA64_CLKSRC_MASK_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_mask)); + DEFINE(IA64_CLKSRC_MULT_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_mult)); + DEFINE(IA64_CLKSRC_SHIFT_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_shift)); + DEFINE(IA64_CLKSRC_MMIO_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio)); + 
DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_cycle_last)); + DEFINE(IA64_ITC_JITTER_OFFSET, + offsetof (struct itc_jitter_data_t, itc_jitter)); + DEFINE(IA64_ITC_LASTCYCLE_OFFSET, + offsetof (struct itc_jitter_data_t, itc_lastcycle)); } diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 15c08d52f09f..b8498ea62068 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -113,10 +113,8 @@ processor_get_freq ( saved_mask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - ret = -EAGAIN; + if (smp_processor_id() != cpu) goto migrate_end; - } /* processor_get_pstate gets the instantaneous frequency */ ret = processor_get_pstate(&value); @@ -125,7 +123,7 @@ processor_get_freq ( set_cpus_allowed(current, saved_mask); printk(KERN_WARNING "get performance failed with error %d\n", ret); - ret = -EAGAIN; + ret = 0; goto migrate_end; } clock_freq = extract_clock(data, value, cpu); @@ -323,8 +321,6 @@ acpi_cpufreq_cpu_init ( data->acpi_data.states[i].transition_latency * 1000; } } - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cur = processor_get_freq(data, policy->cpu); /* table init */ diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index e00b21514f7c..790ef0d87e12 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -3,6 +3,7 @@ #include <linux/time.h> #include <linux/errno.h> #include <linux/timex.h> +#include <linux/clocksource.h> #include <asm/io.h> /* IBM Summit (EXA) Cyclone counter code*/ @@ -18,22 +19,30 @@ void __init cyclone_setup(void) use_cyclone = 1; } +static void __iomem *cyclone_mc; -struct time_interpolator cyclone_interpolator = { - .source = TIME_SOURCE_MMIO64, - .shift = 16, - .frequency = CYCLONE_TIMER_FREQ, - .drift = -100, - .mask = (1LL << 40) - 1 +static cycle_t read_cyclone(void) +{ + return (cycle_t)readq((void __iomem *)cyclone_mc); +} + +static struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 300, + .read = read_cyclone, + .mask = (1LL << 40) - 1, + .mult = 0, /*to be caluclated*/ + .shift = 16, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; int __init init_cyclone_clock(void) { - u64* reg; + u64 __iomem *reg; u64 base; /* saved cyclone base address */ u64 offset; /* offset from pageaddr to cyclone_timer register */ int i; - u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ + u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */ if (!use_cyclone) return 0; @@ -42,15 +51,17 @@ int __init init_cyclone_clock(void) /* find base address */ offset = (CYCLONE_CBAR_ADDR); - reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + reg = ioremap_nocache(offset, sizeof(u64)); if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " register.\n"); use_cyclone = 0; return -ENODEV; } base = readq(reg); if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " value.\n"); use_cyclone = 0; return -ENODEV; } @@ -58,9 +69,10 @@ int __init init_cyclone_clock(void) /* setup PMCC */ offset = (base + CYCLONE_PMCC_OFFSET); - reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + reg = ioremap_nocache(offset, sizeof(u64)); if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); + printk(KERN_ERR "Summit chipset: Could 
not find valid PMCC" + " register.\n"); use_cyclone = 0; return -ENODEV; } @@ -69,9 +81,10 @@ int __init init_cyclone_clock(void) /* setup MPCS */ offset = (base + CYCLONE_MPCS_OFFSET); - reg = (u64*)ioremap_nocache(offset, sizeof(u64)); + reg = ioremap_nocache(offset, sizeof(u64)); if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); + printk(KERN_ERR "Summit chipset: Could not find valid MPCS" + " register.\n"); use_cyclone = 0; return -ENODEV; } @@ -80,9 +93,10 @@ int __init init_cyclone_clock(void) /* map in cyclone_timer */ offset = (base + CYCLONE_MPMC_OFFSET); - cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32)); + cyclone_timer = ioremap_nocache(offset, sizeof(u32)); if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); + printk(KERN_ERR "Summit chipset: Could not find valid MPMC" + " register.\n"); use_cyclone = 0; return -ENODEV; } @@ -93,16 +107,20 @@ int __init init_cyclone_clock(void) int stall = 100; while(stall--) barrier(); if(readl(cyclone_timer) == old){ - printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); + printk(KERN_ERR "Summit chipset: Counter not counting!" + " DISABLED\n"); iounmap(cyclone_timer); - cyclone_timer = 0; + cyclone_timer = NULL; use_cyclone = 0; return -ENODEV; } } /* initialize last tick */ - cyclone_interpolator.addr = cyclone_timer; - register_time_interpolator(&cyclone_interpolator); + cyclone_mc = cyclone_timer; + clocksource_cyclone.fsys_mmio = cyclone_timer; + clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ, + clocksource_cyclone.shift); + clocksource_register(&clocksource_cyclone); return 0; } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 95f517515235..c36f43c94600 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1581,7 +1581,7 @@ sys_call_table: data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice - data8 sys_ni_syscall // reserved for move_pages + data8 sys_fallocate data8 sys_getcpu data8 sys_epoll_pwait // 1305 data8 sys_utimensat diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 3f926c2dc708..44841971f077 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -147,12 +147,11 @@ ENTRY(fsys_set_tid_address) FSYS_RETURN END(fsys_set_tid_address) -/* - * Ensure that the time interpolator structure is compatible with the asm code - */ -#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ - || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 -#error fsys_gettimeofday incompatible with changes to struct time_interpolator +#if IA64_GTOD_LOCK_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t +#endif +#if IA64_ITC_JITTER_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t #endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 @@ -179,126 +178,124 @@ ENTRY(fsys_gettimeofday) // r11 = preserved: saved ar.pfs // r12 = preserved: memory stack // r13 = preserved: thread pointer - // r14 = address of mask / mask + // r14 = address of mask / mask value // r15 = preserved: system call number // r16 = preserved: current task pointer - // r17 = wall to monotonic use - // r18 = time_interpolator->offset - // r19 = address of wall_to_monotonic - // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address - // r21 = shift factor - // r22 = address of time 
interpolator->last_counter - // r23 = address of time_interpolator->last_cycle - // r24 = adress of time_interpolator->offset - // r25 = last_cycle value - // r26 = last_counter value - // r27 = pointer to xtime + // r17 = (not used) + // r18 = (not used) + // r19 = address of itc_lastcycle + // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence) + // r21 = address of mmio_ptr + // r22 = address of wall_time or monotonic_time + // r23 = address of shift / value + // r24 = address mult factor / cycle_last value + // r25 = itc_lastcycle value + // r26 = address clocksource cycle_last + // r27 = (not used) // r28 = sequence number at the beginning of critcal section - // r29 = address of seqlock + // r29 = address of itc_jitter // r30 = time processing flags / memory address // r31 = pointer to result // Predicates // p6,p7 short term use // p8 = timesource ar.itc // p9 = timesource mmio64 - // p10 = timesource mmio32 + // p10 = timesource mmio32 - not used // p11 = timesource not to be handled by asm code - // p12 = memory time source ( = p9 | p10) - // p13 = do cmpxchg with time_interpolator_last_cycle + // p12 = memory time source ( = p9 | p10) - not used + // p13 = do cmpxchg with itc_lastcycle // p14 = Divide by 1000 // p15 = Add monotonic // - // Note that instructions are optimized for McKinley. McKinley can process two - // bundles simultaneously and therefore we continuously try to feed the CPU - // two bundles and then a stop. - tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure + // Note that instructions are optimized for McKinley. McKinley can + // process two bundles simultaneously and therefore we continuously + // try to feed the CPU two bundles and then a stop. + // + // Additional note that code has changed a lot. Optimization is TBD. + // Comments begin with "?" are maybe outdated. + tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle mov pr = r30,0xc000 // Set predicates according to function add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - movl r20 = time_interpolator + movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address ;; - ld8 r20 = [r20] // get pointer to time_interpolator structure - movl r29 = xtime_lock + movl r29 = itc_jitter_data // itc_jitter + add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time ld4 r2 = [r2] // process work pending flags - movl r27 = xtime - ;; // only one bundle here - ld8 r21 = [r20] // first quad with control information + ;; +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time + add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 and r2 = TIF_ALLWORK_MASK,r2 -(p6) br.cond.spnt.few .fail_einval // deferred branch +(p6) br.cond.spnt.few .fail_einval // ? deferred branch ;; - add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 - extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc - extr r8 = r21,0,16 // time_interpolator->source + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled (p6) br.cond.spnt.many fsys_fallback_syscall ;; - cmp.eq p8,p12 = 0,r8 // Check for cpu timer - cmp.eq p9,p0 = 1,r8 // MMIO64 ? - extr r2 = r21,24,8 // time_interpolator->jitter - cmp.eq p10,p0 = 2,r8 // MMIO32 ? 
- cmp.ltu p11,p0 = 2,r8 // function or other clock -(p11) br.cond.spnt.many fsys_fallback_syscall + // Begin critical section +.time_redo: + ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first + ;; + and r28 = ~1,r28 // And make sequence even to force retry if odd ;; - setf.sig f7 = r3 // Setup for scaling of counter -(p15) movl r19 = wall_to_monotonic -(p12) ld8 r30 = [r10] - cmp.ne p13,p0 = r2,r0 // need jitter compensation? - extr r21 = r21,16,8 // shift factor + ld8 r30 = [r21] // clocksource->mmio_ptr + add r24 = IA64_CLKSRC_MULT_OFFSET,r20 + ld4 r2 = [r29] // itc_jitter value + add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 + add r14 = IA64_CLKSRC_MASK_OFFSET,r20 ;; -.time_redo: - .pred.rel.mutex p8,p9,p10 - ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes + ld4 r3 = [r24] // clocksource mult value + ld8 r14 = [r14] // clocksource mask value + cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr ;; - and r28 = ~1,r28 // Make sequence even to force retry if odd + setf.sig f7 = r3 // Setup for mult scaling of counter +(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 + ld4 r23 = [r23] // clocksource shift value + ld8 r24 = [r26] // get clksrc_cycle_last value +(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control ;; + .pred.rel.mutex p8,p9 (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! - add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 -(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. -(p10) ld4 r2 = [r30] // readw(ti->address) -(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 - ;; // could be removed by moving the last add upward - ld8 r26 = [r22] // time_interpolator->last_counter -(p13) ld8 r25 = [r23] // time interpolator->last_cycle - add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 -(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET - ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET - add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 - ;; - ld8 r18 = [r24] // time_interpolator->offset - ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) - ;; - ld8 r14 = [r14] // time_interpolator->mask -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r26 // current_counter - last_counter - ;; -(p6) sub r10 = r25,r26 // time we got was less than last_cycle +(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. +(p13) ld8 r25 = [r19] // get itc_lastcycle value + ;; // ? could be removed by moving the last add upward + ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec + ;; + ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) + ;; +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r24 // current_cycle - last_cycle + ;; +(p6) sub r10 = r25,r24 // time we got was less than last_cycle (p7) mov ar.ccv = r25 // more than last_cycle. 
Prep for cmpxchg ;; +(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv + ;; +(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful + ;; +(p7) sub r10 = r3,r24 // then use new last_cycle instead + ;; and r10 = r10,r14 // Apply mask ;; setf.sig f8 = r10 nop.i 123 ;; -(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv -EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time + // fault check takes 5 cycles and we have spare time +EX(.fail_efault, probe.w.fault r31, 3) xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) -(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs ;; -(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET -(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo - // simulate tbit.nz.or p7,p0 = r28,0 + // ? simulate tbit.nz.or p7,p0 = r28,0 getf.sig r2 = f8 mf - add r8 = r8,r18 // Add time interpolator offset ;; - ld4 r10 = [r29] // xtime_lock.sequence -(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs - shr.u r2 = r2,r21 - ;; // overloaded 3 bundles! - // End critical section. + ld4 r10 = [r20] // gtod_lock.sequence + shr.u r2 = r2,r23 // shift by factor + ;; // ? overloaded 3 bundles! add r8 = r8,r2 // Add xtime.nsecs - cmp4.ne.or p7,p0 = r28,r10 -(p7) br.cond.dpnt.few .time_redo // sequence number changed ? + cmp4.ne p7,p0 = r28,r10 +(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo + // End critical section. // Now r8=tv->tv_nsec and r9=tv->tv_sec mov r10 = r0 movl r2 = 1000000000 @@ -308,19 +305,19 @@ EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare .time_normalize: mov r21 = r8 cmp.ge p6,p0 = r8,r2 -(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time +(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time ;; (p14) setf.sig f8 = r20 (p6) sub r8 = r8,r2 -(p6) add r9 = 1,r9 // two nops before the branch. -(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod +(p6) add r9 = 1,r9 // two nops before the branch. +(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod (p6) br.cond.dpnt.few .time_normalize ;; // Divided by 8 though shift. Now divide by 125 // The compiler was able to do that with a multiply // and a shift and we do the same -EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles -(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it... +EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles +(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it ;; mov r8 = r0 (p14) getf.sig r2 = f8 diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h new file mode 100644 index 000000000000..490dab55fba3 --- /dev/null +++ b/arch/ia64/kernel/fsyscall_gtod_data.h @@ -0,0 +1,23 @@ +/* + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Peter Keilty <peter.keilty@hp.com> + * + * fsyscall gettimeofday data + */ + +struct fsyscall_gtod_data_t { + seqlock_t lock; + struct timespec wall_time; + struct timespec monotonic_time; + cycle_t clk_mask; + u32 clk_mult; + u32 clk_shift; + void *clk_fsys_mmio; + cycle_t clk_cycle_last; +} __attribute__ ((aligned (L1_CACHE_BYTES))); + +struct itc_jitter_data_t { + int itc_jitter; + cycle_t itc_lastcycle; +} __attribute__ ((aligned (L1_CACHE_BYTES))); + diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 44d540efa6d1..4e5e27540e27 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -178,7 +178,7 @@ swapper_pg_dir: halt_msg: stringz "Halting kernel\n" - .text + .section .text.head,"ax" .global start_ap @@ -392,6 +392,8 @@ self: hint @pause br.sptk.many self // endless loop END(_start) + .text + GLOBAL_ENTRY(ia64_save_debug_regs) alloc r16=ar.pfs,1,0,0,0 mov r20=ar.lc // preserve ar.lc diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index b69c397ed1bf..bc8efcad28b8 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/mm.h> +#include <linux/fs.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/init_task.h> diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 37f46527d233..cfe4654838f4 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -118,21 +118,31 @@ static DEFINE_SPINLOCK(iosapic_lock); * vector. */ -struct iosapic_rte_info { - struct list_head rte_list; /* node in list of RTEs sharing the - * same vector */ +#define NO_REF_RTE 0 + +static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ + unsigned int gsi_base; /* GSI base */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ + int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif + spinlock_t lock; /* lock for indirect reg access */ +} iosapic_lists[NR_IOSAPICS]; + +struct iosapic_rte_info { + struct list_head rte_list; /* RTEs sharing the same vector */ char rte_index; /* IOSAPIC RTE index */ int refcnt; /* reference counter */ unsigned int flags; /* flags */ + struct iosapic *iosapic; } ____cacheline_aligned; static struct iosapic_intr_info { struct list_head rtes; /* RTEs using this vector (empty => * not an IOSAPIC interrupt) */ - int count; /* # of RTEs that shares this vector */ + int count; /* # of registered RTEs */ u32 low32; /* current value of low word of * Redirection table entry */ unsigned int dest; /* destination CPU physical ID */ @@ -140,24 +150,23 @@ static struct iosapic_intr_info { unsigned char polarity: 1; /* interrupt polarity * (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ -} iosapic_intr_info[IA64_NUM_VECTORS]; - -static struct iosapic { - char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ - unsigned short num_rte; /* # of RTEs on this IOSAPIC */ - int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ -#ifdef CONFIG_NUMA - unsigned short node; /* numa node association via pxm */ -#endif -} iosapic_lists[NR_IOSAPICS]; +} iosapic_intr_info[NR_IRQS]; static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ static int iosapic_kmalloc_ok; static LIST_HEAD(free_rte_list); +static inline void 
+iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&iosapic->lock, flags); + __iosapic_write(iosapic->addr, reg, val); + spin_unlock_irqrestore(&iosapic->lock, flags); +} + /* * Find an IOSAPIC associated with a GSI */ @@ -175,17 +184,18 @@ find_iosapic (unsigned int gsi) return -1; } -static inline int -_gsi_to_vector (unsigned int gsi) +static inline int __gsi_to_irq(unsigned int gsi) { + int irq; struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - for (info = iosapic_intr_info; info < - iosapic_intr_info + IA64_NUM_VECTORS; ++info) + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; list_for_each_entry(rte, &info->rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) - return info - iosapic_intr_info; + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return irq; + } return -1; } @@ -196,7 +206,10 @@ _gsi_to_vector (unsigned int gsi) inline int gsi_to_vector (unsigned int gsi) { - return _gsi_to_vector(gsi); + int irq = __gsi_to_irq(gsi); + if (check_irq_used(irq) < 0) + return -1; + return irq_to_vector(irq); } int @@ -204,66 +217,48 @@ gsi_to_irq (unsigned int gsi) { unsigned long flags; int irq; - /* - * XXX fix me: this assumes an identity mapping between IA-64 vector - * and Linux irq numbers... - */ + spin_lock_irqsave(&iosapic_lock, flags); - { - irq = _gsi_to_vector(gsi); - } + irq = __gsi_to_irq(gsi); spin_unlock_irqrestore(&iosapic_lock, flags); - return irq; } -static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, - unsigned int vec) +static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi) { struct iosapic_rte_info *rte; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) return rte; return NULL; } static void -set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) +set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) { unsigned long pol, trigger, dmode; u32 low32, high32; - char __iomem *addr; int rte_index; char redir; struct iosapic_rte_info *rte; + ia64_vector vector = irq_to_vector(irq); DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - rte = gsi_vector_to_rte(gsi, vector); + rte = find_rte(irq, gsi); if (!rte) return; /* not an IOSAPIC interrupt */ rte_index = rte->rte_index; - addr = rte->addr; - pol = iosapic_intr_info[vector].polarity; - trigger = iosapic_intr_info[vector].trigger; - dmode = iosapic_intr_info[vector].dmode; + pol = iosapic_intr_info[irq].polarity; + trigger = iosapic_intr_info[irq].trigger; + dmode = iosapic_intr_info[irq].dmode; redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 
1 : 0; #ifdef CONFIG_SMP - { - unsigned int irq; - - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, - (int)(dest & 0xffff), - redir); - break; - } - } + set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); #endif low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | @@ -275,10 +270,10 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) /* dest contains both id and eid */ high32 = (dest << IOSAPIC_DEST_SHIFT); - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - iosapic_intr_info[vector].low32 = low32; - iosapic_intr_info[vector].dest = dest; + iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; } static void @@ -294,15 +289,18 @@ kexec_disable_iosapic(void) { struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - u8 vec = 0; - for (info = iosapic_intr_info; info < - iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + ia64_vector vec; + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; + vec = irq_to_vector(irq); list_for_each_entry(rte, &info->rtes, rte_list) { - iosapic_write(rte->addr, + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), IOSAPIC_MASK|vec); - iosapic_eoi(rte->addr, vec); + iosapic_eoi(rte->iosapic->addr, vec); } } } @@ -311,54 +309,36 @@ kexec_disable_iosapic(void) static void mask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; - ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (!iosapic_intr_info[irq].count) return; /* not an IOSAPIC interrupt! */ - spin_lock_irqsave(&iosapic_lock, flags); - { - /* set only the mask bit */ - low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + /* set only the mask bit */ + low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } static void unmask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; - ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (!iosapic_intr_info[irq].count) return; /* not an IOSAPIC interrupt! 
*/ - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -366,23 +346,24 @@ static void iosapic_set_affinity (unsigned int irq, cpumask_t mask) { #ifdef CONFIG_SMP - unsigned long flags; u32 high32, low32; int dest, rte_index; - char __iomem *addr; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; - ia64_vector vec; struct iosapic_rte_info *rte; + struct iosapic *iosapic; irq &= (~IA64_IRQ_REDIRECTED); - vec = irq_to_vector(irq); + cpus_and(mask, mask, cpu_online_map); if (cpus_empty(mask)) return; + if (reassign_irq_vector(irq, first_cpu(mask))) + return; + dest = cpu_physical_id(first_cpu(mask)); - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (!iosapic_intr_info[irq].count) return; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -390,31 +371,24 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) /* dest contains both id and eid */ high32 = dest << IOSAPIC_DEST_SHIFT; - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 & - ~(7 << IOSAPIC_DELIVERY_SHIFT); - - if (redir) - /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << - IOSAPIC_DELIVERY_SHIFT); - else - /* change delivery mode to fixed */ - low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); - - iosapic_intr_info[vec].low32 = low32; - iosapic_intr_info[vec].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), - high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + if (redir) + /* change delivery mode to lowest priority */ + low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + else + /* change delivery mode to fixed */ + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + low32 &= IOSAPIC_VECTOR_MASK; + low32 |= irq_to_vector(irq); + + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + iosapic = rte->iosapic; + rte_index = rte->rte_index; + iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); #endif } @@ -434,10 +408,20 @@ iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; + int do_unmask_irq = 0; - move_native_irq(irq); - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - iosapic_eoi(rte->addr, vec); + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_irq(irq); + } + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + iosapic_eoi(rte->iosapic->addr, vec); + + if (unlikely(do_unmask_irq)) { + move_masked_irq(irq); + unmask_irq(irq); + } } #define iosapic_shutdown_level_irq mask_irq @@ -519,13 +503,12 @@ iosapic_version (char __iomem *addr) * unsigned int 
reserved2 : 8; * } */ - return iosapic_read(addr, IOSAPIC_VERSION); + return __iosapic_read(addr, IOSAPIC_VERSION); } -static int iosapic_find_sharable_vector (unsigned long trigger, - unsigned long pol) +static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) { - int i, vector = -1, min_count = -1; + int i, irq = -ENOSPC, min_count = -1; struct iosapic_intr_info *info; /* @@ -533,21 +516,21 @@ static int iosapic_find_sharable_vector (unsigned long trigger, * supported yet */ if (trigger == IOSAPIC_EDGE) - return -1; + return -EINVAL; - for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { + for (i = 0; i <= NR_IRQS; i++) { info = &iosapic_intr_info[i]; if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || info->dmode == - IOSAPIC_LOWEST_PRIORITY)) { + (info->dmode == IOSAPIC_FIXED || + info->dmode == IOSAPIC_LOWEST_PRIORITY) && + can_request_irq(i, IRQF_SHARED)) { if (min_count == -1 || info->count < min_count) { - vector = i; + irq = i; min_count = info->count; } } } - - return vector; + return irq; } /* @@ -555,29 +538,29 @@ static int iosapic_find_sharable_vector (unsigned long trigger, * assign a new vector for the other and make the vector available */ static void __init -iosapic_reassign_vector (int vector) +iosapic_reassign_vector (int irq) { - int new_vector; + int new_irq; - if (!list_empty(&iosapic_intr_info[vector].rtes)) { - new_vector = assign_irq_vector(AUTO_ASSIGN); - if (new_vector < 0) + if (iosapic_intr_info[irq].count) { + new_irq = create_irq(); + if (new_irq < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); printk(KERN_INFO "Reassigning vector %d to %d\n", - vector, new_vector); - memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], + irq_to_vector(irq), irq_to_vector(new_irq)); + memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq], sizeof(struct iosapic_intr_info)); - INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); - list_move(iosapic_intr_info[vector].rtes.next, - &iosapic_intr_info[new_vector].rtes); - memset(&iosapic_intr_info[vector], 0, + INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes); + list_move(iosapic_intr_info[irq].rtes.next, + &iosapic_intr_info[new_irq].rtes); + memset(&iosapic_intr_info[irq], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); } } -static struct iosapic_rte_info *iosapic_alloc_rte (void) +static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void) { int i; struct iosapic_rte_info *rte; @@ -610,29 +593,18 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) return rte; } -static void iosapic_free_rte (struct iosapic_rte_info *rte) +static inline int irq_is_shared (int irq) { - if (rte->flags & RTE_PREALLOCATED) - list_add_tail(&rte->rte_list, &free_rte_list); - else - kfree(rte); -} - -static inline int vector_is_shared (int vector) -{ - return (iosapic_intr_info[vector].count > 1); + return (iosapic_intr_info[irq].count > 1); } static int -register_intr (unsigned int gsi, int vector, unsigned char delivery, +register_intr (unsigned int gsi, int irq, unsigned char delivery, unsigned long polarity, unsigned long trigger) { irq_desc_t *idesc; struct hw_interrupt_type *irq_type; - int rte_index; int index; - unsigned long gsi_base; - void __iomem *iosapic_address; struct iosapic_rte_info *rte; index = find_iosapic(gsi); @@ 
-642,10 +614,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, return -ENODEV; } - iosapic_address = iosapic_lists[index].addr; - gsi_base = iosapic_lists[index].gsi_base; - - rte = gsi_vector_to_rte(gsi, vector); + rte = find_rte(irq, gsi); if (!rte) { rte = iosapic_alloc_rte(); if (!rte) { @@ -654,40 +623,42 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, return -ENOMEM; } - rte_index = gsi - gsi_base; - rte->rte_index = rte_index; - rte->addr = iosapic_address; - rte->gsi_base = gsi_base; + rte->iosapic = &iosapic_lists[index]; + rte->rte_index = gsi - rte->iosapic->gsi_base; rte->refcnt++; - list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes); - iosapic_intr_info[vector].count++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes); + iosapic_intr_info[irq].count++; iosapic_lists[index].rtes_inuse++; } - else if (vector_is_shared(vector)) { - struct iosapic_intr_info *info = &iosapic_intr_info[vector]; - if (info->trigger != trigger || info->polarity != polarity) { + else if (rte->refcnt == NO_REF_RTE) { + struct iosapic_intr_info *info = &iosapic_intr_info[irq]; + if (info->count > 0 && + (info->trigger != trigger || info->polarity != polarity)){ printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); return -EINVAL; } + rte->refcnt++; + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; } - iosapic_intr_info[vector].polarity = polarity; - iosapic_intr_info[vector].dmode = delivery; - iosapic_intr_info[vector].trigger = trigger; + iosapic_intr_info[irq].polarity = polarity; + iosapic_intr_info[irq].dmode = delivery; + iosapic_intr_info[irq].trigger = trigger; if (trigger == IOSAPIC_EDGE) irq_type = &irq_type_iosapic_edge; else irq_type = &irq_type_iosapic_level; - idesc = irq_desc + vector; + idesc = irq_desc + irq; if (idesc->chip != irq_type) { if (idesc->chip != &no_irq_type) printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, + __FUNCTION__, irq_to_vector(irq), idesc->chip->name, irq_type->name); idesc->chip = irq_type; } @@ -695,18 +666,19 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, } static unsigned int -get_target_cpu (unsigned int gsi, int vector) +get_target_cpu (unsigned int gsi, int irq) { #ifdef CONFIG_SMP static int cpu = -1; extern int cpe_vector; + cpumask_t domain = irq_to_domain(irq); /* * In case of vector shared by multiple RTEs, all RTEs that * share the vector need to use the same destination CPU. 
*/ - if (!list_empty(&iosapic_intr_info[vector].rtes)) - return iosapic_intr_info[vector].dest; + if (iosapic_intr_info[irq].count) + return iosapic_intr_info[irq].dest; /* * If the platform supports redirection via XTP, let it @@ -723,7 +695,7 @@ get_target_cpu (unsigned int gsi, int vector) return cpu_physical_id(smp_processor_id()); #ifdef CONFIG_ACPI - if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR) return get_cpei_target_cpu(); #endif @@ -738,7 +710,7 @@ get_target_cpu (unsigned int gsi, int vector) goto skip_numa_setup; cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - + cpus_and(cpu_mask, cpu_mask, domain); for_each_cpu_mask(numa_cpu, cpu_mask) { if (!cpu_online(numa_cpu)) cpu_clear(numa_cpu, cpu_mask); @@ -749,8 +721,8 @@ get_target_cpu (unsigned int gsi, int vector) if (!num_cpus) goto skip_numa_setup; - /* Use vector assignment to distribute across cpus in node */ - cpu_index = vector % num_cpus; + /* Use irq assignment to distribute across cpus in node */ + cpu_index = irq % num_cpus; for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) numa_cpu = next_cpu(numa_cpu, cpu_mask); @@ -768,7 +740,7 @@ skip_numa_setup: do { if (++cpu >= NR_CPUS) cpu = 0; - } while (!cpu_online(cpu)); + } while (!cpu_online(cpu) || !cpu_isset(cpu, domain)); return cpu_physical_id(cpu); #else /* CONFIG_SMP */ @@ -785,84 +757,73 @@ int iosapic_register_intr (unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector, mask = 1, err; + int irq, mask = 1, err; unsigned int dest; unsigned long flags; struct iosapic_rte_info *rte; u32 low32; -again: + /* * If this GSI has already been registered (i.e., it's a * shared interrupt, or we lost a race to register it), * don't touch the RTE. */ spin_lock_irqsave(&iosapic_lock, flags); - { - vector = gsi_to_vector(gsi); - if (vector > 0) { - rte = gsi_vector_to_rte(gsi, vector); + irq = __gsi_to_irq(gsi); + if (irq > 0) { + rte = find_rte(irq, gsi); + if(iosapic_intr_info[irq].count == 0) { + assign_irq_vector(irq); + dynamic_irq_init(irq); + } else if (rte->refcnt != NO_REF_RTE) { rte->refcnt++; - spin_unlock_irqrestore(&iosapic_lock, flags); - return vector; + goto unlock_iosapic_lock; } - } - spin_unlock_irqrestore(&iosapic_lock, flags); + } else + irq = create_irq(); /* If vector is running out, we try to find a sharable vector */ - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) { - vector = iosapic_find_sharable_vector(trigger, polarity); - if (vector < 0) - return -ENOSPC; + if (irq < 0) { + irq = iosapic_find_sharable_irq(trigger, polarity); + if (irq < 0) + goto unlock_iosapic_lock; } - spin_lock_irqsave(&irq_desc[vector].lock, flags); - spin_lock(&iosapic_lock); - { - if (gsi_to_vector(gsi) > 0) { - if (list_empty(&iosapic_intr_info[vector].rtes)) - free_irq_vector(vector); - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - goto again; - } - - dest = get_target_cpu(gsi, vector); - err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, - polarity, trigger); - if (err < 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - return err; - } - - /* - * If the vector is shared and already unmasked for - * other interrupt sources, don't mask it. 
- */ - low32 = iosapic_intr_info[vector].low32; - if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK)) - mask = 0; - set_rte(gsi, vector, dest, mask); + spin_lock(&irq_desc[irq].lock); + dest = get_target_cpu(gsi, irq); + err = register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, + polarity, trigger); + if (err < 0) { + spin_unlock(&irq_desc[irq].lock); + irq = err; + goto unlock_iosapic_lock; } - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, flags); + + /* + * If the vector is shared and already unmasked for other + * interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[irq].low32; + if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, irq, dest, mask); printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); + cpu_logical_id(dest), dest, irq_to_vector(irq)); - return vector; + spin_unlock(&irq_desc[irq].lock); + unlock_iosapic_lock: + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; } void iosapic_unregister_intr (unsigned int gsi) { unsigned long flags; - int irq, vector, index; + int irq, index; irq_desc_t *idesc; u32 low32; unsigned long trigger, polarity; @@ -881,78 +842,56 @@ iosapic_unregister_intr (unsigned int gsi) WARN_ON(1); return; } - vector = irq_to_vector(irq); - idesc = irq_desc + irq; - spin_lock_irqsave(&idesc->lock, flags); - spin_lock(&iosapic_lock); - { - if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { - printk(KERN_ERR - "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - goto out; - } + spin_lock_irqsave(&iosapic_lock, flags); + if ((rte = find_rte(irq, gsi)) == NULL) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + goto out; + } - if (--rte->refcnt > 0) - goto out; + if (--rte->refcnt > 0) + goto out; - /* Mask the interrupt */ - low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), - low32); + idesc = irq_desc + irq; + rte->refcnt = NO_REF_RTE; - /* Remove the rte entry from the list */ - list_del(&rte->rte_list); - iosapic_intr_info[vector].count--; - iosapic_free_rte(rte); - index = find_iosapic(gsi); - iosapic_lists[index].rtes_inuse--; - WARN_ON(iosapic_lists[index].rtes_inuse < 0); - - trigger = iosapic_intr_info[vector].trigger; - polarity = iosapic_intr_info[vector].polarity; - dest = iosapic_intr_info[vector].dest; - printk(KERN_INFO - "GSI %u (%s, %s) -> CPU %d (0x%04x)" - " vector %d unregistered\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); + /* Mask the interrupt */ + low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); - if (list_empty(&iosapic_intr_info[vector].rtes)) { - /* Sanity check */ - BUG_ON(iosapic_intr_info[vector].count); + iosapic_intr_info[irq].count--; + index = find_iosapic(gsi); + iosapic_lists[index].rtes_inuse--; + WARN_ON(iosapic_lists[index].rtes_inuse < 0); - /* Clear the interrupt controller descriptor */ - idesc->chip = &no_irq_type; + trigger = iosapic_intr_info[irq].trigger; + polarity = iosapic_intr_info[irq].polarity; + dest = iosapic_intr_info[irq].dest; + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? 
"edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP - /* Clear affinity */ - cpus_setall(idesc->affinity); + /* Clear affinity */ + cpus_setall(idesc->affinity); #endif - - /* Clear the interrupt information */ - memset(&iosapic_intr_info[vector], 0, - sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); - - if (idesc->action) { - printk(KERN_ERR - "interrupt handlers still exist on" - "IRQ %u\n", irq); - WARN_ON(1); - } - - /* Free the interrupt vector */ - free_irq_vector(vector); - } + /* Clear the interrupt information */ + iosapic_intr_info[irq].dest = 0; + iosapic_intr_info[irq].dmode = 0; + iosapic_intr_info[irq].polarity = 0; + iosapic_intr_info[irq].trigger = 0; + iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + + /* Destroy and reserve IRQ */ + destroy_and_reserve_irq(irq); } out: - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); + spin_unlock_irqrestore(&iosapic_lock, flags); } /* @@ -965,27 +904,30 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, { static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; unsigned char delivery; - int vector, mask = 0; + int irq, vector, mask = 0; unsigned int dest = ((id << 8) | eid) & 0xffff; switch (int_type) { case ACPI_INTERRUPT_PMI: - vector = iosapic_vector; + irq = vector = iosapic_vector; + bind_irq_vector(irq, vector, CPU_MASK_ALL); /* * since PMI vector is alloc'd by FW(ACPI) not by kernel, * we need to make sure the vector is available */ - iosapic_reassign_vector(vector); + iosapic_reassign_vector(irq); delivery = IOSAPIC_PMI; break; case ACPI_INTERRUPT_INIT: - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) + irq = create_irq(); + if (irq < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); + vector = irq_to_vector(irq); delivery = IOSAPIC_INIT; break; case ACPI_INTERRUPT_CPEI: - vector = IA64_CPE_VECTOR; + irq = vector = IA64_CPE_VECTOR; + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); delivery = IOSAPIC_LOWEST_PRIORITY; mask = 1; break; @@ -995,7 +937,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, return -1; } - register_intr(gsi, vector, delivery, polarity, trigger); + register_intr(gsi, irq, delivery, polarity, trigger); printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" @@ -1005,7 +947,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); - set_rte(gsi, vector, dest, mask); + set_rte(gsi, irq, dest, mask); return vector; } @@ -1017,30 +959,32 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector; + int vector, irq; unsigned int dest = cpu_physical_id(smp_processor_id()); - vector = isa_irq_to_vector(isa_irq); - - register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); + irq = vector = isa_irq_to_vector(isa_irq); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", polarity == IOSAPIC_POL_HIGH ? 
"high" : "low", cpu_logical_id(dest), dest, vector); - set_rte(gsi, vector, dest, 1); + set_rte(gsi, irq, dest, 1); } void __init iosapic_system_init (int system_pcat_compat) { - int vector; + int irq; - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + for (irq = 0; irq < NR_IRQS; ++irq) { + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; /* mark as unused */ - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + + iosapic_intr_info[irq].count = 0; } pcat_compat = system_pcat_compat; @@ -1108,31 +1052,35 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); - { - addr = ioremap(phys_addr, 0); - ver = iosapic_version(addr); + index = find_iosapic(gsi_base); + if (index >= 0) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -EBUSY; + } - if ((err = iosapic_check_gsi_range(gsi_base, ver))) { - iounmap(addr); - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; - } + addr = ioremap(phys_addr, 0); + ver = iosapic_version(addr); + if ((err = iosapic_check_gsi_range(gsi_base, ver))) { + iounmap(addr); + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; + } - /* - * The MAX_REDIR register holds the highest input pin - * number (starting from 0). - * We add 1 so that we can use it for number of pins (= RTEs) - */ - num_rte = ((ver >> 16) & 0xff) + 1; + /* + * The MAX_REDIR register holds the highest input pin number + * (starting from 0). We add 1 so that we can use it for + * number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; - index = iosapic_alloc(); - iosapic_lists[index].addr = addr; - iosapic_lists[index].gsi_base = gsi_base; - iosapic_lists[index].num_rte = num_rte; + index = iosapic_alloc(); + iosapic_lists[index].addr = addr; + iosapic_lists[index].gsi_base = gsi_base; + iosapic_lists[index].num_rte = num_rte; #ifdef CONFIG_NUMA - iosapic_lists[index].node = MAX_NUMNODES; + iosapic_lists[index].node = MAX_NUMNODES; #endif - } + spin_lock_init(&iosapic_lists[index].lock); spin_unlock_irqrestore(&iosapic_lock, flags); if ((gsi_base == 0) && pcat_compat) { @@ -1157,25 +1105,22 @@ iosapic_remove (unsigned int gsi_base) unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); - { - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", - __FUNCTION__, gsi_base); - goto out; - } - - if (iosapic_lists[index].rtes_inuse) { - err = -EBUSY; - printk(KERN_WARNING - "%s: IOSAPIC for GSI base %u is busy\n", - __FUNCTION__, gsi_base); - goto out; - } + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", + __FUNCTION__, gsi_base); + goto out; + } - iounmap(iosapic_lists[index].addr); - iosapic_free(index); + if (iosapic_lists[index].rtes_inuse) { + err = -EBUSY; + printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + __FUNCTION__, gsi_base); + goto out; } + + iounmap(iosapic_lists[index].addr); + iosapic_free(index); out: spin_unlock_irqrestore(&iosapic_lock, flags); return err; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 407b45870489..44be1c952b7c 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -33,9 +33,14 @@ void ack_bad_irq(unsigned int irq) } #ifdef CONFIG_IA64_GENERIC +ia64_vector __ia64_irq_to_vector(int irq) +{ + return irq_cfg[irq].vector; +} + unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return 
(unsigned int) vec; + return __get_cpu_var(vector_irq)[vec]; } #endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index bc47049f060f..00a4599e5f47 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -46,6 +46,12 @@ #define IRQ_DEBUG 0 +#define IRQ_VECTOR_UNASSIGNED (0) + +#define IRQ_UNUSED (0) +#define IRQ_USED (1) +#define IRQ_RSVD (2) + /* These can be overridden in platform_irq_init */ int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; @@ -54,6 +60,8 @@ int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; void __iomem *ipi_base_addr = ((void __iomem *) (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); +static cpumask_t vector_allocation_domain(int cpu); + /* * Legacy IRQ to IA-64 vector translation table. */ @@ -64,46 +72,265 @@ __u8 isa_irq_to_vector_map[16] = { }; EXPORT_SYMBOL(isa_irq_to_vector_map); -static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)]; +DEFINE_SPINLOCK(vector_lock); + +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { + [0 ... NR_IRQS - 1] = { + .vector = IRQ_VECTOR_UNASSIGNED, + .domain = CPU_MASK_NONE + } +}; + +DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { + [0 ... IA64_NUM_VECTORS - 1] = -1 +}; + +static cpumask_t vector_table[IA64_NUM_VECTORS] = { + [0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE +}; + +static int irq_status[NR_IRQS] = { + [0 ... NR_IRQS -1] = IRQ_UNUSED +}; + +int check_irq_used(int irq) +{ + if (irq_status[irq] == IRQ_USED) + return 1; + + return -1; +} + +static inline int find_unassigned_irq(void) +{ + int irq; + + for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) + if (irq_status[irq] == IRQ_UNUSED) + return irq; + return -ENOSPC; +} + +static inline int find_unassigned_vector(cpumask_t domain) +{ + cpumask_t mask; + int pos, vector; + + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; + + for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { + vector = IA64_FIRST_DEVICE_VECTOR + pos; + cpus_and(mask, domain, vector_table[vector]); + if (!cpus_empty(mask)) + continue; + return vector; + } + return -ENOSPC; +} + +static int __bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + cpumask_t mask; + int cpu; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); + + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; + if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain)) + return 0; + if (cfg->vector != IRQ_VECTOR_UNASSIGNED) + return -EBUSY; + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + irq_status[irq] = IRQ_USED; + cpus_or(vector_table[vector], vector_table[vector], domain); + return 0; +} + +int bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __bind_irq_vector(irq, vector, domain); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void __clear_irq_vector(int irq) +{ + int vector, cpu; + cpumask_t mask; + cpumask_t domain; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); + vector = cfg->vector; + domain = cfg->domain; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + cfg->vector = 
IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + irq_status[irq] = IRQ_UNUSED; + cpus_andnot(vector_table[vector], vector_table[vector], domain); +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} int assign_irq_vector (int irq) { - int pos, vector; - again: - pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); - vector = IA64_FIRST_DEVICE_VECTOR + pos; - if (vector > IA64_LAST_DEVICE_VECTOR) - return -ENOSPC; - if (test_and_set_bit(pos, ia64_vector_mask)) - goto again; + unsigned long flags; + int vector, cpu; + cpumask_t domain; + + vector = -ENOSPC; + + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + if (irq == AUTO_ASSIGN) + irq = vector; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); return vector; } void free_irq_vector (int vector) { - int pos; - - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) return; - - pos = vector - IA64_FIRST_DEVICE_VECTOR; - if (!test_and_clear_bit(pos, ia64_vector_mask)) - printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); + clear_irq_vector(vector); } int reserve_irq_vector (int vector) { - int pos; - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) return -EINVAL; + return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); +} + +/* + * Initialize vector_irq on a new cpu. This function must be called + * with vector_lock held. 
+ */ +void __setup_vector_irq(int cpu) +{ + int irq, vector; + + /* Clear vector_irq */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + per_cpu(vector_irq, cpu)[vector] = -1; + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQS; ++irq) { + if (!cpu_isset(cpu, irq_cfg[irq].domain)) + continue; + vector = irq_to_vector(irq); + per_cpu(vector_irq, cpu)[vector] = irq; + } +} - pos = vector - IA64_FIRST_DEVICE_VECTOR; - return test_and_set_bit(pos, ia64_vector_mask); +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) +static enum vector_domain_type { + VECTOR_DOMAIN_NONE, + VECTOR_DOMAIN_PERCPU +} vector_domain_type = VECTOR_DOMAIN_NONE; + +static cpumask_t vector_allocation_domain(int cpu) +{ + if (vector_domain_type == VECTOR_DOMAIN_PERCPU) + return cpumask_of_cpu(cpu); + return CPU_MASK_ALL; +} + +static int __init parse_vector_domain(char *arg) +{ + if (!arg) + return -EINVAL; + if (!strcmp(arg, "percpu")) { + vector_domain_type = VECTOR_DOMAIN_PERCPU; + no_int_routing = 1; + } + return 0; +} +early_param("vector", parse_vector_domain); +#else +static cpumask_t vector_allocation_domain(int cpu) +{ + return CPU_MASK_ALL; +} +#endif + + +void destroy_and_reserve_irq(unsigned int irq) +{ + unsigned long flags; + + dynamic_irq_cleanup(irq); + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + irq_status[irq] = IRQ_RSVD; + spin_unlock_irqrestore(&vector_lock, flags); +} + +static int __reassign_irq_vector(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpu_isset(cpu, cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + __clear_irq_vector(irq); + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int reassign_irq_vector(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __reassign_irq_vector(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; } /* @@ -111,18 +338,35 @@ reserve_irq_vector (int vector) */ int create_irq(void) { - int vector = assign_irq_vector(AUTO_ASSIGN); - - if (vector >= 0) - dynamic_irq_init(vector); - - return vector; + unsigned long flags; + int irq, vector, cpu; + cpumask_t domain; + + irq = vector = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + irq = find_unassigned_irq(); + if (irq < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + if (irq >= 0) + dynamic_irq_init(irq); + return irq; } void destroy_irq(unsigned int irq) { dynamic_irq_cleanup(irq); - free_irq_vector(irq); + clear_irq_vector(irq); } #ifdef CONFIG_SMP @@ -188,10 +432,18 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) } else if (unlikely(IS_RESCHEDULE(vector))) kstat_this_cpu.irqs[vector]++; else { + int irq = local_vector_to_irq(vector); + ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); - generic_handle_irq(local_vector_to_irq(vector)); + if (unlikely(irq < 0)) { + printk(KERN_ERR "%s: Unexpected interrupt " + "vector %d on CPU %d is not mapped " + "to any IRQ!\n", __FUNCTION__, vector, + smp_processor_id()); + } else + generic_handle_irq(irq); /* * 
Disable interrupts and send EOI:
@@ -239,6 +491,7 @@ void ia64_process_pending_intr(void)
 		kstat_this_cpu.irqs[vector]++;
 	else {
 		struct pt_regs *old_regs = set_irq_regs(NULL);
+		int irq = local_vector_to_irq(vector);
 
 		ia64_setreg(_IA64_REG_CR_TPR, vector);
 		ia64_srlz_d();
@@ -249,8 +502,15 @@ void ia64_process_pending_intr(void)
 		 * it will work. I hope it works!.
 		 * Probably could shared code.
 		 */
-		vectors_in_migration[local_vector_to_irq(vector)]=0;
-		generic_handle_irq(local_vector_to_irq(vector));
+		if (unlikely(irq < 0)) {
+			printk(KERN_ERR "%s: Unexpected interrupt "
+			       "vector %d on CPU %d is not mapped "
+			       "to any IRQ!\n", __FUNCTION__, vector,
+			       smp_processor_id());
+		} else {
+			vectors_in_migration[irq] = 0;
+			generic_handle_irq(irq);
+		}
 		set_irq_regs(old_regs);
 
 		/*
@@ -301,14 +561,13 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action)
 	irq_desc_t *desc;
 	unsigned int irq;
 
-	for (irq = 0; irq < NR_IRQS; ++irq)
-		if (irq_to_vector(irq) == vec) {
-			desc = irq_desc + irq;
-			desc->status |= IRQ_PER_CPU;
-			desc->chip = &irq_type_ia64_lsapic;
-			if (action)
-				setup_irq(irq, action);
-		}
+	irq = vec;
+	BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
+	desc = irq_desc + irq;
+	desc->status |= IRQ_PER_CPU;
+	desc->chip = &irq_type_ia64_lsapic;
+	if (action)
+		setup_irq(irq, action);
 }
 
 void __init
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 5bc46f151344..5dc98b5abcfb 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -936,10 +936,15 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
 	return;
 }
 
+unsigned long arch_deref_entry_point(void *entry)
+{
+	return ((struct fnptr *)entry)->ip;
+}
+
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
-	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+	unsigned long addr = arch_deref_entry_point(jp->entry);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct param_bsp_cfm pa;
 	int bytes;
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 13df337508e7..7ccb228ceedc 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -13,14 +13,6 @@
 struct ia64_machine_vector ia64_mv;
 EXPORT_SYMBOL(ia64_mv);
 
-static __initdata const char *mvec_name;
-static __init int setup_mvec(char *s)
-{
-	mvec_name = s;
-	return 0;
-}
-early_param("machvec", setup_mvec);
-
 static struct ia64_machine_vector * __init
 lookup_machvec (const char *name)
 {
@@ -41,7 +33,7 @@ machvec_init (const char *name)
 	struct ia64_machine_vector *mv;
 
 	if (!name)
-		name = mvec_name ? mvec_name : acpi_get_sysname();
+		name = acpi_get_sysname();
 	mv = lookup_machvec(name);
 	if (!mv)
 		panic("generic kernel failed to find machine vector for"
@@ -51,6 +43,23 @@ machvec_init (const char *name)
 	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
 }
 
+void __init
+machvec_init_from_cmdline(const char *cmdline)
+{
+	char str[64];
+	const char *start;
+	char *end;
+
+	if (! 
(start = strstr(cmdline, "machvec=")) ) + return machvec_init(NULL); + + strlcpy(str, start + strlen("machvec="), sizeof(str)); + if ( (end = strchr(str, ' ')) ) + *end = '\0'; + + return machvec_init(str); +} + #endif /* CONFIG_IA64_GENERIC */ void diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 4b5daa3cc0fe..63b73f3d4c9f 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1750,8 +1750,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset, strncpy(p->comm, type, sizeof(p->comm)-1); } -/* Do per-CPU MCA-related initialization. */ +/* Caller prevents this from being called after init */ +static void * __init_refok mca_bootmem(void) +{ + void *p; + + p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + + KERNEL_STACK_SIZE); + return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE); +} +/* Do per-CPU MCA-related initialization. */ void __cpuinit ia64_mca_cpu_init(void *cpu_data) { @@ -1763,11 +1772,7 @@ ia64_mca_cpu_init(void *cpu_data) int cpu; first_time = 0; - mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) - * NR_CPUS + KERNEL_STACK_SIZE); - mca_data = (void *)(((unsigned long)mca_data + - KERNEL_STACK_SIZE - 1) & - (-KERNEL_STACK_SIZE)); + mca_data = mca_bootmem(); for (cpu = 0; cpu < NR_CPUS; cpu++) { format_mca_init_stack(mca_data, offsetof(struct ia64_mca_cpu, mca_stack), @@ -2013,22 +2018,26 @@ ia64_mca_late_init(void) if (cpe_vector >= 0) { /* If platform supports CPEI, enable the irq. */ - cpe_poll_enabled = 0; - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == cpe_vector) { - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - setup_irq(irq, &mca_cpe_irqaction); - ia64_cpe_irq = irq; - } - ia64_mca_register_cpev(cpe_vector); - IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); - } else { - /* If platform doesn't support CPEI, get the timer going. */ - if (cpe_poll_enabled) { - ia64_mca_cpe_poll(0UL); - IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__); + irq = local_vector_to_irq(cpe_vector); + if (irq > 0) { + cpe_poll_enabled = 0; + desc = irq_desc + irq; + desc->status |= IRQ_PER_CPU; + setup_irq(irq, &mca_cpe_irqaction); + ia64_cpe_irq = irq; + ia64_mca_register_cpev(cpe_vector); + IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", + __FUNCTION__); + return 0; } + printk(KERN_ERR "%s: Failed to find irq for CPE " + "interrupt handler, vector %d\n", + __FUNCTION__, cpe_vector); + } + /* If platform doesn't support CPEI, get the timer going. 
*/ + if (cpe_poll_enabled) { + ia64_mca_cpe_poll(0UL); + IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__); } } #endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index c81080df70df..2fdbd5c3f213 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -13,6 +13,7 @@ #define MSI_DATA_VECTOR_SHIFT 0 #define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define MSI_DATA_VECTOR_MASK 0xffffff00 #define MSI_DATA_DELIVERY_SHIFT 8 #define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) @@ -50,17 +51,29 @@ static struct irq_chip ia64_msi_chip; static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) { struct msi_msg msg; - u32 addr; + u32 addr, data; + int cpu = first_cpu(cpu_mask); + + if (!cpu_online(cpu)) + return; + + if (reassign_irq_vector(irq, cpu)) + return; read_msi_msg(irq, &msg); addr = msg.address_lo; addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(first_cpu(cpu_mask))); + addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); msg.address_lo = addr; + data = msg.data; + data &= MSI_DATA_VECTOR_MASK; + data |= MSI_DATA_VECTOR(irq_to_vector(irq)); + msg.data = data; + write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpu_mask; + irq_desc[irq].affinity = cpumask_of_cpu(cpu); } #endif /* CONFIG_SMP */ @@ -69,13 +82,15 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) struct msi_msg msg; unsigned long dest_phys_id; int irq, vector; + cpumask_t mask; irq = create_irq(); if (irq < 0) return irq; set_irq_msi(irq, desc); - dest_phys_id = cpu_physical_id(first_cpu(cpu_online_map)); + cpus_and(mask, irq_to_domain(irq), cpu_online_map); + dest_phys_id = cpu_physical_id(first_cpu(mask)); vector = irq_to_vector(irq); msg.address_hi = 0; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index b7133cabdbea..14b8e5a6222b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -591,13 +591,13 @@ pfm_set_task_notify(struct task_struct *task) struct thread_info *info; info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); - set_bit(TIF_NOTIFY_RESUME, &info->flags); + set_bit(TIF_PERFMON_WORK, &info->flags); } static inline void pfm_clear_task_notify(void) { - clear_thread_flag(TIF_NOTIFY_RESUME); + clear_thread_flag(TIF_PERFMON_WORK); } static inline void diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index fa40cba43350..c613fc0e91cc 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -198,9 +198,13 @@ default_idle (void) { local_irq_enable(); while (!need_resched()) { - if (can_do_pal_halt) - safe_halt(); - else + if (can_do_pal_halt) { + local_irq_disable(); + if (!need_resched()) { + safe_halt(); + } + local_irq_enable(); + } else cpu_relax(); } } @@ -499,7 +503,8 @@ copy_thread (int nr, unsigned long clone_flags, /* Copy partially mapped page list */ if (!retval) - retval = ia32_copy_partial_page_list(p, clone_flags); + retval = ia32_copy_ia64_partial_page_list(p, + clone_flags); } #endif @@ -728,7 +733,7 @@ flush_thread (void) ia64_drop_fpu(current); #ifdef CONFIG_IA32_SUPPORT if (IS_IA32_PROCESS(task_pt_regs(current))) { - ia32_drop_partial_page_list(current); + ia32_drop_ia64_partial_page_list(current); current->thread.task_size = IA32_PAGE_OFFSET; set_fs(USER_DS); } @@ -754,7 +759,7 @@ exit_thread (void) pfm_release_debug_registers(current); #endif if (IS_IA32_PROCESS(task_pt_regs(current))) - ia32_drop_partial_page_list(current); + 
ia32_drop_ia64_partial_page_list(current); } unsigned long diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 00f803246948..2e96f17b2f3b 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -951,10 +951,14 @@ access_uarea (struct task_struct *child, unsigned long addr, return 0; case PT_CR_IPSR: - if (write_access) - pt->cr_ipsr = ((*data & IPSR_MASK) + if (write_access) { + unsigned long tmp = *data; + /* psr.ri==3 is a reserved value: SDM 2:25 */ + if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) + tmp &= ~IA64_PSR_RI; + pt->cr_ipsr = ((tmp & IPSR_MASK) | (pt->cr_ipsr & ~IPSR_MASK)); - else + } else *data = (pt->cr_ipsr & IPSR_MASK); return 0; @@ -1573,7 +1577,6 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) case PTRACE_DETACH: /* detach a process that was attached. */ - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); ret = ptrace_detach(child, data); goto out_tsk; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4d9864cc92c9..9e392a30d197 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -60,7 +60,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/unistd.h> -#include <asm/system.h> +#include <asm/hpsim.h> #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) # error "struct cpuinfo_ia64 too big!" @@ -390,6 +390,8 @@ early_console_setup (char *cmdline) if (!efi_setup_pcdp_console(cmdline)) earlycons++; #endif + if (!simcons_register()) + earlycons++; return (earlycons) ? 0 : -1; } @@ -491,12 +493,17 @@ setup_arch (char **cmdline_p) efi_init(); io_port_init(); - parse_early_param(); - #ifdef CONFIG_IA64_GENERIC - machvec_init(NULL); + /* machvec needs to be parsed from the command line + * before parse_early_param() is called to ensure + * that ia64_mv is initialised before any command line + * settings may cause console setup to occur + */ + machvec_init_from_cmdline(*cmdline_p); #endif + parse_early_param(); + if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); @@ -949,6 +956,11 @@ cpu_init (void) /* clear TPR & XTP to enable all interrupt classes: */ ia64_setreg(_IA64_REG_CR_TPR, 0); + + /* Clear any pending interrupts left by SAL/EFI */ + while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) + ia64_eoi(); + #ifdef CONFIG_SMP normal_xtp(); #endif @@ -980,15 +992,6 @@ cpu_init (void) pm_idle = default_idle; } -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible. - */ -void sched_cacheflush(void) -{ - ia64_sal_cache_flush(3); -} - void __init check_bugs (void) { diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index b3a47f986e1e..4e446aa5f4ac 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -82,7 +82,7 @@ static volatile struct call_data_struct *call_data; #define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ -static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation); extern void cpu_halt (void); @@ -346,7 +346,7 @@ smp_flush_tlb_mm (struct mm_struct *mm) } /* - * Run a function on another CPU + * Run a function on a specific CPU * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. * <nonatomic> Currently unused. 
@@ -366,9 +366,11 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
 	int me = get_cpu(); /* prevent preemption and reschedule on another processor */
 
 	if (cpuid == me) {
-		printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
+		local_irq_disable();
+		func(info);
+		local_irq_enable();
 		put_cpu();
-		return -EBUSY;
+		return 0;
 	}
 
 	data.func = func;
@@ -468,7 +470,7 @@ smp_send_stop (void)
 	send_IPI_allbutself(IPI_CPU_STOP);
 }
 
-int __init
+int
 setup_profiling_timer (unsigned int multiplier)
 {
 	return -EINVAL;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 3c9d8e6089cf..308772f7cddc 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -58,6 +58,7 @@
 #include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
+#include <asm/sn/arch.h>
 
 #define SMP_DEBUG 0
 
@@ -395,9 +396,13 @@ smp_callin (void)
 	fix_b0_for_bsp();
 
 	lock_ipi_calllock();
+	spin_lock(&vector_lock);
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(cpuid);
 	cpu_set(cpuid, cpu_online_map);
 	unlock_ipi_calllock();
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
 
 	smp_setup_percpu_timer();
 
@@ -483,7 +488,7 @@ struct create_idle {
 	int cpu;
 };
 
-void
+void __cpuinit
 do_fork_idle(struct work_struct *work)
 {
 	struct create_idle *c_idle =
@@ -493,7 +498,7 @@ do_fork_idle(struct work_struct *work)
 	complete(&c_idle->done);
 }
 
-static int __devinit
+static int __cpuinit
 do_boot_cpu (int sapicid, int cpu)
 {
 	int timeout;
@@ -726,6 +731,11 @@ int __cpu_disable(void)
 		return (-EBUSY);
 	}
 
+	if (ia64_platform_is("sn2")) {
+		if (!sn_cpu_disable_allowed(cpu))
+			return -EBUSY;
+	}
+
 	cpu_clear(cpu, cpu_online_map);
 
 	if (migrate_platform_irqs(cpu)) {
@@ -804,7 +814,7 @@ set_cpu_sibling_map(int cpu)
 	}
 }
 
-int __devinit
+int __cpuinit
 __cpu_up (unsigned int cpu)
 {
 	int ret;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 3486fe7d6e65..98cfc90cab1d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/efi.h>
 #include <linux/timex.h>
+#include <linux/clocksource.h>
 
 #include <asm/machvec.h>
 #include <asm/delay.h>
@@ -28,6 +29,16 @@
 #include <asm/sections.h>
 #include <asm/system.h>
 
+#include "fsyscall_gtod_data.h"
+
+static cycle_t itc_get_cycles(void);
+
+struct fsyscall_gtod_data_t fsyscall_gtod_data = {
+	.lock = SEQLOCK_UNLOCKED,
+};
+
+struct itc_jitter_data_t itc_jitter_data;
+
 volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
@@ -37,11 +48,16 @@ EXPORT_SYMBOL(last_cli_ip);
 
 #endif
 
-static struct time_interpolator itc_interpolator = {
-	.shift = 16,
-	.mask = 0xffffffffffffffffLL,
-	.source = TIME_SOURCE_CPU
+static struct clocksource clocksource_itc = {
+	.name = "itc",
+	.rating = 350,
+	.read = itc_get_cycles,
+	.mask = CLOCKSOURCE_MASK(64),
+	.mult = 0, /* to be calculated */
+	.shift = 16,
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
+static struct clocksource *itc_clocksource;
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
@@ -210,8 +226,6 @@ ia64_init_itm (void)
 					+ itc_freq/2)/itc_freq;
 
 	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
-		itc_interpolator.frequency = local_cpu_data->itc_freq;
-		itc_interpolator.drift = itc_drift;
 #ifdef CONFIG_SMP
 		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
 		 * Jitter compensation requires a cmpxchg which may limit
@@ -223,15 +237,64 @@ ia64_init_itm (void)
 		 * even going backward) if the ITC offsets between the individual CPUs
 		 * are too large.
 		 */
-		if (!nojitter) itc_interpolator.jitter = 1;
+		if (!nojitter)
+			itc_jitter_data.itc_jitter = 1;
 #endif
-		register_time_interpolator(&itc_interpolator);
-	}
+	} else
+		/*
+		 * ITC is drifty and we have not synchronized the ITCs in
+		 * smpboot.c.  ITC values may fluctuate significantly between
+		 * processors.  The clock should not be used for hrtimers;
+		 * mark the itc as useful only for boot and testing.
+		 *
+		 * Note that jitter compensation is off!  There is no point
+		 * in synchronizing the ITCs since their offsets may be large
+		 * and change over time.
+		 *
+		 * The only way to fix this would be to repeatedly sync the
+		 * ITCs.  Until that time we have to avoid using the ITC.
+		 */
+		clocksource_itc.rating = 50;
 
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
+
+	if (!itc_clocksource) {
+		/* Sort out mult/shift values: */
+		clocksource_itc.mult =
+			clocksource_hz2mult(local_cpu_data->itc_freq,
+					    clocksource_itc.shift);
+		clocksource_register(&clocksource_itc);
+		itc_clocksource = &clocksource_itc;
+	}
+}
+
+static cycle_t itc_get_cycles(void)
+{
+	u64 lcycle, now, ret;
+
+	if (!itc_jitter_data.itc_jitter)
+		return get_cycles();
+
+	lcycle = itc_jitter_data.itc_lastcycle;
+	now = get_cycles();
+	if (lcycle && time_after(lcycle, now))
+		return lcycle;
+
+	/*
+	 * Keep track of the last clock value returned.  In an SMP
+	 * environment a CPU may lose the cmpxchg race; in that case
+	 * cmpxchg returns the newer value that the winning CPU stored,
+	 * so return that value instead of 'now'.
+	 */
+	ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
+	if (unlikely(ret != lcycle))
+		return ret;
+
+	return now;
+}
+
 static struct irqaction timer_irqaction = {
 	.handler = timer_interrupt,
 	.flags = IRQF_DISABLED | IRQF_IRQPOLL,
@@ -307,3 +370,34 @@ ia64_setup_printk_clock(void)
 	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
 		ia64_printk_clock = ia64_itc_printk_clock;
 }
+
+void update_vsyscall(struct timespec *wall, struct clocksource *c)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags);
+
+	/* copy fsyscall clock data */
+	fsyscall_gtod_data.clk_mask = c->mask;
+	fsyscall_gtod_data.clk_mult = c->mult;
+	fsyscall_gtod_data.clk_shift = c->shift;
+	fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
+	fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
+
+	/* copy kernel time structures */
+	fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
+	fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
+	fsyscall_gtod_data.monotonic_time.tv_sec = wall_to_monotonic.tv_sec +
+		wall->tv_sec;
+	fsyscall_gtod_data.monotonic_time.tv_nsec = wall_to_monotonic.tv_nsec +
+		wall->tv_nsec;
+
+	/* normalize */
+	while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) {
+		fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC;
+		fsyscall_gtod_data.monotonic_time.tv_sec++;
+	}
+
+	write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags);
+}
+
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5a65965c8b53..00232b4357ba 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -20,6 +20,8 @@ PHDRS {
   code PT_LOAD;
   percpu PT_LOAD;
   data PT_LOAD;
+  note PT_NOTE;
+  unwind 0x70000001;	/* PT_IA_64_UNWIND, but ld doesn't match the name */
 }
 SECTIONS
 {
@@ -50,6 +52,8 @@ SECTIONS
 		KPROBES_TEXT
 		*(.gnu.linkonce.t*)
 	}
+	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET)
+		{ *(.text.head) }
 	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)
 		{ *(.text2) }
 #ifdef CONFIG_SMP
@@ -60,6 +64,9 @@ SECTIONS
 
   /* Read-only data */
 
+  NOTES :code :note	/* put .notes in text and mark in PT_NOTE */
+  code_continues : {} :code /* switch back to regular program... */
+
   /* Exception table */
   . = ALIGN(16);
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
@@ -97,7 +104,8 @@ SECTIONS
 		__start_unwind = .;
 		*(.IA_64.unwind*)
 		__end_unwind = .;
-	}
+	} :code :unwind
+	code_continues2 : {} : code
 
   RODATA
 
@@ -206,6 +214,7 @@ SECTIONS
 	  {
 		__per_cpu_start = .;
 		*(.data.percpu)
+		*(.data.percpu.shared_aligned)
 		__per_cpu_end = .;
 	  }
   . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits
@@ -273,10 +282,6 @@ SECTIONS
   .debug_typenames 0 : { *(.debug_typenames) }
   .debug_varnames  0 : { *(.debug_varnames) }
   /* These must appear regardless of  .  */
-  /* Discard them for now since Intel SoftSDV cannot handle them.
-  .comment 0 : { *(.comment) }
-  .note 0 : { *(.note) }
-  */
   /DISCARD/ : { *(.comment) }
  /DISCARD/ : { *(.note) }
 }
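
A few illustrative sketches follow; none of them are part of the patch. The irq_ia64.c rework above drops the single global vector bitmap and instead records, per vector, the set of CPUs (the "domain") already using it, which is what lets a vector number be reused across disjoint per-CPU domains. The standalone C sketch below models that bookkeeping with plain bitmasks; NVEC and all names are hypothetical stand-ins for the kernel's cpumask-based code.

#include <stdio.h>

#define NVEC 8	/* hypothetical stand-in for IA64_NUM_DEVICE_VECTORS */

/* Per-vector mask of CPUs already using that vector; the kernel's
 * vector_table[] holds a cpumask_t instead of an unsigned int. */
static unsigned int vector_table[NVEC];

/* A vector is free for 'domain' if no CPU in the domain uses it yet;
 * this mirrors the cpus_and() test in find_unassigned_vector(). */
static int find_unassigned_vector(unsigned int domain)
{
	for (int v = 0; v < NVEC; v++)
		if (!(vector_table[v] & domain))
			return v;
	return -1;	/* -ENOSPC in the kernel */
}

/* Record the binding, like the cpus_or() in __bind_irq_vector(). */
static void bind_vector(int v, unsigned int domain)
{
	vector_table[v] |= domain;
}

int main(void)
{
	unsigned int all = 0xf, cpu0 = 1 << 0, cpu1 = 1 << 1;
	int vg, va, vb;

	vg = find_unassigned_vector(all);	/* IRQ routable anywhere */
	bind_vector(vg, all);
	va = find_unassigned_vector(cpu0);	/* single-CPU domain */
	bind_vector(va, cpu0);
	vb = find_unassigned_vector(cpu1);	/* reuses va: domains are disjoint */
	bind_vector(vb, cpu1);
	printf("global=%d cpu0=%d cpu1=%d\n", vg, va, vb);	/* 0 1 1 */
	return 0;
}

With the "vector=percpu" boot option each device IRQ gets a single-CPU domain, so the same vector number can serve different devices on different CPUs, which is the effect the example prints.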
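
ia64_set_msi_irq_affinity() above retargets an already-programmed MSI by masking the old vector field out of the message data and OR-ing in the new one (and likewise for the destination ID in the address word). Below is a minimal sketch of that read-modify-write, borrowing the MSI_DATA_* macro shapes from the patch; the register value is invented.

#include <stdint.h>
#include <stdio.h>

#define MSI_DATA_VECTOR_SHIFT 0
#define MSI_DATA_VECTOR(v)    (((uint8_t)(v)) << MSI_DATA_VECTOR_SHIFT)
#define MSI_DATA_VECTOR_MASK  0xffffff00u

int main(void)
{
	uint32_t data = 0x00004042;	/* hypothetical MSI data word, vector 0x42 */

	data &= MSI_DATA_VECTOR_MASK;	/* clear the old vector field */
	data |= MSI_DATA_VECTOR(0x51);	/* install the reassigned vector */
	printf("msi data: 0x%08x\n", data);	/* -> 0x00004051 */
	return 0;
}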
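
itc_get_cycles() above keeps unsynchronized per-CPU cycle counters looking monotonic: remember the last value handed out, clamp any read that would go backwards, and publish new values with cmpxchg so a CPU that loses the race adopts the winner's value. Here is a userspace C11 approximation; stdatomic stands in for the kernel's cmpxchg, and read_raw() is a made-up stand-in for get_cycles().

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical jittery counter in place of get_cycles(); two "CPUs"
 * reading it could observe slightly different values. */
static _Atomic uint64_t fake_counter;

static uint64_t read_raw(void)
{
	return atomic_fetch_add(&fake_counter, 3);
}

/* Last value handed out (itc_jitter_data.itc_lastcycle in the patch). */
static _Atomic uint64_t lastcycle;

static uint64_t read_monotonic(void)
{
	uint64_t last = atomic_load(&lastcycle);
	uint64_t now = read_raw();

	/* Some CPU already returned a later value: clamp to it. */
	if (last && last > now)
		return last;

	/* Publish 'now'. On failure the compare-exchange leaves the
	 * winning CPU's value in 'last', and we return that instead,
	 * the same rule as the cmpxchg in itc_get_cycles(). */
	if (!atomic_compare_exchange_strong(&lastcycle, &last, now))
		return last;
	return now;
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("%llu\n", (unsigned long long)read_monotonic());
	return 0;
}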
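
Finally, update_vsyscall() above publishes the clocksource parameters and wall/monotonic time under a seqlock, so the lock-free fsyscall reader either sees a consistent snapshot or retries; it also normalizes the monotonic sum by carrying overflowing nanoseconds into seconds. The sketch below models the writer/reader protocol and the normalization in userspace; the names are invented, and plain sequentially-consistent atomics replace the kernel's seqlock primitives.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

struct gtod {
	_Atomic unsigned int seq;	/* odd while an update is in flight */
	_Atomic int64_t sec, nsec;
};

static struct gtod gtod;

/* Writer, like update_vsyscall(): bump seq to odd, copy, bump to even. */
static void publish(int64_t wall_sec, int64_t wall_nsec,
		    int64_t off_sec, int64_t off_nsec)
{
	int64_t sec = wall_sec + off_sec;
	int64_t nsec = wall_nsec + off_nsec;

	while (nsec >= NSEC_PER_SEC) {	/* normalize: carry nsec into sec */
		nsec -= NSEC_PER_SEC;
		sec++;
	}
	atomic_fetch_add(&gtod.seq, 1);	/* odd: write in progress */
	atomic_store(&gtod.sec, sec);
	atomic_store(&gtod.nsec, nsec);
	atomic_fetch_add(&gtod.seq, 1);	/* even: snapshot valid */
}

/* Lock-free reader, as the fsyscall path does: retry if the sequence
 * changed or was odd during the copy. */
static void read_time(int64_t *sec, int64_t *nsec)
{
	unsigned int s1, s2;

	do {
		s1 = atomic_load(&gtod.seq);
		*sec = atomic_load(&gtod.sec);
		*nsec = atomic_load(&gtod.nsec);
		s2 = atomic_load(&gtod.seq);
	} while (s1 != s2 || (s1 & 1));
}

int main(void)
{
	int64_t s, ns;

	publish(1000, 900000000, 0, 200000000);	/* 0.9s + 0.2s carries */
	read_time(&s, &ns);
	printf("monotonic: %lld.%09lld\n", (long long)s, (long long)ns);
	return 0;
}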