112 files changed, 4543 insertions, 1883 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 79f25cef32df..84b861316ce7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -108,6 +108,8 @@ config ARCH_NO_VIRT_TO_BUS
 config PPC
 	bool
 	default y
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
@@ -326,7 +328,8 @@ config KEXEC
 
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
-	depends on (PPC64 && RELOCATABLE) || 6xx
+	depends on PPC64 || 6xx
+	select RELOCATABLE if PPC64
 	help
 	  Build a kernel suitable for use as a kdump capture kernel.
 	  The same kernel binary can be used as production kernel and dump
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index f32829937aad..e84df338ea29 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -208,7 +208,7 @@ image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
 #
 # Theses are default targets to build images which embed device tree blobs.
 # They are only required on boards which do not have FDT support in firmware.
-# Boards with newish u-boot firmare can use the uImage target above
+# Boards with newish u-boot firmware can use the uImage target above
 #
 
 # Board ports in arch/powerpc/platform/40x/Kconfig
@@ -356,7 +356,7 @@ $(obj)/zImage.initrd:	$(addprefix $(obj)/, $(initrd-y))
 	@rm -f $@; ln $< $@
 
 install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
-	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $<
+	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
 
 # anything not in $(targets)
 clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
diff --git a/arch/powerpc/boot/dts/gef_sbc610.dts b/arch/powerpc/boot/dts/gef_sbc610.dts
index 9708b3423bbd..e78c355c7bac 100644
--- a/arch/powerpc/boot/dts/gef_sbc610.dts
+++ b/arch/powerpc/boot/dts/gef_sbc610.dts
@@ -88,6 +88,21 @@
 			compatible = "gef,fpga-regs";
 			reg = <0x4 0x0 0x40>;
 		};
+
+		wdt@4,2000 {
+			compatible = "gef,fpga-wdt";
+			reg = <0x4 0x2000 0x8>;
+			interrupts = <0x1a 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		/* Second watchdog available, driver currently supports one.
+		wdt@4,2010 {
+			compatible = "gef,fpga-wdt";
+			reg = <0x4 0x2010 0x8>;
+			interrupts = <0x1b 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		*/
 		gef_pic: pic@4,4000 {
 			#interrupt-cells = <1>;
 			interrupt-controller;
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
index 14534d04e4db..6e34f170fa62 100644
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc836x_mds.dts
@@ -69,8 +69,18 @@
 		};
 
 		bcsr@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
  			compatible = "fsl,mpc8360mds-bcsr";
 			reg = <1 0 0x8000>;
+			ranges = <0 1 0 0x8000>;
+
+			bcsr13: gpio-controller@d {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8360mds-bcsr-gpio";
+				reg = <0xd 1>;
+				gpio-controller;
+			};
 		};
 	};
 
@@ -195,10 +205,21 @@
 		};
 
 		par_io@1400 {
+			#address-cells = <1>;
+			#size-cells = <1>;
 			reg = <0x1400 0x100>;
+			ranges = <0 0x1400 0x100>;
 			device_type = "par_io";
 			num-ports = <7>;
 
+			qe_pio_b: gpio-controller@18 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8360-qe-pario-bank",
+					     "fsl,mpc8323-qe-pario-bank";
+				reg = <0x18 0x18>;
+				gpio-controller;
+			};
+
 			pio1: ucc_pin@01 {
 				pio-map = <
 			/* port  pin  dir  open_drain  assignment  has_irq */
@@ -282,6 +303,15 @@
 			};
 		};
 
+		timer@440 {
+			compatible = "fsl,mpc8360-qe-gtm",
+				     "fsl,qe-gtm", "fsl,gtm";
+			reg = <0x440 0x40>;
+			clock-frequency = <132000000>;
+			interrupts = <12 13 14 15>;
+			interrupt-parent = <&qeic>;
+		};
+
 		spi@4c0 {
 			cell-index = <0>;
 			compatible = "fsl,spi";
@@ -301,11 +331,20 @@
 		};
 
 		usb@6c0 {
-			compatible = "qe_udc";
+			compatible = "fsl,mpc8360-qe-usb",
+				     "fsl,mpc8323-qe-usb";
 			reg = <0x6c0 0x40 0x8b00 0x100>;
 			interrupts = <11>;
 			interrupt-parent = <&qeic>;
-			mode = "slave";
+			fsl,fullspeed-clock = "clk21";
+			fsl,lowspeed-clock = "brg9";
+			gpios = <&qe_pio_b  2 0   /* USBOE */
+				 &qe_pio_b  3 0   /* USBTP */
+				 &qe_pio_b  8 0   /* USBTN */
+				 &qe_pio_b  9 0   /* USBRP */
+				 &qe_pio_b 11 0   /* USBRN */
+				 &bcsr13    5 0   /* SPEED */
+				 &bcsr13    4 1>; /* POWER */
 		};
 
 		enet0: ucc@2000 {
diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts
index decadf3d9e98..37b789510d68 100644
--- a/arch/powerpc/boot/dts/mpc836x_rdk.dts
+++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts
@@ -218,8 +218,23 @@
 				reg = <0x440 0x40>;
 				interrupts = <12 13 14 15>;
 				interrupt-parent = <&qeic>;
-				/* filled by u-boot */
-				clock-frequency = <0>;
+				clock-frequency = <166666666>;
+			};
+
+			usb@6c0 {
+				compatible = "fsl,mpc8360-qe-usb",
+					     "fsl,mpc8323-qe-usb";
+				reg = <0x6c0 0x40 0x8b00 0x100>;
+				interrupts = <11>;
+				interrupt-parent = <&qeic>;
+				fsl,fullspeed-clock = "clk21";
+				gpios = <&qe_pio_b  2 0 /* USBOE */
+					 &qe_pio_b  3 0 /* USBTP */
+					 &qe_pio_b  8 0 /* USBTN */
+					 &qe_pio_b  9 0 /* USBRP */
+					 &qe_pio_b 11 0 /* USBRN */
+					 &qe_pio_e 20 0 /* SPEED */
+					 &qe_pio_e 21 1 /* POWER */>;
 			};
 
 			spi@4c0 {
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
index 35d5e248ccd7..4481532cbe77 100644
--- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts
+++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
@@ -26,7 +26,13 @@
 		serial1 = &serial1;
 		pci0 = &pci0;
 		pci1 = &pci1;
-		rapidio0 = &rapidio0;
+/*
+ * Only one of Rapid IO or PCI can be present due to HW limitations and
+ * due to the fact that the 2 now share address space in the new memory
+ * map.  The most likely case is that we have PCI, so comment out the
+ * rapidio node.  Leave it here for reference.
+ */
+		/* rapidio0 = &rapidio0; */
 	};
 
 	cpus {
@@ -62,18 +68,17 @@
 		reg = <0x00000000 0x40000000>;	// 1G at 0x0
 	};
 
-	localbus@f8005000 {
+	localbus@ffe05000 {
 		#address-cells = <2>;
 		#size-cells = <1>;
 		compatible = "fsl,mpc8641-localbus", "simple-bus";
-		reg = <0xf8005000 0x1000>;
+		reg = <0xffe05000 0x1000>;
 		interrupts = <19 2>;
 		interrupt-parent = <&mpic>;
 
-		ranges = <0 0 0xff800000 0x00800000
-			  1 0 0xfe000000 0x01000000
-			  2 0 0xf8200000 0x00100000
-			  3 0 0xf8100000 0x00100000>;
+		ranges = <0 0 0xef800000 0x00800000
+			  2 0 0xffdf8000 0x00008000
+			  3 0 0xffdf0000 0x00008000>;
 
 		flash@0,0 {
 			compatible = "cfi-flash";
@@ -103,13 +108,13 @@
 		};
 	};
 
-	soc8641@f8000000 {
+	soc8641@ffe00000 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		device_type = "soc";
 		compatible = "simple-bus";
-		ranges = <0x00000000 0xf8000000 0x00100000>;
-		reg = <0xf8000000 0x00001000>;	// CCSRBAR
+		ranges = <0x00000000 0xffe00000 0x00100000>;
+		reg = <0xffe00000 0x00001000>;	// CCSRBAR
 		bus-frequency = <0>;
 
 		i2c@3000 {
@@ -340,17 +345,17 @@
 		};
 	};
 
-	pci0: pcie@f8008000 {
+	pci0: pcie@ffe08000 {
 		cell-index = <0>;
 		compatible = "fsl,mpc8641-pcie";
 		device_type = "pci";
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
 		#address-cells = <3>;
-		reg = <0xf8008000 0x1000>;
+		reg = <0xffe08000 0x1000>;
 		bus-range = <0x0 0xff>;
 		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
+			  0x01000000 0x0 0x00000000 0xffc00000 0x0 0x00010000>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <24 2>;
@@ -481,7 +486,7 @@
 
 				  0x01000000 0x0 0x00000000
 				  0x01000000 0x0 0x00000000
-				  0x0 0x00100000>;
+				  0x0 0x00010000>;
 			uli1575@0 {
 				reg = <0 0 0 0 0>;
 				#size-cells = <2>;
@@ -491,7 +496,7 @@
 					  0x0 0x20000000
 					  0x01000000 0x0 0x00000000
 					  0x01000000 0x0 0x00000000
-					  0x0 0x00100000>;
+					  0x0 0x00010000>;
 				isa@1e {
 					device_type = "isa";
 					#interrupt-cells = <2>;
@@ -549,17 +554,17 @@
 
 	};
 
-	pci1: pcie@f8009000 {
+	pci1: pcie@ffe09000 {
 		cell-index = <1>;
 		compatible = "fsl,mpc8641-pcie";
 		device_type = "pci";
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
 		#address-cells = <3>;
-		reg = <0xf8009000 0x1000>;
+		reg = <0xffe09000 0x1000>;
 		bus-range = <0 0xff>;
 		ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
+			  0x01000000 0x0 0x00000000 0xffc10000 0x0 0x00010000>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <25 2>;
@@ -582,18 +587,21 @@
 
 				  0x01000000 0x0 0x00000000
 				  0x01000000 0x0 0x00000000
-				  0x0 0x00100000>;
+				  0x0 0x00010000>;
 		};
 	};
-	rapidio0: rapidio@f80c0000 {
+/*
+	rapidio0: rapidio@ffec0000 {
 		#address-cells = <2>;
 		#size-cells = <2>;
 		compatible = "fsl,rapidio-delta";
-		reg = <0xf80c0000 0x20000>;
-		ranges = <0 0 0xc0000000 0 0x20000000>;
+		reg = <0xffec0000 0x20000>;
+		ranges = <0 0 0x80000000 0 0x20000000>;
 		interrupt-parent = <&mpic>;
-		/* err_irq bell_outb_irq bell_inb_irq
-			msg1_tx_irq msg1_rx_irq	msg2_tx_irq msg2_rx_irq */
+		// err_irq bell_outb_irq bell_inb_irq
+		//	msg1_tx_irq msg1_rx_irq	msg2_tx_irq msg2_rx_irq
 		interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>;
 	};
+*/
+
 };
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index 3b295e8df53f..43cc68bd3192 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -134,7 +134,7 @@
 		};
 
 		USB1: usb@e0000400 {
-			compatible = "ohci-be";
+			compatible = "ibm,usb-ohci-440epx", "ohci-be";
 			reg = <0x00000000 0xe0000400 0x00000060>;
 			interrupt-parent = <&UIC0>;
 			interrupts = <0x15 0x8>;
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index b002bfd56786..51b2387bdba0 100644
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -15,7 +15,7 @@
 #   $2 - kernel image file
 #   $3 - kernel map file
 #   $4 - default install path (blank if root directory)
-#   $5 - kernel boot file, the zImage
+#   $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
 #
 
 # User may have a custom install script
@@ -38,3 +38,15 @@ fi
 
 cat $2 > $4/$image_name
 cp $3 $4/System.map
+
+# Copy all the bootable image files
+path=$4
+shift 4
+while [ $# -ne 0 ]; do
+	image_name=`basename $1`
+	if [ -f $path/$image_name ]; then
+		mv $path/$image_name $path/$image_name.old
+	fi
+	cat $1 > $path/$image_name
+	shift
+done;
diff --git a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
index 635588319e0d..32aeb79216f7 100644
--- a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28-rc3
-# Sat Nov  8 12:40:13 2008
+# Linux kernel version: 2.6.28-rc8
+# Tue Dec 30 11:17:46 2008
 #
 # CONFIG_PPC64 is not set
 
@@ -21,7 +21,10 @@ CONFIG_FSL_BOOKE=y
 CONFIG_FSL_EMB_PERFMON=y
 # CONFIG_PHYS_64BIT is not set
 CONFIG_SPE=y
+CONFIG_PPC_MMU_NOHASH=y
 # CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
 CONFIG_PPC32=y
 CONFIG_WORD_SIZE=32
 # CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
@@ -50,7 +53,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_PPC_OF=y
 CONFIG_OF=y
 CONFIG_PPC_UDBG_16550=y
-# CONFIG_GENERIC_TBSYNC is not set
+CONFIG_GENERIC_TBSYNC=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_GENERIC_BUG=y
 CONFIG_DEFAULT_UIMAGE=y
@@ -62,7 +65,7 @@ CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 # General setup
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
@@ -126,6 +129,7 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -138,6 +142,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 CONFIG_LBD=y
 # CONFIG_BLK_DEV_IO_TRACE is not set
@@ -197,6 +202,7 @@ CONFIG_PPC_I8259=y
 # CONFIG_CPM2 is not set
 CONFIG_FSL_ULI1575=y
 # CONFIG_MPC8xxx_GPIO is not set
+# CONFIG_SIMPLE_GPIO is not set
 
 #
 # Kernel options
@@ -224,6 +230,7 @@ CONFIG_MATH_EMULATION=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 CONFIG_ARCH_HAS_WALK_MEMORY=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -241,6 +248,9 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_PPC_16K_PAGES is not set
+# CONFIG_PPC_64K_PAGES is not set
 CONFIG_FORCE_MAX_ZONEORDER=11
 CONFIG_PROC_DEVICETREE=y
 # CONFIG_CMDLINE_BOOL is not set
@@ -443,8 +453,10 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_C2PORT is not set
 CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
@@ -784,6 +796,7 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
 CONFIG_NVRAM=y
@@ -869,11 +882,11 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_THERMAL is not set
 # CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 # CONFIG_SSB is not set
 
 #
@@ -886,14 +899,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_PMIC_DA903X is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
-
-#
-# Voltage and Current regulators
-#
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 
 #
 # Multimedia devices
@@ -1252,11 +1258,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1348,6 +1354,7 @@ CONFIG_RTC_INTF_DEV=y
 # CONFIG_RTC_DRV_M41T80 is not set
 # CONFIG_RTC_DRV_S35390A is not set
 # CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
 
 #
 # SPI RTC drivers
@@ -1624,6 +1631,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
@@ -1649,11 +1657,16 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
 CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
 # CONFIG_CRYPTO_CRYPTD is not set
diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
index cd1ffa449327..391874c7b436 100644
--- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig
+++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
@@ -1164,6 +1164,7 @@ CONFIG_WATCHDOG=y
 # CONFIG_SOFT_WATCHDOG is not set
 # CONFIG_ALIM7101_WDT is not set
 # CONFIG_8xxx_WDT is not set
+CONFIG_GEF_WDT=y
 
 #
 # PCI-based Watchdog Cards
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 5ab7d7fe198c..9268602de5d0 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -35,3 +35,4 @@ unifdef-y += spu_info.h
 unifdef-y += termios.h
 unifdef-y += types.h
 unifdef-y += unistd.h
+unifdef-y += swab.h
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 499be5bdd6fa..b401950f5259 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,7 +5,7 @@
  * PowerPC atomic operations
  */
 
-typedef struct { int counter; } atomic_t;
+#include <linux/types.h>
 
 #ifdef __KERNEL__
 #include <linux/compiler.h>
@@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 #ifdef __powerpc64__
 
-typedef struct { long counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i)	{ (i) }
 
 static __inline__ long atomic64_read(const atomic64_t *v)
diff --git a/arch/powerpc/include/asm/byteorder.h b/arch/powerpc/include/asm/byteorder.h
index d5de325472e9..5cca27a41532 100644
--- a/arch/powerpc/include/asm/byteorder.h
+++ b/arch/powerpc/include/asm/byteorder.h
@@ -8,86 +8,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#define __BIG_ENDIAN
-
-#ifdef __GNUC__
-#ifdef __KERNEL__
-
-static __inline__ __u16 ld_le16(const volatile __u16 *addr)
-{
-	__u16 val;
-
-	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-#define __arch_swab16p ld_le16
-
-static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
-{
-	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static inline void __arch_swab16s(__u16 *addr)
-{
-	st_le16(addr, *addr);
-}
-#define __arch_swab16s __arch_swab16s
-
-static __inline__ __u32 ld_le32(const volatile __u32 *addr)
-{
-	__u32 val;
-
-	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-#define __arch_swab32p ld_le32
-
-static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
-{
-	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static inline void __arch_swab32s(__u32 *addr)
-{
-	st_le32(addr, *addr);
-}
-#define __arch_swab32s __arch_swab32s
-
-static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
-{
-	__u16 result;
-
-	__asm__("rlwimi %0,%1,8,16,23"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 8));
-	return result;
-}
-#define __arch_swab16 __arch_swab16
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
-{
-	__u32 result;
-
-	__asm__("rlwimi %0,%1,24,16,23\n\t"
-	    "rlwimi %0,%1,8,8,15\n\t"
-	    "rlwimi %0,%1,24,0,7"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 24));
-	return result;
-}
-#define __arch_swab32 __arch_swab32
-
-#endif /* __KERNEL__ */
-
-#ifndef __powerpc64__
-#define __SWAB_64_THRU_32__
-#endif /* __powerpc64__ */
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder.h>
+#include <asm/swab.h>
+#include <linux/byteorder/big_endian.h>
 
 #endif /* _ASM_POWERPC_BYTEORDER_H */
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
index 8066eede3a0c..b4b7338ad79e 100644
--- a/arch/powerpc/include/asm/cell-pmu.h
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -37,9 +37,11 @@
 #define CBE_PM_STOP_AT_MAX                 0x40000000
 #define CBE_PM_TRACE_MODE_GET(pm_control)  (((pm_control) >> 28) & 0x3)
 #define CBE_PM_TRACE_MODE_SET(mode)        (((mode)  & 0x3) << 28)
+#define CBE_PM_TRACE_BUF_OVFLW(bit)        (((bit) & 0x1) << 17)
 #define CBE_PM_COUNT_MODE_SET(count)       (((count) & 0x3) << 18)
 #define CBE_PM_FREEZE_ALL_CTRS             0x00100000
 #define CBE_PM_ENABLE_EXT_TRACE            0x00008000
+#define CBE_PM_SPU_ADDR_TRACE_SET(msk)     (((msk) & 0x3) << 9)
 
 /* Macros for the trace_address register. */
 #define CBE_PM_TRACE_BUF_FULL              0x00000800
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 000000000000..9b198d1b3b2b
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+	return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+	return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+	return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+	return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+	return inst & 0xffff;
+}
+
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 26f0d0ab27a5..b1dafb6a9743 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -18,6 +18,12 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 
 /*
+ * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
+ * to override the version in mm/hugetlb.c
+ */
+#define vma_mmu_pagesize vma_mmu_pagesize
+
+/*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
diff --git a/arch/powerpc/include/asm/ioctls.h b/arch/powerpc/include/asm/ioctls.h
index 279a6229584b..1842186d872c 100644
--- a/arch/powerpc/include/asm/ioctls.h
+++ b/arch/powerpc/include/asm/ioctls.h
@@ -89,6 +89,8 @@
 #define TIOCSBRK	0x5427  /* BSD compatibility */
 #define TIOCCBRK	0x5428  /* BSD compatibility */
 #define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TIOCGRS485	0x542e
+#define TIOCSRS485	0x542f
 #define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
 #define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
 
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 6dbffc981702..7e06b43720d3 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -48,63 +48,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 {
 	if (oldregs)
 		memcpy(newregs, oldregs, sizeof(*newregs));
-#ifdef __powerpc64__
-	else {
-		/* FIXME Merge this with xmon_save_regs ?? */
-		unsigned long tmp1, tmp2;
-		__asm__ __volatile__ (
-			"std    0,0(%2)\n"
-			"std    1,8(%2)\n"
-			"std    2,16(%2)\n"
-			"std    3,24(%2)\n"
-			"std    4,32(%2)\n"
-			"std    5,40(%2)\n"
-			"std    6,48(%2)\n"
-			"std    7,56(%2)\n"
-			"std    8,64(%2)\n"
-			"std    9,72(%2)\n"
-			"std    10,80(%2)\n"
-			"std    11,88(%2)\n"
-			"std    12,96(%2)\n"
-			"std    13,104(%2)\n"
-			"std    14,112(%2)\n"
-			"std    15,120(%2)\n"
-			"std    16,128(%2)\n"
-			"std    17,136(%2)\n"
-			"std    18,144(%2)\n"
-			"std    19,152(%2)\n"
-			"std    20,160(%2)\n"
-			"std    21,168(%2)\n"
-			"std    22,176(%2)\n"
-			"std    23,184(%2)\n"
-			"std    24,192(%2)\n"
-			"std    25,200(%2)\n"
-			"std    26,208(%2)\n"
-			"std    27,216(%2)\n"
-			"std    28,224(%2)\n"
-			"std    29,232(%2)\n"
-			"std    30,240(%2)\n"
-			"std    31,248(%2)\n"
-			"mfmsr  %0\n"
-			"std    %0, 264(%2)\n"
-			"mfctr  %0\n"
-			"std    %0, 280(%2)\n"
-			"mflr   %0\n"
-			"std    %0, 288(%2)\n"
-			"bl     1f\n"
-		"1:     mflr   %1\n"
-			"std    %1, 256(%2)\n"
-			"mtlr   %0\n"
-			"mfxer  %0\n"
-			"std    %0, 296(%2)\n"
-			: "=&r" (tmp1), "=&r" (tmp2)
-			: "b" (newregs)
-			: "memory");
-	}
-#else
 	else
 		ppc_save_regs(newregs);
-#endif /* __powerpc64__ */
 }
 
 extern void kexec_smp_wait(void);	/* get and clear naca physid, wait for
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 000000000000..f49031b632ca
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_44X_H__
+#define __ASM_44X_H__
+
+#include <linux/kvm_host.h>
+
+#define PPC44x_TLB_SIZE 64
+
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+	struct page *page;
+	u16 gtlb_index;
+	u8 writeable;
+	u8 tid;
+};
+
+struct kvmppc_vcpu_44x {
+	/* Unmodified copy of the guest's TLB. */
+	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+	/* References to guest pages in the hardware TLB. */
+	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
+
+	/* State of the shadow TLB at guest context switch time. */
+	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
+	struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
+
+#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180cd..c1e436fe7738 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct tlbe {
+struct kvmppc_44x_tlbe {
 	u32 tid; /* Only the low 8 bits are used. */
 	u32 word0;
 	u32 word1;
 	u32 word2;
 };
 
-struct kvm_arch {
+enum kvm_exit_types {
+	MMIO_EXITS,
+	DCR_EXITS,
+	SIGNAL_EXITS,
+	ITLB_REAL_MISS_EXITS,
+	ITLB_VIRT_MISS_EXITS,
+	DTLB_REAL_MISS_EXITS,
+	DTLB_VIRT_MISS_EXITS,
+	SYSCALL_EXITS,
+	ISI_EXITS,
+	DSI_EXITS,
+	EMULATED_INST_EXITS,
+	EMULATED_MTMSRWE_EXITS,
+	EMULATED_WRTEE_EXITS,
+	EMULATED_MTSPR_EXITS,
+	EMULATED_MFSPR_EXITS,
+	EMULATED_MTMSR_EXITS,
+	EMULATED_MFMSR_EXITS,
+	EMULATED_TLBSX_EXITS,
+	EMULATED_TLBWE_EXITS,
+	EMULATED_RFI_EXITS,
+	DEC_EXITS,
+	EXT_INTR_EXITS,
+	HALT_WAKEUP,
+	USR_PR_INST,
+	FP_UNAVAIL,
+	DEBUG_EXITS,
+	TIMEINGUEST,
+	__NUMBER_OF_KVM_EXIT_TYPES
 };
 
-struct kvm_vcpu_arch {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct kvmppc_exit_timing {
+	union {
+		u64 tv64;
+		struct {
+			u32 tbu, tbl;
+		} tv32;
+	};
+};
 
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+struct kvm_arch {
+};
 
+struct kvm_vcpu_arch {
 	u32 host_stack;
 	u32 host_pid;
 	u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
 	u32 host_msr;
 
 	u64 fpr[32];
-	u32 gpr[32];
+	ulong gpr[32];
 
-	u32 pc;
+	ulong pc;
 	u32 cr;
-	u32 ctr;
-	u32 lr;
-	u32 xer;
+	ulong ctr;
+	ulong lr;
+	ulong xer;
 
-	u32 msr;
+	ulong msr;
 	u32 mmucr;
-	u32 sprg0;
-	u32 sprg1;
-	u32 sprg2;
-	u32 sprg3;
-	u32 sprg4;
-	u32 sprg5;
-	u32 sprg6;
-	u32 sprg7;
-	u32 srr0;
-	u32 srr1;
-	u32 csrr0;
-	u32 csrr1;
-	u32 dsrr0;
-	u32 dsrr1;
-	u32 dear;
-	u32 esr;
+	ulong sprg0;
+	ulong sprg1;
+	ulong sprg2;
+	ulong sprg3;
+	ulong sprg4;
+	ulong sprg5;
+	ulong sprg6;
+	ulong sprg7;
+	ulong srr0;
+	ulong srr1;
+	ulong csrr0;
+	ulong csrr1;
+	ulong dsrr0;
+	ulong dsrr1;
+	ulong dear;
+	ulong esr;
 	u32 dec;
 	u32 decar;
 	u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
 	u32 tcr;
 	u32 tsr;
 	u32 ivor[16];
-	u32 ivpr;
+	ulong ivpr;
 	u32 pir;
 
 	u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
 	u32 dbcr0;
 	u32 dbcr1;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	struct kvmppc_exit_timing timing_exit;
+	struct kvmppc_exit_timing timing_last_enter;
+	u32 last_exit_type;
+	u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_last_exit;
+	struct dentry *debugfs_exit_timing;
+#endif
+
 	u32 last_inst;
-	u32 fault_dear;
-	u32 fault_esr;
+	ulong fault_dear;
+	ulong fault_esr;
 	gpa_t paddr_accessed;
 
 	u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de3..36d2a50a8487 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 
-struct kvm_tlb {
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-};
-
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
 	EMULATE_FAIL,         /* can't emulate this instruction */
 };
 
-extern const unsigned char exception_priority[];
-extern const unsigned char priority_exception[];
-
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern char kvmppc_handlers_start[];
 extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
-                           u64 asid, u32 flags);
-extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                  gva_t eend, u32 asid);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                           u64 asid, u32 flags, u32 max_bytes,
+                           unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
-/* XXX Book E specific */
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
-
-extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
-
-static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	set_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
-	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
-		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
-	vcpu->arch.msr = new_msr;
-
-	if (vcpu->arch.msr & MSR_WE)
-		kvm_vcpu_block(vcpu);
-}
-
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
-	if (vcpu->arch.pid != new_pid) {
-		vcpu->arch.pid = new_pid;
-		vcpu->arch.swap_pid = 1;
-	}
-}
+/* Core-specific hooks */
+
+extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
+                                                unsigned int id);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_processor_compat(void);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                                      struct kvm_translation *tr);
+
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                       struct kvm_interrupt *irq);
+
+extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                  unsigned int op, int *advance);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7e..27cc6fdcd3b7 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
 #ifndef __ASSEMBLY__
 
 extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
 
 typedef struct {
 	unsigned int	id;
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 95035c602ba6..639dc96077ab 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -32,6 +32,12 @@ struct op_system_config {
 	unsigned long mmcr0;
 	unsigned long mmcr1;
 	unsigned long mmcra;
+#ifdef CONFIG_OPROFILE_CELL
+	/* Register for oprofile user tool to check cell kernel profiling
+	 * suport.
+	 */
+	unsigned long cell_support;
+#endif
 #endif
 	unsigned long enable_kernel;
 	unsigned long enable_user;
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index cff30c0ef1ff..eead5c67197a 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -320,6 +320,7 @@ enum ps3_match_id {
 
 enum ps3_match_sub_id {
 	PS3_MATCH_SUB_ID_GPU_FB		= 1,
+	PS3_MATCH_SUB_ID_GPU_RAMDISK	= 2,
 };
 
 #define PS3_MODULE_ALIAS_EHCI		"ps3:1:0"
@@ -332,6 +333,7 @@ enum ps3_match_sub_id {
 #define PS3_MODULE_ALIAS_STOR_FLASH	"ps3:8:0"
 #define PS3_MODULE_ALIAS_SOUND		"ps3:9:0"
 #define PS3_MODULE_ALIAS_GPU_FB		"ps3:10:1"
+#define PS3_MODULE_ALIAS_GPU_RAMDISK	"ps3:10:2"
 #define PS3_MODULE_ALIAS_LPM		"ps3:11:0"
 
 enum ps3_system_bus_device_type {
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index edee15d269ea..a0a15311d0d8 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -17,6 +17,8 @@
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/err.h>
 #include <asm/cpm.h>
 #include <asm/immap_qe.h>
 
@@ -84,7 +86,11 @@ static inline bool qe_clock_is_brg(enum qe_clock clk)
 extern spinlock_t cmxgcr_lock;
 
 /* Export QE common operations */
+#ifdef CONFIG_QUICC_ENGINE
 extern void __init qe_reset(void);
+#else
+static inline void qe_reset(void) {}
+#endif
 
 /* QE PIO */
 #define QE_PIO_PINS 32
@@ -101,16 +107,43 @@ struct qe_pio_regs {
 #endif
 };
 
-extern int par_io_init(struct device_node *np);
-extern int par_io_of_config(struct device_node *np);
 #define QE_PIO_DIR_IN	2
 #define QE_PIO_DIR_OUT	1
 extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin,
 				int dir, int open_drain, int assignment,
 				int has_irq);
+#ifdef CONFIG_QUICC_ENGINE
+extern int par_io_init(struct device_node *np);
+extern int par_io_of_config(struct device_node *np);
 extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
 			     int assignment, int has_irq);
 extern int par_io_data_set(u8 port, u8 pin, u8 val);
+#else
+static inline int par_io_init(struct device_node *np) { return -ENOSYS; }
+static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; }
+static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
+		int assignment, int has_irq) { return -ENOSYS; }
+static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; }
+#endif /* CONFIG_QUICC_ENGINE */
+
+/*
+ * Pin multiplexing functions.
+ */
+struct qe_pin;
+#ifdef CONFIG_QE_GPIO
+extern struct qe_pin *qe_pin_request(struct device_node *np, int index);
+extern void qe_pin_free(struct qe_pin *qe_pin);
+extern void qe_pin_set_gpio(struct qe_pin *qe_pin);
+extern void qe_pin_set_dedicated(struct qe_pin *pin);
+#else
+static inline struct qe_pin *qe_pin_request(struct device_node *np, int index)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void qe_pin_free(struct qe_pin *qe_pin) {}
+static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {}
+static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
+#endif /* CONFIG_QE_GPIO */
 
 /* QE internal API */
 int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
index 56a7745ca343..cf519663a791 100644
--- a/arch/powerpc/include/asm/qe_ic.h
+++ b/arch/powerpc/include/asm/qe_ic.h
@@ -17,6 +17,9 @@
 
 #include <linux/irq.h>
 
+struct device_node;
+struct qe_ic;
+
 #define NUM_OF_QE_IC_GROUPS	6
 
 /* Flags when we init the QE IC */
@@ -54,17 +57,27 @@ enum qe_ic_grp_id {
 	QE_IC_GRP_RISCB		/* QE interrupt controller RISC group B */
 };
 
+#ifdef CONFIG_QUICC_ENGINE
 void qe_ic_init(struct device_node *node, unsigned int flags,
 		void (*low_handler)(unsigned int irq, struct irq_desc *desc),
 		void (*high_handler)(unsigned int irq, struct irq_desc *desc));
+unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
+unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
+#else
+static inline void qe_ic_init(struct device_node *node, unsigned int flags,
+		void (*low_handler)(unsigned int irq, struct irq_desc *desc),
+		void (*high_handler)(unsigned int irq, struct irq_desc *desc))
+{}
+static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
+{ return 0; }
+static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
+{ return 0; }
+#endif /* CONFIG_QUICC_ENGINE */
+
 void qe_ic_set_highest_priority(unsigned int virq, int high);
 int qe_ic_set_priority(unsigned int virq, unsigned int priority);
 int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
 
-struct qe_ic;
-unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
-unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
-
 static inline void qe_ic_cascade_low_ipic(unsigned int irq,
 					  struct irq_desc *desc)
 {
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 8b2eb044270a..0ab8d869e3d6 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -128,7 +128,7 @@ struct spu {
 	int number;
 	unsigned int irqs[3];
 	u32 node;
-	u64 flags;
+	unsigned long flags;
 	u64 class_0_pending;
 	u64 class_0_dar;
 	u64 class_1_dar;
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
new file mode 100644
index 000000000000..ef824ae4b79c
--- /dev/null
+++ b/arch/powerpc/include/asm/swab.h
@@ -0,0 +1,90 @@
+#ifndef _ASM_POWERPC_SWAB_H
+#define _ASM_POWERPC_SWAB_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __GNUC__
+
+#ifndef __powerpc64__
+#define __SWAB_64_THRU_32__
+#endif /* __powerpc64__ */
+
+#ifdef __KERNEL__
+
+static __inline__ __u16 ld_le16(const volatile __u16 *addr)
+{
+	__u16 val;
+
+	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+#define __arch_swab16p ld_le16
+
+static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
+{
+	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void __arch_swab16s(__u16 *addr)
+{
+	st_le16(addr, *addr);
+}
+#define __arch_swab16s __arch_swab16s
+
+static __inline__ __u32 ld_le32(const volatile __u32 *addr)
+{
+	__u32 val;
+
+	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+#define __arch_swab32p ld_le32
+
+static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
+{
+	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void __arch_swab32s(__u32 *addr)
+{
+	st_le32(addr, *addr);
+}
+#define __arch_swab32s __arch_swab32s
+
+static inline __attribute_const__ __u16 __arch_swab16(__u16 value)
+{
+	__u16 result;
+
+	__asm__("rlwimi %0,%1,8,16,23"
+	    : "=r" (result)
+	    : "r" (value), "0" (value >> 8));
+	return result;
+}
+#define __arch_swab16 __arch_swab16
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
+{
+	__u32 result;
+
+	__asm__("rlwimi %0,%1,24,16,23\n\t"
+	    "rlwimi %0,%1,8,8,15\n\t"
+	    "rlwimi %0,%1,24,0,7"
+	    : "=r" (result)
+	    : "r" (value), "0" (value >> 24));
+	return result;
+}
+#define __arch_swab32 __arch_swab32
+
+#endif /* __KERNEL__ */
+
+#endif /* __GNUC__ */
+
+#endif /* _ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f97999..375258559ae6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node)
 	return numa_cpumask_lookup_table[node];
 }
 
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
+
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t tmp;
-	tmp = node_to_cpumask(node);
-	return first_cpu(tmp);
+	return cpumask_first(cpumask_of_node(node));
 }
 
 int of_node_to_nid(struct device_node *device);
@@ -46,9 +46,12 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 					node_to_cpumask(pcibus_to_node(bus)) \
 				)
 
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
 /* sched_domains SD_NODE_INIT for PPC64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
@@ -109,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 
 #define topology_thread_siblings(cpu)	(per_cpu(cpu_sibling_map, cpu))
 #define topology_core_siblings(cpu)	(per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)	(&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)	(&per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 #endif
 #endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1308a86e9070..8d1a419df35d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -29,7 +29,7 @@ endif
 obj-y				:= cputable.o ptrace.o syscalls.o \
 				   irq.o align.o signal_32.o pmc.o vdso.o \
 				   init_task.o process.o systbl.o idle.o \
-				   signal.o sysfs.o
+				   signal.o sysfs.o cacheinfo.o
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d2146b..9937fe44555f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/hrtimer.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm_host.h>
-#endif
 #ifdef CONFIG_PPC64
 #include <linux/time.h>
 #include <linux/hardirq.h>
@@ -51,6 +48,9 @@
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
 #endif
+#ifdef CONFIG_KVM
+#include <asm/kvm_44x.h>
+#endif
 
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 #include "head_booke.h"
@@ -357,12 +357,10 @@ int main(void)
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
-	DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
-	DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
-	DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@ int main(void)
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbu));
+	DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbl));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbu));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbl));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 000000000000..b33f0417a4bf
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,837 @@
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <asm/prom.h>
+
+#include "cacheinfo.h"
+
+/* per-cpu object for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+	struct kobject *kobj; /* bare (not embedded) kobject for cache
+			       * directory */
+	struct cache_index_dir *index; /* list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has an index
+ * subdirectory corresponding to a cache object associated with the
+ * cpu.  This object's lifetime is managed via the embedded kobject.
+ */
+struct cache_index_dir {
+	struct kobject kobj;
+	struct cache_index_dir *next; /* next index in parent directory */
+	struct cache *cache;
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type */
+struct cache_type_info {
+	const char *name;
+	const char *size_prop;
+
+	/* Allow for both [di]-cache-line-size and
+	 * [di]-cache-block-size properties.  According to the PowerPC
+	 * Processor binding, -line-size should be provided if it
+	 * differs from the cache block size (that which is operated
+	 * on by cache instructions), so we look for -line-size first.
+	 * See cache_get_line_size(). */
+
+	const char *line_size_props[2];
+	const char *nr_sets_prop;
+};
+
+/* These are used to index the cache_type_info array. */
+#define CACHE_TYPE_UNIFIED     0
+#define CACHE_TYPE_INSTRUCTION 1
+#define CACHE_TYPE_DATA        2
+
+static const struct cache_type_info cache_type_info[] = {
+	{
+		/* PowerPC Processor binding says the [di]-cache-*
+		 * must be equal on unified caches, so just use
+		 * d-cache properties. */
+		.name            = "Unified",
+		.size_prop       = "d-cache-size",
+		.line_size_props = { "d-cache-line-size",
+				     "d-cache-block-size", },
+		.nr_sets_prop    = "d-cache-sets",
+	},
+	{
+		.name            = "Instruction",
+		.size_prop       = "i-cache-size",
+		.line_size_props = { "i-cache-line-size",
+				     "i-cache-block-size", },
+		.nr_sets_prop    = "i-cache-sets",
+	},
+	{
+		.name            = "Data",
+		.size_prop       = "d-cache-size",
+		.line_size_props = { "d-cache-line-size",
+				     "d-cache-block-size", },
+		.nr_sets_prop    = "d-cache-sets",
+	},
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system.  There are separate objects for Harvard caches: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object.  A cache object is released when its shared_cpu_map
+ * is cleared (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list
+ * (cache_list) of cache objects; and a singly-linked list
+ * representing the local cache hierarchy, which is ordered by level
+ * (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+	struct device_node *ofnode;    /* OF node for this cache, may be cpu */
+	struct cpumask shared_cpu_map; /* online CPUs using this cache */
+	int type;                      /* split cache disambiguation */
+	int level;                     /* level not explicit in device tree */
+	struct list_head list;         /* global list of cache objects */
+	struct cache *next_local;      /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir);
+
+/* traversal/modification of this list occurs only at cpu hotplug time;
+ * access is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+	return container_of(k, struct cache_index_dir, kobj);
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+	return cache_type_info[cache->type].name;
+}
+
+static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+{
+	cache->type = type;
+	cache->level = level;
+	cache->ofnode = of_node_get(ofnode);
+	INIT_LIST_HEAD(&cache->list);
+	list_add(&cache->list, &cache_list);
+}
+
+static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+{
+	struct cache *cache;
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache)
+		cache_init(cache, type, level, ofnode);
+
+	return cache;
+}
+
+static void release_cache_debugcheck(struct cache *cache)
+{
+	struct cache *iter;
+
+	list_for_each_entry(iter, &cache_list, list)
+		WARN_ONCE(iter->next_local == cache,
+			  "cache for %s(%s) refers to cache for %s(%s)\n",
+			  iter->ofnode->full_name,
+			  cache_type_string(iter),
+			  cache->ofnode->full_name,
+			  cache_type_string(cache));
+}
+
+static void release_cache(struct cache *cache)
+{
+	if (!cache)
+		return;
+
+	pr_debug("freeing L%d %s cache for %s\n", cache->level,
+		 cache_type_string(cache), cache->ofnode->full_name);
+
+	release_cache_debugcheck(cache);
+	list_del(&cache->list);
+	of_node_put(cache->ofnode);
+	kfree(cache);
+}
+
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+	struct cache *next = cache;
+
+	while (next) {
+		WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
+			  "CPU %i already accounted in %s(%s)\n",
+			  cpu, next->ofnode->full_name,
+			  cache_type_string(next));
+		cpumask_set_cpu(cpu, &next->shared_cpu_map);
+		next = next->next_local;
+	}
+}
+
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+	const char *propname;
+	const u32 *cache_size;
+
+	propname = cache_type_info[cache->type].size_prop;
+
+	cache_size = of_get_property(cache->ofnode, propname, NULL);
+	if (!cache_size)
+		return -ENODEV;
+
+	*ret = *cache_size;
+	return 0;
+}
+
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+	unsigned int size;
+
+	if (cache_size(cache, &size))
+		return -ENODEV;
+
+	*ret = size / 1024;
+	return 0;
+}
+
+/* not cache_line_size() because that's a macro in include/linux/cache.h */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+	const u32 *line_size;
+	int i, lim;
+
+	lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
+
+	for (i = 0; i < lim; i++) {
+		const char *propname;
+
+		propname = cache_type_info[cache->type].line_size_props[i];
+		line_size = of_get_property(cache->ofnode, propname, NULL);
+		if (line_size)
+			break;
+	}
+
+	if (!line_size)
+		return -ENODEV;
+
+	*ret = *line_size;
+	return 0;
+}
+
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+	const char *propname;
+	const u32 *nr_sets;
+
+	propname = cache_type_info[cache->type].nr_sets_prop;
+
+	nr_sets = of_get_property(cache->ofnode, propname, NULL);
+	if (!nr_sets)
+		return -ENODEV;
+
+	*ret = *nr_sets;
+	return 0;
+}
+
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+	unsigned int line_size;
+	unsigned int nr_sets;
+	unsigned int size;
+
+	if (cache_nr_sets(cache, &nr_sets))
+		goto err;
+
+	/* If the cache is fully associative, there is no need to
+	 * check the other properties.
+	 */
+	if (nr_sets == 1) {
+		*ret = 0;
+		return 0;
+	}
+
+	if (cache_get_line_size(cache, &line_size))
+		goto err;
+	if (cache_size(cache, &size))
+		goto err;
+
+	if (!(nr_sets > 0 && size > 0 && line_size > 0))
+		goto err;
+
+	*ret = (size / nr_sets) / line_size;
+	return 0;
+err:
+	return -ENODEV;
+}
+
+/* helper for dealing with split caches */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+	struct cache *iter;
+
+	if (cache->type == CACHE_TYPE_UNIFIED)
+		return cache;
+
+	list_for_each_entry(iter, &cache_list, list)
+		if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+			return iter;
+
+	return cache;
+}
+
+/* return the first cache on a local list matching node */
+static struct cache *cache_lookup_by_node(const struct device_node *node)
+{
+	struct cache *cache = NULL;
+	struct cache *iter;
+
+	list_for_each_entry(iter, &cache_list, list) {
+		if (iter->ofnode != node)
+			continue;
+		cache = cache_find_first_sibling(iter);
+		break;
+	}
+
+	return cache;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+	return of_get_property(np, "cache-unified", NULL);
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+{
+	struct cache *cache;
+
+	pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+
+	cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+
+	return cache;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+{
+	struct cache *dcache, *icache;
+
+	pr_debug("creating L%d dcache and icache for %s\n", level,
+		 node->full_name);
+
+	dcache = new_cache(CACHE_TYPE_DATA, level, node);
+	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+
+	if (!dcache || !icache)
+		goto err;
+
+	dcache->next_local = icache;
+
+	return dcache;
+err:
+	release_cache(dcache);
+	release_cache(icache);
+	return NULL;
+}
+
+static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+{
+	struct cache *cache;
+
+	if (cache_node_is_unified(node))
+		cache = cache_do_one_devnode_unified(node, level);
+	else
+		cache = cache_do_one_devnode_split(node, level);
+
+	return cache;
+}
+
+static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+{
+	struct cache *cache;
+
+	cache = cache_lookup_by_node(node);
+
+	WARN_ONCE(cache && cache->level != level,
+		  "cache level mismatch on lookup (got %d, expected %d)\n",
+		  cache->level, level);
+
+	if (!cache)
+		cache = cache_do_one_devnode(node, level);
+
+	return cache;
+}
+
+static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+	while (smaller->next_local) {
+		if (smaller->next_local == bigger)
+			return; /* already linked */
+		smaller = smaller->next_local;
+	}
+
+	smaller->next_local = bigger;
+}
+
+static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+	WARN_ON_ONCE(cache->level != 1);
+	WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+}
+
+static void __cpuinit do_subsidiary_caches(struct cache *cache)
+{
+	struct device_node *subcache_node;
+	int level = cache->level;
+
+	do_subsidiary_caches_debugcheck(cache);
+
+	while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
+		struct cache *subcache;
+
+		level++;
+		subcache = cache_lookup_or_instantiate(subcache_node, level);
+		of_node_put(subcache_node);
+		if (!subcache)
+			break;
+
+		link_cache_lists(cache, subcache);
+		cache = subcache;
+	}
+}
+
+static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+{
+	struct device_node *cpu_node;
+	struct cache *cpu_cache = NULL;
+
+	pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+	cpu_node = of_get_cpu_node(cpu_id, NULL);
+	WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+	if (!cpu_node)
+		goto out;
+
+	cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+	if (!cpu_cache)
+		goto out;
+
+	do_subsidiary_caches(cpu_cache);
+
+	cache_cpu_set(cpu_cache, cpu_id);
+out:
+	of_node_put(cpu_node);
+
+	return cpu_cache;
+}
+
+static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+	struct cache_dir *cache_dir;
+	struct sys_device *sysdev;
+	struct kobject *kobj = NULL;
+
+	sysdev = get_cpu_sysdev(cpu_id);
+	WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id);
+	if (!sysdev)
+		goto err;
+
+	kobj = kobject_create_and_add("cache", &sysdev->kobj);
+	if (!kobj)
+		goto err;
+
+	cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+	if (!cache_dir)
+		goto err;
+
+	cache_dir->kobj = kobj;
+
+	WARN_ON_ONCE(per_cpu(cache_dir, cpu_id) != NULL);
+
+	per_cpu(cache_dir, cpu_id) = cache_dir;
+
+	return cache_dir;
+err:
+	kobject_put(kobj);
+	return NULL;
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+	struct cache_index_dir *index;
+
+	index = kobj_to_cache_index_dir(kobj);
+
+	pr_debug("freeing index directory for L%d %s cache\n",
+		 index->cache->level, cache_type_string(index->cache));
+
+	kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+	struct kobj_attribute *kobj_attr;
+
+	kobj_attr = container_of(attr, struct kobj_attribute, attr);
+
+	return kobj_attr->show(k, kobj_attr, buf);
+}
+
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+	struct cache_index_dir *index;
+
+	index = kobj_to_cache_index_dir(k);
+
+	return index->cache;
+}
+
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	unsigned int size_kb;
+	struct cache *cache;
+
+	cache = index_kobj_to_cache(k);
+
+	if (cache_size_kb(cache, &size_kb))
+		return -ENODEV;
+
+	return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+	__ATTR(size, 0444, size_show, NULL);
+
+
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	unsigned int line_size;
+	struct cache *cache;
+
+	cache = index_kobj_to_cache(k);
+
+	if (cache_get_line_size(cache, &line_size))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+	__ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	unsigned int nr_sets;
+	struct cache *cache;
+
+	cache = index_kobj_to_cache(k);
+
+	if (cache_nr_sets(cache, &nr_sets))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+	__ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	unsigned int associativity;
+	struct cache *cache;
+
+	cache = index_kobj_to_cache(k);
+
+	if (cache_associativity(cache, &associativity))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+	__ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache *cache;
+
+	cache = index_kobj_to_cache(k);
+
+	return sprintf(buf, "%s\n", cache_type_string(cache));
+}
+
+static struct kobj_attribute cache_type_attr =
+	__ATTR(type, 0444, type_show, NULL);
+
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache_index_dir *index;
+	struct cache *cache;
+
+	index = kobj_to_cache_index_dir(k);
+	cache = index->cache;
+
+	return sprintf(buf, "%d\n", cache->level);
+}
+
+static struct kobj_attribute cache_level_attr =
+	__ATTR(level, 0444, level_show, NULL);
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache_index_dir *index;
+	struct cache *cache;
+	int len;
+	int n = 0;
+
+	index = kobj_to_cache_index_dir(k);
+	cache = index->cache;
+	len = PAGE_SIZE - 2;
+
+	if (len > 1) {
+		n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
+		buf[n++] = '\n';
+		buf[n] = '\0';
+	}
+	return n;
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+	__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_attrs.  This is the
+ * minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+	&cache_type_attr.attr,
+	&cache_level_attr.attr,
+	&cache_shared_cpu_map_attr.attr,
+	NULL,
+};
+
+/* Attributes which should be created if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+	&cache_size_attr,
+	&cache_line_size_attr,
+	&cache_nr_sets_attr,
+	&cache_assoc_attr,
+};
+
+static struct sysfs_ops cache_index_ops = {
+	.show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+	.release = cache_index_release,
+	.sysfs_ops = &cache_index_ops,
+	.default_attrs = cache_index_default_attrs,
+};
+
+static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+	const char *cache_name;
+	const char *cache_type;
+	struct cache *cache;
+	char *buf;
+	int i;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	cache = dir->cache;
+	cache_name = cache->ofnode->full_name;
+	cache_type = cache_type_string(cache);
+
+	/* We don't want to create an attribute that can't provide a
+	 * meaningful value.  Check the return value of each optional
+	 * attribute's ->show method before registering the
+	 * attribute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+		struct kobj_attribute *attr;
+		ssize_t rc;
+
+		attr = cache_index_opt_attrs[i];
+
+		rc = attr->show(&dir->kobj, attr, buf);
+		if (rc <= 0) {
+			pr_debug("not creating %s attribute for "
+				 "%s(%s) (rc = %zd)\n",
+				 attr->attr.name, cache_name,
+				 cache_type, rc);
+			continue;
+		}
+		if (sysfs_create_file(&dir->kobj, &attr->attr))
+			pr_debug("could not create %s attribute for %s(%s)\n",
+				 attr->attr.name, cache_name, cache_type);
+	}
+
+	kfree(buf);
+}
+
+static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+{
+	struct cache_index_dir *index_dir;
+	int rc;
+
+	index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+	if (!index_dir)
+		goto err;
+
+	index_dir->cache = cache;
+
+	rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+				  cache_dir->kobj, "index%d", index);
+	if (rc)
+		goto err;
+
+	index_dir->next = cache_dir->index;
+	cache_dir->index = index_dir;
+
+	cacheinfo_create_index_opt_attrs(index_dir);
+
+	return;
+err:
+	kfree(index_dir);
+}
+
+static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+{
+	struct cache_dir *cache_dir;
+	struct cache *cache;
+	int index = 0;
+
+	cache_dir = cacheinfo_create_cache_dir(cpu_id);
+	if (!cache_dir)
+		return;
+
+	cache = cache_list;
+	while (cache) {
+		cacheinfo_create_index_dir(cache, index, cache_dir);
+		index++;
+		cache = cache->next_local;
+	}
+}
+
+void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+{
+	struct cache *cache;
+
+	cache = cache_chain_instantiate(cpu_id);
+	if (!cache)
+		return;
+
+	cacheinfo_sysfs_populate(cpu_id, cache);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
+
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+	struct device_node *cpu_node;
+	struct cache *cache;
+
+	cpu_node = of_get_cpu_node(cpu_id, NULL);
+	WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+	if (!cpu_node)
+		return NULL;
+
+	cache = cache_lookup_by_node(cpu_node);
+	of_node_put(cpu_node);
+
+	return cache;
+}
+
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+	struct cache_index_dir *index;
+
+	index = cache_dir->index;
+
+	while (index) {
+		struct cache_index_dir *next;
+
+		next = index->next;
+		kobject_put(&index->kobj);
+		index = next;
+	}
+}
+
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+	remove_index_dirs(cache_dir);
+
+	kobject_put(cache_dir->kobj);
+
+	kfree(cache_dir);
+}
+
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+	while (cache) {
+		struct cache *next = cache->next_local;
+
+		WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+			  "CPU %i not accounted in %s(%s)\n",
+			  cpu, cache->ofnode->full_name,
+			  cache_type_string(cache));
+
+		cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+		/* Release the cache object if all the cpus using it
+		 * are offline */
+		if (cpumask_empty(&cache->shared_cpu_map))
+			release_cache(cache);
+
+		cache = next;
+	}
+}
+
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+	struct cache_dir *cache_dir;
+	struct cache *cache;
+
+	/* Prevent userspace from seeing inconsistent state - remove
+	 * the sysfs hierarchy first */
+	cache_dir = per_cpu(cache_dir, cpu_id);
+
+	/* careful, sysfs population may have failed */
+	if (cache_dir)
+		remove_cache_dir(cache_dir);
+
+	per_cpu(cache_dir, cpu_id) = NULL;
+
+	/* clear the CPU's bit in its cache chain, possibly freeing
+	 * cache objects */
+	cache = cache_lookup_by_cpu(cpu_id);
+	if (cache)
+		cache_cpu_clear(cache, cpu_id);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 000000000000..a7b74d36acd7
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0ab12e..23b8b5e36f98 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+			irq_desc[irq].chip->set_affinity(irq, &mask);
 		else if (irq_desc[irq].action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915452c8..c9329786073b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -316,7 +317,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 2538030954d8..da5a3855a0c4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -16,7 +16,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define DEBUG
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
@@ -1356,6 +1356,63 @@ static void __init pcibios_allocate_resources(int pass)
 	}
 }
 
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t	offset;
+	struct resource *res, *pres;
+	int i;
+
+	pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+	/* Check for IO */
+	if (!(hose->io_resource.flags & IORESOURCE_IO))
+		goto no_io;
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy IO";
+	res->flags = IORESOURCE_IO;
+	res->start = offset;
+	res->end = (offset + 0xfff) & 0xfffffffful;
+	pr_debug("Candidate legacy IO: %pR\n", res);
+	if (request_resource(&hose->io_resource, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);
+	}
+
+ no_io:
+	/* Check for memory */
+	offset = hose->pci_mem_offset;
+	pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset);
+	for (i = 0; i < 3; i++) {
+		pres = &hose->mem_resources[i];
+		if (!(pres->flags & IORESOURCE_MEM))
+			continue;
+		pr_debug("hose mem res: %pR\n", pres);
+		if ((pres->start - offset) <= 0xa0000 &&
+		    (pres->end - offset) >= 0xbffff)
+			break;
+	}
+	if (i >= 3)
+		return;
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy VGA memory";
+	res->flags = IORESOURCE_MEM;
+	res->start = 0xa0000 + offset;
+	res->end = 0xbffff + offset;
+	pr_debug("Candidate VGA memory: %pR\n", res);
+	if (request_resource(pres, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);
+	}
+}
+
 void __init pcibios_resource_survey(void)
 {
 	struct pci_bus *b;
@@ -1371,6 +1428,18 @@ void __init pcibios_resource_survey(void)
 		pcibios_allocate_resources(1);
 	}
 
+	/* Before we start assigning unassigned resource, we try to reserve
+	 * the low IO area and the VGA memory area if they intersect the
+	 * bus available resources to avoid allocating things on top of them
+	 */
+	if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
+		list_for_each_entry(b, &pci_root_buses, node)
+			pcibios_reserve_legacy_regions(b);
+	}
+
+	/* Now, if the platform didn't decide to blindly trust the firmware,
+	 * we proceed to assigning things that were left unassigned
+	 */
 	if (!(ppc_pci_flags & PPC_PCI_PROBE_ONLY)) {
 		pr_debug("PCI: Assigning unassigned resouces...\n");
 		pci_assign_unassigned_resources();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 39fadc6e1492..586962f65c2a 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -560,9 +560,14 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
 	 * G5 machines... So when something asks for bus 0 io base
 	 * (bus 0 is HT root), we return the AGP one instead.
 	 */
-	if (machine_is_compatible("MacRISC4"))
-		if (in_bus == 0)
+	if (in_bus == 0 && machine_is_compatible("MacRISC4")) {
+		struct device_node *agp;
+
+		agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+		if (agp)
 			in_bus = 0xf0;
+		of_node_put(agp);
+	}
 
 	/* That syscall isn't quite compatible with PCI domains, but it's
 	 * used on pre-domains setup. We return the first match
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index dcec1325d340..c8b27bb4dbde 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -165,6 +165,7 @@ EXPORT_SYMBOL(timer_interrupt);
 EXPORT_SYMBOL(irq_desc);
 EXPORT_SYMBOL(tb_ticks_per_jiffy);
 EXPORT_SYMBOL(cacheable_memcpy);
+EXPORT_SYMBOL(cacheable_memzero);
 #endif
 
 #ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
 #include <linux/mqueue.h>
 #include <linux/hardirq.h>
 #include <linux/utsname.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6f73c739f1e2..c09cffafb6ee 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -824,11 +824,11 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 #endif
 
 #ifdef CONFIG_KEXEC
-	lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
 	if (lprop)
 		crashk_res.start = *lprop;
 
-	lprop = (u64*)of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
 	if (lprop)
 		crashk_res.end = crashk_res.start + *lprop - 1;
 #endif
@@ -893,12 +893,12 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 	u64 base, size, lmb_size;
 	unsigned int is_kexec_kdump = 0, rngs;
 
-	ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+	ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
 	if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
 		return 0;
 	lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
 
-	dm = (cell_t *)of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
+	dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
 	if (dm == NULL || l < sizeof(cell_t))
 		return 0;
 
@@ -907,7 +907,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 		return 0;
 
 	/* check if this is a kexec/kdump kernel. */
-	usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
 						 &l);
 	if (usm != NULL)
 		is_kexec_kdump = 1;
@@ -981,9 +981,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
 	} else if (strcmp(type, "memory") != 0)
 		return 0;
 
-	reg = (cell_t *)of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
 	if (reg == NULL)
-		reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
+		reg = of_get_flat_dt_prop(node, "reg", &l);
 	if (reg == NULL)
 		return 0;
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 2445945d3761..7f1b33d5e30d 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1210,7 +1210,7 @@ static void __init prom_initialize_tce_table(void)
 		/* Initialize the table to have a one-to-one mapping
 		 * over the allocated size.
 		 */
-		tce_entryp = (unsigned long *)base;
+		tce_entryp = (u64 *)base;
 		for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
 			tce_entry = (i << PAGE_SHIFT);
 			tce_entry |= 0x3;
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 8c1335566089..8f0856f312da 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -232,11 +232,6 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
 }
 EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
 
-static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
-{
-	return (((pin - 1) + slot) % 4) + 1;
-}
-
 int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
@@ -306,7 +301,7 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 		/* We can only get here if we hit a P2P bridge with no node,
 		 * let's do standard swizzling and try again
 		 */
-		lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec);
+		lspec = pci_swizzle_interrupt_pin(pdev, lspec);
 		pdev = ppdev;
 	}
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8ac3f721d235..65484b2200b3 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,13 +59,9 @@
 
 struct thread_info *secondary_ti;
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
 
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0c64f10087b9..4a2ee08af6a7 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -18,6 +18,8 @@
 #include <asm/machdep.h>
 #include <asm/smp.h>
 
+#include "cacheinfo.h"
+
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
@@ -25,8 +27,6 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-static DEFINE_PER_CPU(struct kobject *, cache_toplevel);
-
 /*
  * SMT snooze delay stuff, 64-bit only for now
  */
@@ -343,283 +343,6 @@ static struct sysdev_attribute pa6t_attrs[] = {
 #endif /* HAS_PPC_PMC_PA6T */
 #endif /* HAS_PPC_PMC_CLASSIC */
 
-struct cache_desc {
-	struct kobject kobj;
-	struct cache_desc *next;
-	const char *type;	/* Instruction, Data, or Unified */
-	u32 size;		/* total cache size in KB */
-	u32 line_size;		/* in bytes */
-	u32 nr_sets;		/* number of sets */
-	u32 level;		/* e.g. 1, 2, 3... */
-	u32 associativity;	/* e.g. 8-way... 0 is fully associative */
-};
-
-DEFINE_PER_CPU(struct cache_desc *, cache_desc);
-
-static struct cache_desc *kobj_to_cache_desc(struct kobject *k)
-{
-	return container_of(k, struct cache_desc, kobj);
-}
-
-static void cache_desc_release(struct kobject *k)
-{
-	struct cache_desc *desc = kobj_to_cache_desc(k);
-
-	pr_debug("%s: releasing %s\n", __func__, kobject_name(k));
-
-	if (desc->next)
-		kobject_put(&desc->next->kobj);
-
-	kfree(kobj_to_cache_desc(k));
-}
-
-static ssize_t cache_desc_show(struct kobject *k, struct attribute *attr, char *buf)
-{
-	struct kobj_attribute *kobj_attr;
-
-	kobj_attr = container_of(attr, struct kobj_attribute, attr);
-
-	return kobj_attr->show(k, kobj_attr, buf);
-}
-
-static struct sysfs_ops cache_desc_sysfs_ops = {
-	.show = cache_desc_show,
-};
-
-static struct kobj_type cache_desc_type = {
-	.release = cache_desc_release,
-	.sysfs_ops = &cache_desc_sysfs_ops,
-};
-
-static ssize_t cache_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%uK\n", cache->size);
-}
-
-static struct kobj_attribute cache_size_attr =
-	__ATTR(size, 0444, cache_size_show, NULL);
-
-static ssize_t cache_line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%u\n", cache->line_size);
-}
-
-static struct kobj_attribute cache_line_size_attr =
-	__ATTR(coherency_line_size, 0444, cache_line_size_show, NULL);
-
-static ssize_t cache_nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%u\n", cache->nr_sets);
-}
-
-static struct kobj_attribute cache_nr_sets_attr =
-	__ATTR(number_of_sets, 0444, cache_nr_sets_show, NULL);
-
-static ssize_t cache_type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%s\n", cache->type);
-}
-
-static struct kobj_attribute cache_type_attr =
-	__ATTR(type, 0444, cache_type_show, NULL);
-
-static ssize_t cache_level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%u\n", cache->level);
-}
-
-static struct kobj_attribute cache_level_attr =
-	__ATTR(level, 0444, cache_level_show, NULL);
-
-static ssize_t cache_assoc_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
-{
-	struct cache_desc *cache = kobj_to_cache_desc(k);
-
-	return sprintf(buf, "%u\n", cache->associativity);
-}
-
-static struct kobj_attribute cache_assoc_attr =
-	__ATTR(ways_of_associativity, 0444, cache_assoc_show, NULL);
-
-struct cache_desc_info {
-	const char *type;
-	const char *size_prop;
-	const char *line_size_prop;
-	const char *nr_sets_prop;
-};
-
-/* PowerPC Processor binding says the [di]-cache-* must be equal on
- * unified caches, so just use d-cache properties. */
-static struct cache_desc_info ucache_info = {
-	.type = "Unified",
-	.size_prop = "d-cache-size",
-	.line_size_prop = "d-cache-line-size",
-	.nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info dcache_info = {
-	.type = "Data",
-	.size_prop = "d-cache-size",
-	.line_size_prop = "d-cache-line-size",
-	.nr_sets_prop = "d-cache-sets",
-};
-
-static struct cache_desc_info icache_info = {
-	.type = "Instruction",
-	.size_prop = "i-cache-size",
-	.line_size_prop = "i-cache-line-size",
-	.nr_sets_prop = "i-cache-sets",
-};
-
-static struct cache_desc * __cpuinit create_cache_desc(struct device_node *np, struct kobject *parent, int index, int level, struct cache_desc_info *info)
-{
-	const u32 *cache_line_size;
-	struct cache_desc *new;
-	const u32 *cache_size;
-	const u32 *nr_sets;
-	int rc;
-
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
-	if (!new)
-		return NULL;
-
-	rc = kobject_init_and_add(&new->kobj, &cache_desc_type, parent,
-				  "index%d", index);
-	if (rc)
-		goto err;
-
-	/* type */
-	new->type = info->type;
-	rc = sysfs_create_file(&new->kobj, &cache_type_attr.attr);
-	WARN_ON(rc);
-
-	/* level */
-	new->level = level;
-	rc = sysfs_create_file(&new->kobj, &cache_level_attr.attr);
-	WARN_ON(rc);
-
-	/* size */
-	cache_size = of_get_property(np, info->size_prop, NULL);
-	if (cache_size) {
-		new->size = *cache_size / 1024;
-		rc = sysfs_create_file(&new->kobj,
-				       &cache_size_attr.attr);
-		WARN_ON(rc);
-	}
-
-	/* coherency_line_size */
-	cache_line_size = of_get_property(np, info->line_size_prop, NULL);
-	if (cache_line_size) {
-		new->line_size = *cache_line_size;
-		rc = sysfs_create_file(&new->kobj,
-				       &cache_line_size_attr.attr);
-		WARN_ON(rc);
-	}
-
-	/* number_of_sets */
-	nr_sets = of_get_property(np, info->nr_sets_prop, NULL);
-	if (nr_sets) {
-		new->nr_sets = *nr_sets;
-		rc = sysfs_create_file(&new->kobj,
-				       &cache_nr_sets_attr.attr);
-		WARN_ON(rc);
-	}
-
-	/* ways_of_associativity */
-	if (new->nr_sets == 1) {
-		/* fully associative */
-		new->associativity = 0;
-		goto create_assoc;
-	}
-
-	if (new->nr_sets && new->size && new->line_size) {
-		/* If we have values for all of these we can derive
-		 * the associativity. */
-		new->associativity =
-			((new->size * 1024) / new->nr_sets) / new->line_size;
-create_assoc:
-		rc = sysfs_create_file(&new->kobj,
-				       &cache_assoc_attr.attr);
-		WARN_ON(rc);
-	}
-
-	return new;
-err:
-	kfree(new);
-	return NULL;
-}
-
-static bool cache_is_unified(struct device_node *np)
-{
-	return of_get_property(np, "cache-unified", NULL);
-}
-
-static struct cache_desc * __cpuinit create_cache_index_info(struct device_node *np, struct kobject *parent, int index, int level)
-{
-	struct device_node *next_cache;
-	struct cache_desc *new, **end;
-
-	pr_debug("%s(node = %s, index = %d)\n", __func__, np->full_name, index);
-
-	if (cache_is_unified(np)) {
-		new = create_cache_desc(np, parent, index, level,
-					&ucache_info);
-	} else {
-		new = create_cache_desc(np, parent, index, level,
-					&dcache_info);
-		if (new) {
-			index++;
-			new->next = create_cache_desc(np, parent, index, level,
-						      &icache_info);
-		}
-	}
-	if (!new)
-		return NULL;
-
-	end = &new->next;
-	while (*end)
-		end = &(*end)->next;
-
-	next_cache = of_find_next_cache_node(np);
-	if (!next_cache)
-		goto out;
-
-	*end = create_cache_index_info(next_cache, parent, ++index, ++level);
-
-	of_node_put(next_cache);
-out:
-	return new;
-}
-
-static void __cpuinit create_cache_info(struct sys_device *sysdev)
-{
-	struct kobject *cache_toplevel;
-	struct device_node *np = NULL;
-	int cpu = sysdev->id;
-
-	cache_toplevel = kobject_create_and_add("cache", &sysdev->kobj);
-	if (!cache_toplevel)
-		return;
-	per_cpu(cache_toplevel, cpu) = cache_toplevel;
-	np = of_get_cpu_node(cpu, NULL);
-	if (np != NULL) {
-		per_cpu(cache_desc, cpu) =
-			create_cache_index_info(np, cache_toplevel, 0, 1);
-		of_node_put(np);
-	}
-	return;
-}
-
 static void __cpuinit register_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -684,25 +407,10 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 		sysdev_create_file(s, &attr_dscr);
 #endif /* CONFIG_PPC64 */
 
-	create_cache_info(s);
+	cacheinfo_cpu_online(cpu);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void remove_cache_info(struct sys_device *sysdev)
-{
-	struct kobject *cache_toplevel;
-	struct cache_desc *cache_desc;
-	int cpu = sysdev->id;
-
-	cache_desc = per_cpu(cache_desc, cpu);
-	if (cache_desc != NULL)
-		kobject_put(&cache_desc->kobj);
-
-	cache_toplevel = per_cpu(cache_toplevel, cpu);
-	if (cache_toplevel != NULL)
-		kobject_put(cache_toplevel);
-}
-
 static void unregister_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -769,7 +477,7 @@ static void unregister_cpu_online(unsigned int cpu)
 		sysdev_remove_file(s, &attr_dscr);
 #endif /* CONFIG_PPC64 */
 
-	remove_cache_info(s);
+	cacheinfo_cpu_offline(cpu);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a5140429..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta);
-	account_system_time_scaled(tsk, deltascaled);
+	if (in_irq() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta, deltascaled);
+	else
+		account_idle_time(delta);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 
 	utime = get_paca()->user_time;
 	get_paca()->user_time = 0;
-	account_user_time(tsk, utime);
-
 	utimescaled = cputime_to_scaled(utime);
-	account_user_time_scaled(tsk, utimescaled);
+	account_user_time(tsk, utime, utimescaled);
 }
 
 /*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
 	tb = mftb();
 	purr = mfspr(SPRN_PURR);
 	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0)
-		account_steal_time(current, stolen);
+	if (stolen > 0) {
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(stolen);
+		else
+			account_idle_time(stolen);
+	}
 	pme->tb = tb;
 	pme->purr = purr;
 }
@@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu)
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 
 	*dec = decrementer_clockevent;
-	dec->cpumask = cpumask_of_cpu(cpu);
+	dec->cpumask = cpumask_of(cpu);
 
 	printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
 	       dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 000000000000..a66bec57265a
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE].
+ */
+static void kvm44x_disable_debug_interrupts(void)
+{
+	mtmsr(mfmsr() & ~MSR_DE);
+}
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+	kvm44x_disable_debug_interrupts();
+
+	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+	mtmsr(vcpu->arch.host_msr);
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+	u32 dbcr0 = 0;
+
+	vcpu->arch.host_msr = mfmsr();
+	kvm44x_disable_debug_interrupts();
+
+	/* Save host debug register state. */
+	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+	/* set registers up for guest */
+
+	if (dbg->bp[0]) {
+		mtspr(SPRN_IAC1, dbg->bp[0]);
+		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+	}
+	if (dbg->bp[1]) {
+		mtspr(SPRN_IAC2, dbg->bp[1]);
+		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+	}
+	if (dbg->bp[2]) {
+		mtspr(SPRN_IAC3, dbg->bp[2]);
+		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+	}
+	if (dbg->bp[3]) {
+		mtspr(SPRN_IAC4, dbg->bp[3]);
+		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+	}
+
+	mtspr(SPRN_DBCR0, dbcr0);
+	mtspr(SPRN_DBCR1, 0);
+	mtspr(SPRN_DBCR2, 0);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_44x_tlb_load(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_44x_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	int r;
+
+	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+		r = 0;
+	else
+		r = -ENOTSUPP;
+
+	return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+	int i;
+
+	tlbe->tid = 0;
+	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	tlbe->word1 = 0;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	tlbe++;
+	tlbe->tid = 0;
+	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	tlbe->word1 = 0xef600000;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+	              | PPC44x_TLB_I | PPC44x_TLB_G;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+		vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
+	return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *gtlbe;
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+	if (index == -1) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	gtlbe = &vcpu_44x->guest_tlb[index];
+
+	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_44x) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_44x->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	return vcpu;
+
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+	return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+static int kvmppc_44x_init(void)
+{
+	int r;
+
+	r = kvmppc_booke_init();
+	if (r)
+		return r;
+
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+}
+
+static void kvmppc_44x_exit(void)
+{
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 000000000000..82489a743a6f
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+#define OP_RFI      19
+
+#define XOP_RFI     50
+#define XOP_MFMSR   83
+#define XOP_WRTEE   131
+#define XOP_MTMSR   146
+#define XOP_WRTEEI  163
+#define XOP_MFDCR   323
+#define XOP_MTDCR   451
+#define XOP_TLBSX   914
+#define XOP_ICCCI   966
+#define XOP_TLBWE   978
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int dcrn;
+	int ra;
+	int rb;
+	int rc;
+	int rs;
+	int rt;
+	int ws;
+
+	switch (get_op(inst)) {
+	case OP_RFI:
+		switch (get_xop(inst)) {
+		case XOP_RFI:
+			kvmppc_emul_rfi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+			*advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case XOP_MFMSR:
+			rt = get_rt(inst);
+			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+			break;
+
+		case XOP_MTMSR:
+			rs = get_rs(inst);
+			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			break;
+
+		case XOP_WRTEE:
+			rs = get_rs(inst);
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (vcpu->arch.gpr[rs] & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		case XOP_WRTEEI:
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (inst & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		case XOP_MFDCR:
+			dcrn = get_dcrn(inst);
+			rt = get_rt(inst);
+
+			/* The guest may access CPR0 registers to determine the timebase
+			 * frequency, and it must know the real host frequency because it
+			 * can directly access the timebase registers.
+			 *
+			 * It would be possible to emulate those accesses in userspace,
+			 * but userspace can really only figure out the end frequency.
+			 * We could decompose that into the factors that compute it, but
+			 * that's tricky math, and it's easier to just report the real
+			 * CPR0 values.
+			 */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+				break;
+			case DCRN_CPR0_CONFIG_DATA:
+				local_irq_disable();
+				mtdcr(DCRN_CPR0_CONFIG_ADDR,
+					  vcpu->arch.cpr0_cfgaddr);
+				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+				local_irq_enable();
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data =  0;
+				run->dcr.is_write = 0;
+				vcpu->arch.io_gpr = rt;
+				vcpu->arch.dcr_needed = 1;
+				kvmppc_account_exit(vcpu, DCR_EXITS);
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_MTDCR:
+			dcrn = get_dcrn(inst);
+			rs = get_rs(inst);
+
+			/* emulate some access in kernel */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data = vcpu->arch.gpr[rs];
+				run->dcr.is_write = 1;
+				vcpu->arch.dcr_needed = 1;
+				kvmppc_account_exit(vcpu, DCR_EXITS);
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_TLBWE:
+			ra = get_ra(inst);
+			rs = get_rs(inst);
+			ws = get_ws(inst);
+			emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+			break;
+
+		case XOP_TLBSX:
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+			rc = get_rc(inst);
+			emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+			break;
+
+		case XOP_ICCCI:
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	switch (sprn) {
+	case SPRN_MMUCR:
+		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+	case SPRN_CCR0:
+		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_CCR1:
+		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DEAR:
+		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+	case SPRN_ESR:
+		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR0:
+		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR1:
+		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_TSR:
+		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+	case SPRN_TCR:
+		vcpu->arch.tcr = vcpu->arch.gpr[rs];
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	/* Note: SPRG4-7 are user-readable. These values are
+	 * loaded into the real SPRGs when resuming the
+	 * guest. */
+	case SPRN_SPRG4:
+		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG5:
+		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG6:
+		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG7:
+		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR0:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+	return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	switch (sprn) {
+	/* 440 */
+	case SPRN_MMUCR:
+		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+	case SPRN_CCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+	case SPRN_CCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+
+	/* Book E */
+	case SPRN_PID:
+		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+	case SPRN_IVPR:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+	case SPRN_DEAR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+	case SPRN_ESR:
+		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+	case SPRN_DBCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+	case SPRN_DBCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+	case SPRN_IVOR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+	return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811f..9a34b8edb9e2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
 #include <asm/mmu-44x.h>
 #include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
 
 #include "44x_tlb.h"
 
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
+#define PPC44x_TLB_UATTR_MASK \
+	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
 
-static unsigned int kvmppc_tlb_44x_pos;
+#ifdef DEBUG
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_44x_tlbe *tlbe;
+	int i;
+
+	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+	printk("| %2s | %3s | %8s | %8s | %8s |\n",
+			"nr", "tid", "word0", "word1", "word2");
+
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+		tlbe = &vcpu_44x->guest_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+}
+#endif
+
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+	/* 0 <= index < 64, so the V bit is clear and we can use the index as
+	 * word0. */
+	asm volatile(
+		"tlbwe %[index], %[index], 0\n"
+	:
+	: [index] "r"(index)
+	);
+}
+
+static inline void kvmppc_44x_tlbre(unsigned int index,
+                                    struct kvmppc_44x_tlbe *tlbe)
+{
+	asm volatile(
+		"tlbre %[word0], %[index], 0\n"
+		"mfspr %[tid], %[sprn_mmucr]\n"
+		"andi. %[tid], %[tid], 0xff\n"
+		"tlbre %[word1], %[index], 1\n"
+		"tlbre %[word2], %[index], 2\n"
+		: [word0] "=r"(tlbe->word0),
+		  [word1] "=r"(tlbe->word1),
+		  [word2] "=r"(tlbe->word2),
+		  [tid]   "=r"(tlbe->tid)
+		: [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+		: "cc"
+	);
+}
+
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+                                    struct kvmppc_44x_tlbe *stlbe)
+{
+	unsigned long tmp;
+
+	asm volatile(
+		"mfspr %[tmp], %[sprn_mmucr]\n"
+		"rlwimi %[tmp], %[tid], 0, 0xff\n"
+		"mtspr %[sprn_mmucr], %[tmp]\n"
+		"tlbwe %[word0], %[index], 0\n"
+		"tlbwe %[word1], %[index], 1\n"
+		"tlbwe %[word2], %[index], 2\n"
+		: [tmp]   "=&r"(tmp)
+		: [word0] "r"(stlbe->word0),
+		  [word1] "r"(stlbe->word1),
+		  [word2] "r"(stlbe->word2),
+		  [tid]   "r"(stlbe->tid),
+		  [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+	);
+}
 
 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 {
-	/* Mask off reserved bits. */
-	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+	/* We only care about the guest's permission and user bits. */
+	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
 
 	if (!usermode) {
 		/* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 	/* Make sure host can always access this memory. */
 	attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
 
+	/* WIMGE = 0b00100 */
+	attrib |= PPC44x_TLB_M;
+
 	return attrib;
 }
 
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbwe(i, stlbe);
+	}
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+                                         unsigned int i)
+{
+	vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (vcpu_44x->shadow_tlb_mod[i])
+			kvmppc_44x_tlbre(i, stlbe);
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbie(i);
+	}
+}
+
+
 /* Search the guest TLB for a matching entry. */
 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
                          unsigned int as)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+		struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 	return -1;
 }
 
-struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+                                      unsigned int stlb_index)
 {
-	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
-}
+	struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
 
-static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
-                                      unsigned int index)
-{
-	struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
-	struct page *page = vcpu->arch.shadow_pages[index];
+	if (!ref->page)
+		return;
 
-	if (get_tlb_v(stlbe)) {
-		if (kvmppc_44x_tlbe_is_writable(stlbe))
-			kvm_release_page_dirty(page);
-		else
-			kvm_release_page_clean(page);
-	}
+	/* Discard from the TLB. */
+	/* Note: we could actually invalidate a host mapping, if the host overwrote
+	 * this TLB entry since we inserted a guest mapping. */
+	kvmppc_44x_tlbie(stlb_index);
+
+	/* Now release the page. */
+	if (ref->writeable)
+		kvm_release_page_dirty(ref->page);
+	else
+		kvm_release_page_clean(ref->page);
+
+	ref->page = NULL;
+
+	/* XXX set tlb_44x_index to stlb_index? */
+
+	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
 }
 
 void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_44x_shadow_release(vcpu, i);
-}
-
-void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
-{
-    vcpu->arch.shadow_tlb_mod[i] = 1;
+		kvmppc_44x_shadow_release(vcpu_44x, i);
 }
 
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
-                    u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+                    u32 flags, u32 max_bytes, unsigned int gtlb_index)
 {
+	struct kvmppc_44x_tlbe stlbe;
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_shadow_ref *ref;
 	struct page *new_page;
-	struct tlbe *stlbe;
 	hpa_t hpaddr;
+	gfn_t gfn;
 	unsigned int victim;
 
-	/* Future optimization: don't overwrite the TLB entry containing the
-	 * current PC (or stack?). */
-	victim = kvmppc_tlb_44x_pos++;
-	if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
-		kvmppc_tlb_44x_pos = 0;
-	stlbe = &vcpu->arch.shadow_tlb[victim];
+	/* Select TLB entry to clobber. Indirectly guard against races with the TLB
+	 * miss handler by disabling interrupts. */
+	local_irq_disable();
+	victim = ++tlb_44x_index;
+	if (victim > tlb_44x_hwater)
+		victim = 0;
+	tlb_44x_index = victim;
+	local_irq_enable();
 
 	/* Get reference to new page. */
+	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	}
 	hpaddr = page_to_phys(new_page);
 
-	/* Drop reference to old page. */
-	kvmppc_44x_shadow_release(vcpu, victim);
-
-	vcpu->arch.shadow_pages[victim] = new_page;
+	/* Invalidate any previous shadow mappings. */
+	kvmppc_44x_shadow_release(vcpu_44x, victim);
 
 	/* XXX Make sure (va, size) doesn't overlap any other
 	 * entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 
 	/* XXX what about AS? */
 
-	stlbe->tid = !(asid & 0xff);
-
 	/* Force TS=1 for all guest mappings. */
-	/* For now we hardcode 4KB mappings, but it will be important to
-	 * use host large pages in the future. */
-	stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
-	               | PPC44x_TLB_4K;
-	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
-	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
-	                                            vcpu->arch.msr & MSR_PR);
-	kvmppc_tlbe_set_modified(vcpu, victim);
+	stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+	if (max_bytes >= PAGE_SIZE) {
+		/* Guest mapping is larger than or equal to host page size. We can use
+		 * a "native" host mapping. */
+		stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+	} else {
+		/* Guest mapping is smaller than host page size. We must restrict the
+		 * size of the mapping to be at most the smaller of the two, but for
+		 * simplicity we fall back to a 4K mapping (this is probably what the
+		 * guest is using anyways). */
+		stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+		/* 'hpaddr' is a host page, which is larger than the mapping we're
+		 * inserting here. To compensate, we must add the in-page offset to the
+		 * sub-page. */
+		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+	}
 
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim,
-			stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
-			handler);
+	stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+	stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+	                                            vcpu->arch.msr & MSR_PR);
+	stlbe.tid = !(asid & 0xff);
+
+	/* Keep track of the reference so we can properly release it later. */
+	ref = &vcpu_44x->shadow_refs[victim];
+	ref->page = new_page;
+	ref->gtlb_index = gtlb_index;
+	ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+	ref->tid = stlbe.tid;
+
+	/* Insert shadow mapping into hardware TLB. */
+	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
+	kvmppc_44x_tlbwe(victim, &stlbe);
+	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
+	            stlbe.word2, handler);
 }
 
-void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                           gva_t eend, u32 asid)
+/* For a particular guest TLB entry, invalidate the corresponding host TLB
+ * mappings and release the host pages. */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+                                  unsigned int gtlb_index)
 {
-	unsigned int pid = !(asid & 0xff);
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
-	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
-		unsigned int tid;
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+		if (ref->gtlb_index == gtlb_index)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
+	}
+}
 
-		if (!get_tlb_v(stlbe))
-			continue;
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+	vcpu->arch.shadow_pid = !usermode;
+}
 
-		if (eend < get_tlb_eaddr(stlbe))
-			continue;
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
 
-		if (eaddr > get_tlb_end(stlbe))
-			continue;
+	if (unlikely(vcpu->arch.pid == new_pid))
+		return;
 
-		tid = get_tlb_tid(stlbe);
-		if (tid && (tid != pid))
-			continue;
+	vcpu->arch.pid = new_pid;
 
-		kvmppc_44x_shadow_release(vcpu, i);
-		stlbe->word0 = 0;
-		kvmppc_tlbe_set_modified(vcpu, i);
-		KVMTRACE_5D(STLB_INVAL, vcpu, i,
-				stlbe->tid, stlbe->word0, stlbe->word1,
-				stlbe->word2, handler);
+	/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+	 * can't access guest kernel mappings (TID=1). When we switch to a new
+	 * guest PID, which will also use host PID=0, we must discard the old guest
+	 * userspace mappings. */
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+
+		if (ref->tid == 0)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
 	}
 }
 
-/* Invalidate all mappings on the privilege switch after PID has been changed.
- * The guest always runs with PID=1, so we must clear the entire TLB when
- * switching address spaces. */
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                             const struct kvmppc_44x_tlbe *tlbe)
 {
-	int i;
+	gpa_t gpa;
 
-	if (vcpu->arch.swap_pid) {
-		/* XXX Replace loop with fancy data structures. */
-		for (i = 0; i <= tlb_44x_hwater; i++) {
-			struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
-
-			/* Future optimization: clear only userspace mappings. */
-			kvmppc_44x_shadow_release(vcpu, i);
-			stlbe->word0 = 0;
-			kvmppc_tlbe_set_modified(vcpu, i);
-			KVMTRACE_5D(STLB_INVAL, vcpu, i,
-			            stlbe->tid, stlbe->word0, stlbe->word1,
-			            stlbe->word2, handler);
-		}
-		vcpu->arch.swap_pid = 0;
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe;
+	unsigned int gtlb_index;
+
+	gtlb_index = vcpu->arch.gpr[ra];
+	if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
+		printk("%s: index %d\n", __func__, gtlb_index);
+		kvmppc_dump_vcpu(vcpu);
+		return EMULATE_FAIL;
 	}
 
-	vcpu->arch.shadow_pid = !usermode;
+	tlbe = &vcpu_44x->guest_tlb[gtlb_index];
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+	if (tlbe->word0 & PPC44x_TLB_VALID)
+		kvmppc_44x_invalidate(vcpu, gtlb_index);
+
+	switch (ws) {
+	case PPC44x_TLB_PAGEID:
+		tlbe->tid = get_mmucr_stid(vcpu);
+		tlbe->word0 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_XLAT:
+		tlbe->word1 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_ATTRIB:
+		tlbe->word2 = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		u64 asid;
+		gva_t eaddr;
+		gpa_t gpaddr;
+		u32 flags;
+		u32 bytes;
+
+		eaddr = get_tlb_eaddr(tlbe);
+		gpaddr = get_tlb_raddr(tlbe);
+
+		/* Use the advertised page size to mask effective and real addrs. */
+		bytes = get_tlb_bytes(tlbe);
+		eaddr &= ~(bytes - 1);
+		gpaddr &= ~(bytes - 1);
+
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		flags = tlbe->word2 & 0xffff;
+
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+	}
+
+	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
+	            tlbe->word1, tlbe->word2, handler);
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+	return EMULATE_DONE;
+}
+
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+	u32 ea;
+	int gtlb_index;
+	unsigned int as = get_mmucr_sts(vcpu);
+	unsigned int pid = get_mmucr_stid(vcpu);
+
+	ea = vcpu->arch.gpr[rb];
+	if (ra)
+		ea += vcpu->arch.gpr[ra];
+
+	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+	if (rc) {
+		if (gtlb_index < 0)
+			vcpu->arch.cr &= ~0x20000000;
+		else
+			vcpu->arch.cr |= 0x20000000;
+	}
+	vcpu->arch.gpr[rt] = gtlb_index;
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+	return EMULATE_DONE;
 }
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b7..772191f29e62 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+                                 u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 4) & 0xf;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->word0 & 0xfffffc00;
 }
 
-static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1 << 10 << (pgsize << 1);
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	u64 word1 = tlbe->word1;
 	return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->tid & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 8) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 9) & 0x1;
 }
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
 	return (vcpu->arch.mmucr >> 16) & 0x1;
 }
 
-static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
 {
 	unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
 
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e5..6dbdc4817d80 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
 if VIRTUALIZATION
 
 config KVM
-	bool "Kernel-based Virtual Machine (KVM) support"
-	depends on 44x && EXPERIMENTAL
+	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
-	# We can only run on Book E hosts so far
-	select KVM_BOOKE_HOST
+
+config KVM_440
+	bool "KVM support for PowerPC 440 processors"
+	depends on EXPERIMENTAL && 44x
+	select KVM
 	---help---
-	  Support hosting virtualized guest machines. You will also
-	  need to select one or more of the processor modules below.
+	  Support running unmodified 440 guest kernels in virtual machines on
+	  440 host processors.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
 
 	  If unsure, say N.
 
-config KVM_BOOKE_HOST
-	bool "KVM host support for Book E PowerPC processors"
-	depends on KVM && 44x
+config KVM_EXIT_TIMING
+	bool "Detailed exit timing"
+	depends on KVM
 	---help---
-	  Provides host support for KVM on Book E PowerPC processors. Currently
-	  this works on 440 processors only.
+	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
+	  report is available in debugfs kvm/vm#_vcpu#_timing.
+	  The overhead is relatively small, however it is not recommended for
+	  production environments.
+
+	  If unsure, say N.
 
 config KVM_TRACE
 	bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4b..df7ba59e6d53 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 
-kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
+kvm-440-objs := \
+	booke.o \
+	booke_interrupts.o \
+	44x.o \
+	44x_tlb.o \
+	44x_emulate.o
+obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26bae..35485dd6927e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
+#include "timing.h"
+#include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
 
+#include "booke.h"
 #include "44x_tlb.h"
 
+unsigned long kvmppc_booke_handlers;
+
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ "exits",      VCPU_STAT(sum_exits) },
 	{ "mmio",       VCPU_STAT(mmio_exits) },
 	{ "dcr",        VCPU_STAT(dcr_exits) },
 	{ "sig",        VCPU_STAT(signal_exits) },
-	{ "light",      VCPU_STAT(light_exits) },
 	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
 	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
 	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-static const u32 interrupt_msr_mask[16] = {
-	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
-	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
-	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
-	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
-	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
-	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
-	[BOOKE_INTERRUPT_PROGRAM] = 3,
-	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
-	[BOOKE_INTERRUPT_SYSCALL] = 5,
-	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
-	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
-	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
-	[BOOKE_INTERRUPT_DEBUG] = 10,
-	[BOOKE_INTERRUPT_CRITICAL] = 11,
-	[BOOKE_INTERRUPT_WATCHDOG] = 12,
-	[BOOKE_INTERRUPT_EXTERNAL] = 13,
-	[BOOKE_INTERRUPT_FIT] = 14,
-	[BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
-	BOOKE_INTERRUPT_DATA_STORAGE,
-	BOOKE_INTERRUPT_INST_STORAGE,
-	BOOKE_INTERRUPT_ALIGNMENT,
-	BOOKE_INTERRUPT_PROGRAM,
-	BOOKE_INTERRUPT_FP_UNAVAIL,
-	BOOKE_INTERRUPT_SYSCALL,
-	BOOKE_INTERRUPT_AP_UNAVAIL,
-	BOOKE_INTERRUPT_DTLB_MISS,
-	BOOKE_INTERRUPT_ITLB_MISS,
-	BOOKE_INTERRUPT_MACHINE_CHECK,
-	BOOKE_INTERRUPT_DEBUG,
-	BOOKE_INTERRUPT_CRITICAL,
-	BOOKE_INTERRUPT_WATCHDOG,
-	BOOKE_INTERRUPT_EXTERNAL,
-	BOOKE_INTERRUPT_FIT,
-	BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
-	struct tlbe *tlbe;
-	int i;
-
-	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
-	printk("| %2s | %3s | %8s | %8s | %8s |\n",
-			"nr", "tid", "word0", "word1", "word2");
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.guest_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.shadow_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-}
-
 /* TODO: use vcpu_printf() */
 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
-	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
-	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+	printk("pc:   %08lx msr:  %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
 
 	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
 
 	for (i = 0; i < 32; i += 4) {
-		printk("gpr%02d: %08x %08x %08x %08x\n", i,
+		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
 		       vcpu->arch.gpr[i],
 		       vcpu->arch.gpr[i+1],
 		       vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 	}
 }
 
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+                                       unsigned int priority)
 {
-	int r;
+	set_bit(priority, &vcpu->arch.pending_exceptions);
+}
 
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_CRITICAL:
-		r = vcpu->arch.msr & MSR_CE;
-		break;
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		r = vcpu->arch.msr & MSR_ME;
-		break;
-	case BOOKE_INTERRUPT_EXTERNAL:
-		r = vcpu->arch.msr & MSR_EE;
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Deliver the interrupt of the corresponding priority, if possible. */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+                                        unsigned int priority)
+{
+	int allowed = 0;
+	ulong msr_mask;
+
+	switch (priority) {
+	case BOOKE_IRQPRIO_PROGRAM:
+	case BOOKE_IRQPRIO_DTLB_MISS:
+	case BOOKE_IRQPRIO_ITLB_MISS:
+	case BOOKE_IRQPRIO_SYSCALL:
+	case BOOKE_IRQPRIO_DATA_STORAGE:
+	case BOOKE_IRQPRIO_INST_STORAGE:
+	case BOOKE_IRQPRIO_FP_UNAVAIL:
+	case BOOKE_IRQPRIO_AP_UNAVAIL:
+	case BOOKE_IRQPRIO_ALIGNMENT:
+		allowed = 1;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
 		break;
-	case BOOKE_INTERRUPT_DECREMENTER:
-		r = vcpu->arch.msr & MSR_EE;
+	case BOOKE_IRQPRIO_CRITICAL:
+	case BOOKE_IRQPRIO_WATCHDOG:
+		allowed = vcpu->arch.msr & MSR_CE;
+		msr_mask = MSR_ME;
 		break;
-	case BOOKE_INTERRUPT_FIT:
-		r = vcpu->arch.msr & MSR_EE;
+	case BOOKE_IRQPRIO_MACHINE_CHECK:
+		allowed = vcpu->arch.msr & MSR_ME;
+		msr_mask = 0;
 		break;
-	case BOOKE_INTERRUPT_WATCHDOG:
-		r = vcpu->arch.msr & MSR_CE;
+	case BOOKE_IRQPRIO_EXTERNAL:
+	case BOOKE_IRQPRIO_DECREMENTER:
+	case BOOKE_IRQPRIO_FIT:
+		allowed = vcpu->arch.msr & MSR_EE;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
 		break;
-	case BOOKE_INTERRUPT_DEBUG:
-		r = vcpu->arch.msr & MSR_DE;
+	case BOOKE_IRQPRIO_DEBUG:
+		allowed = vcpu->arch.msr & MSR_DE;
+		msr_mask = MSR_ME;
 		break;
-	default:
-		r = 1;
 	}
 
-	return r;
-}
+	if (allowed) {
+		vcpu->arch.srr0 = vcpu->arch.pc;
+		vcpu->arch.srr1 = vcpu->arch.msr;
+		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+		kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
 
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_DECREMENTER:
-		vcpu->arch.tsr |= TSR_DIS;
-		break;
+		clear_bit(priority, &vcpu->arch.pending_exceptions);
 	}
 
-	vcpu->arch.srr0 = vcpu->arch.pc;
-	vcpu->arch.srr1 = vcpu->arch.msr;
-	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
-	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+	return allowed;
 }
 
 /* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 {
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
-	unsigned int exception;
 	unsigned int priority;
 
-	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+	priority = __ffs(*pending);
 	while (priority <= BOOKE_MAX_INTERRUPT) {
-		exception = priority_exception[priority];
-		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-			kvmppc_clear_exception(vcpu, exception);
-			kvmppc_deliver_interrupt(vcpu, exception);
+		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
 			break;
-		}
 
 		priority = find_next_bit(pending,
 		                         BITS_PER_BYTE * sizeof(*pending),
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	enum emulation_result er;
 	int r = RESUME_HOST;
 
+	/* update before a new last_exit_type is rewritten */
+	kvmppc_update_timing_stats(vcpu);
+
 	local_irq_enable();
 
 	run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_EXTERNAL:
+		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
 	case BOOKE_INTERRUPT_DECREMENTER:
 		/* Since we switched IVPR back to the host's value, the host
 		 * handled this interrupt the moment we enabled interrupts.
 		 * Now we just offer it a chance to reschedule the guest. */
-
-		/* XXX At this point the TLB still holds our shadow TLB, so if
-		 * we do reschedule the host will fault over it. Perhaps we
-		 * should politely restore the host's entries to minimize
-		 * misses before ceding control. */
+		kvmppc_account_exit(vcpu, DEC_EXITS);
 		if (need_resched())
 			cond_resched();
-		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
-			vcpu->stat.dec_exits++;
-		else
-			vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
 
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			/* Program traps generated by user-level software must be handled
 			 * by the guest kernel. */
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 			r = RESUME_GUEST;
+			kvmppc_account_exit(vcpu, USR_PR_INST);
 			break;
 		}
 
 		er = kvmppc_emulate_instruction(run, vcpu);
 		switch (er) {
 		case EMULATE_DONE:
+			/* don't overwrite subtypes, just account kvm_stats */
+			kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
 			/* Future optimization: only reload non-volatiles if
 			 * they were actually modified by emulation. */
-			vcpu->stat.emulated_inst_exits++;
 			r = RESUME_GUEST_NV;
 			break;
 		case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		case EMULATE_FAIL:
 			/* XXX Deliver Program interrupt to guest. */
-			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
 			/* For debugging, encode the failing instruction and
 			 * report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
-		kvmppc_queue_exception(vcpu, exit_nr);
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+		kvmppc_account_exit(vcpu, FP_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_DATA_STORAGE:
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.dsi_exits++;
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+		kvmppc_account_exit(vcpu, DSI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
 		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.isi_exits++;
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+		kvmppc_account_exit(vcpu, ISI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SYSCALL:
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.syscall_exits++;
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+		kvmppc_account_exit(vcpu, SYSCALL_EXITS);
 		r = RESUME_GUEST;
 		break;
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_DTLB_MISS: {
-		struct tlbe *gtlbe;
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
 		gfn_t gfn;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
-			vcpu->stat.dtlb_real_miss_exits++;
+			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
 		}
 
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
 		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
 
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
-			vcpu->stat.dtlb_virt_miss_exits++;
+			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
 			r = kvmppc_emulate_mmio(run, vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
 		}
 
 		break;
 	}
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_ITLB_MISS: {
-		struct tlbe *gtlbe;
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
+		gpa_t gpaddr;
 		gfn_t gfn;
+		int gtlb_index;
 
 		r = RESUME_GUEST;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
-			vcpu->stat.itlb_real_miss_exits++;
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
 			break;
 		}
 
-		vcpu->stat.itlb_virt_miss_exits++;
+		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
 
-		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 			/* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
-			kvmppc_queue_exception(vcpu,
-			                       BOOKE_INTERRUPT_MACHINE_CHECK);
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
 		}
 
 		break;
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		mtspr(SPRN_DBSR, dbsr);
 
 		run->exit_reason = KVM_EXIT_DEBUG;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
 		r = RESUME_HOST;
 		break;
 	}
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	local_irq_disable();
 
-	kvmppc_check_and_deliver_interrupts(vcpu);
+	kvmppc_core_deliver_interrupts(vcpu);
 
-	/* Do some exit accounting. */
-	vcpu->stat.sum_exits++;
 	if (!(r & RESUME_HOST)) {
 		/* To avoid clobbering exit_reason, only check for signals if
 		 * we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (signal_pending(current)) {
 			run->exit_reason = KVM_EXIT_INTR;
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
-			vcpu->stat.signal_exits++;
-		} else {
-			vcpu->stat.light_exits++;
-		}
-	} else {
-		switch (run->exit_reason) {
-		case KVM_EXIT_MMIO:
-			vcpu->stat.mmio_exits++;
-			break;
-		case KVM_EXIT_DCR:
-			vcpu->stat.dcr_exits++;
-			break;
-		case KVM_EXIT_INTR:
-			vcpu->stat.signal_exits++;
-			break;
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
 		}
 	}
 
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
-	tlbe->tid = 0;
-	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
-	tlbe->word1 = 0;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
-	tlbe++;
-	tlbe->tid = 0;
-	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
-	tlbe->word1 = 0xef600000;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
-	              | PPC44x_TLB_I | PPC44x_TLB_G;
-
 	vcpu->arch.pc = 0;
 	vcpu->arch.msr = 0;
 	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * before it's programmed its own IVPR. */
 	vcpu->arch.ivpr = 0x55550000;
 
-	/* Since the guest can directly access the timebase, it must know the
-	 * real timebase frequency. Accordingly, it must see the state of
-	 * CCR1[TCS]. */
-	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+	kvmppc_init_timing_stats(vcpu);
 
-	return 0;
+	return kvmppc_core_vcpu_setup(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.ctr = regs->ctr;
 	vcpu->arch.lr = regs->lr;
 	vcpu->arch.xer = regs->xer;
-	vcpu->arch.msr = regs->msr;
+	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.srr0 = regs->srr0;
 	vcpu->arch.srr1 = regs->srr1;
 	vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -ENOTSUPP;
 }
 
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
-	struct tlbe *gtlbe;
-	int index;
-	gva_t eaddr;
-	u8 pid;
-	u8 as;
-
-	eaddr = tr->linear_address;
-	pid = (tr->linear_address >> 32) & 0xff;
-	as = (tr->linear_address >> 40) & 0x1;
-
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
-	if (index == -1) {
-		tr->valid = 0;
-		return 0;
-	}
+	return kvmppc_core_vcpu_translate(vcpu, tr);
+}
 
-	gtlbe = &vcpu->arch.guest_tlb[index];
+int kvmppc_booke_init(void)
+{
+	unsigned long ivor[16];
+	unsigned long max_ivor = 0;
+	int i;
 
-	tr->physical_address = tlb_xlate(gtlbe, eaddr);
-	/* XXX what does "writeable" and "usermode" even mean? */
-	tr->valid = 1;
+	/* We install our own exception handlers by hijacking IVPR. IVPR must
+	 * be 16-bit aligned, so we need a 64KB allocation. */
+	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	                                         VCPU_SIZE_ORDER);
+	if (!kvmppc_booke_handlers)
+		return -ENOMEM;
+
+	/* XXX make sure our handlers are smaller than Linux's */
+
+	/* Copy our interrupt handlers to match host IVORs. That way we don't
+	 * have to swap the IVORs on every guest/host transition. */
+	ivor[0] = mfspr(SPRN_IVOR0);
+	ivor[1] = mfspr(SPRN_IVOR1);
+	ivor[2] = mfspr(SPRN_IVOR2);
+	ivor[3] = mfspr(SPRN_IVOR3);
+	ivor[4] = mfspr(SPRN_IVOR4);
+	ivor[5] = mfspr(SPRN_IVOR5);
+	ivor[6] = mfspr(SPRN_IVOR6);
+	ivor[7] = mfspr(SPRN_IVOR7);
+	ivor[8] = mfspr(SPRN_IVOR8);
+	ivor[9] = mfspr(SPRN_IVOR9);
+	ivor[10] = mfspr(SPRN_IVOR10);
+	ivor[11] = mfspr(SPRN_IVOR11);
+	ivor[12] = mfspr(SPRN_IVOR12);
+	ivor[13] = mfspr(SPRN_IVOR13);
+	ivor[14] = mfspr(SPRN_IVOR14);
+	ivor[15] = mfspr(SPRN_IVOR15);
+
+	for (i = 0; i < 16; i++) {
+		if (ivor[i] > max_ivor)
+			max_ivor = ivor[i];
+
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       kvmppc_handlers_start + i * kvmppc_handler_len,
+		       kvmppc_handler_len);
+	}
+	flush_icache_range(kvmppc_booke_handlers,
+	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
 
 	return 0;
 }
+
+void __exit kvmppc_booke_exit(void)
+{
+	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+	kvm_exit();
+}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 000000000000..cf7c94ca24bf
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include "timing.h"
+
+/* interrupt priortity ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SYSCALL 5
+#define BOOKE_IRQPRIO_AP_UNAVAIL 6
+#define BOOKE_IRQPRIO_DTLB_MISS 7
+#define BOOKE_IRQPRIO_ITLB_MISS 8
+#define BOOKE_IRQPRIO_MACHINE_CHECK 9
+#define BOOKE_IRQPRIO_DEBUG 10
+#define BOOKE_IRQPRIO_CRITICAL 11
+#define BOOKE_IRQPRIO_WATCHDOG 12
+#define BOOKE_IRQPRIO_EXTERNAL 13
+#define BOOKE_IRQPRIO_FIT 14
+#define BOOKE_IRQPRIO_DECREMENTER 15
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+	vcpu->arch.msr = new_msr;
+
+	if (vcpu->arch.msr & MSR_WE) {
+		kvm_vcpu_block(vcpu);
+		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+	};
+}
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31e..000000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_ppc.h>
-
-unsigned long kvmppc_booke_handlers;
-
-static int kvmppc_booke_init(void)
-{
-	unsigned long ivor[16];
-	unsigned long max_ivor = 0;
-	int i;
-
-	/* We install our own exception handlers by hijacking IVPR. IVPR must
-	 * be 16-bit aligned, so we need a 64KB allocation. */
-	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-	                                         VCPU_SIZE_ORDER);
-	if (!kvmppc_booke_handlers)
-		return -ENOMEM;
-
-	/* XXX make sure our handlers are smaller than Linux's */
-
-	/* Copy our interrupt handlers to match host IVORs. That way we don't
-	 * have to swap the IVORs on every guest/host transition. */
-	ivor[0] = mfspr(SPRN_IVOR0);
-	ivor[1] = mfspr(SPRN_IVOR1);
-	ivor[2] = mfspr(SPRN_IVOR2);
-	ivor[3] = mfspr(SPRN_IVOR3);
-	ivor[4] = mfspr(SPRN_IVOR4);
-	ivor[5] = mfspr(SPRN_IVOR5);
-	ivor[6] = mfspr(SPRN_IVOR6);
-	ivor[7] = mfspr(SPRN_IVOR7);
-	ivor[8] = mfspr(SPRN_IVOR8);
-	ivor[9] = mfspr(SPRN_IVOR9);
-	ivor[10] = mfspr(SPRN_IVOR10);
-	ivor[11] = mfspr(SPRN_IVOR11);
-	ivor[12] = mfspr(SPRN_IVOR12);
-	ivor[13] = mfspr(SPRN_IVOR13);
-	ivor[14] = mfspr(SPRN_IVOR14);
-	ivor[15] = mfspr(SPRN_IVOR15);
-
-	for (i = 0; i < 16; i++) {
-		if (ivor[i] > max_ivor)
-			max_ivor = ivor[i];
-
-		memcpy((void *)kvmppc_booke_handlers + ivor[i],
-		       kvmppc_handlers_start + i * kvmppc_handler_len,
-		       kvmppc_handler_len);
-	}
-	flush_icache_range(kvmppc_booke_handlers,
-	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
-	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
-}
-
-static void __exit kvmppc_booke_exit(void)
-{
-	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
-	kvm_exit();
-}
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85f..084ebcd7dd83 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
 	li	r6, 1
 	slw	r6, r6, r5
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save exit time */
+1:
+	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	bne	1b
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
 	/* Save the faulting instruction and all GPRs for emulation. */
 	andi.	r7, r6, NEED_INST_MASK
 	beq	..skip_inst_copy
@@ -335,54 +347,6 @@ lightweight_exit:
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
-	/* Prevent all asynchronous TLB updates. */
-	mfmsr	r5
-	lis	r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
-	ori	r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6, r5, r6
-	mtmsr	r6
-
-	/* Load the guest mappings, leaving the host's "pinned" kernel mappings
-	 * in place. */
-	mfspr	r10, SPRN_MMUCR			/* Save host MMUCR. */
-	li	r5, PPC44x_TLB_SIZE
-	lis	r5, tlb_44x_hwater@ha
-	lwz	r5, tlb_44x_hwater@l(r5)
-	mtctr	r5
-	addi	r9, r4, VCPU_SHADOW_TLB
-	addi	r5, r4, VCPU_SHADOW_MOD
-	li	r3, 0
-1:
-	lbzx	r7, r3, r5
-	cmpwi	r7, 0
-	beq	3f
-
-	/* Load guest entry. */
-	mulli	r11, r3, TLBE_BYTES
-	add	r11, r11, r9
-	lwz	r7, 0(r11)
-	mtspr	SPRN_MMUCR, r7
-	lwz	r7, 4(r11)
-	tlbwe	r7, r3, PPC44x_TLB_PAGEID
-	lwz	r7, 8(r11)
-	tlbwe	r7, r3, PPC44x_TLB_XLAT
-	lwz	r7, 12(r11)
-	tlbwe	r7, r3, PPC44x_TLB_ATTRIB
-3:
-	addi	r3, r3, 1                       /* Increment index. */
-	bdnz	1b
-
-	mtspr	SPRN_MMUCR, r10			/* Restore host MMUCR. */
-
-	/* Clear bitmap of modified TLB entries */
-	li	r5, PPC44x_TLB_SIZE>>2
-	mtctr	r5
-	addi	r5, r4, VCPU_SHADOW_MOD - 4
-	li	r6, 0
-1:
-	stwu	r6, 4(r5)
-	bdnz	1b
-
 	iccci	0, 0 /* XXX hack */
 
 	/* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
 	lwz	r3, VCPU_SPRG7(r4)
 	mtspr	SPRN_SPRG7, r3
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save enter time */
+1:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r7, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	bne	1b
+	stw	r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
 	/* Finish loading guest volatiles and jump to guest. */
 	lwz	r3, VCPU_CTR(r4)
 	mtctr	r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20d..d1d38daa93fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
 #include <linux/string.h>
 #include <linux/kvm_host.h>
 
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
+#include <asm/reg.h>
 #include <asm/time.h>
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include "timing.h"
 
-#include "44x_tlb.h"
-
-/* Instruction decoding */
-static inline unsigned int get_op(u32 inst)
-{
-	return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
-	return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
-	return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
-	return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
-	return inst & 0xffff;
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-                             const struct tlbe *tlbe)
-{
-	gpa_t gpa;
-
-	if (!get_tlb_v(tlbe))
-		return 0;
-
-	/* Does it match current guest AS? */
-	/* XXX what about IS != DS? */
-	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
-		return 0;
-
-	gpa = get_tlb_raddr(tlbe);
-	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
-		/* Mapping is not for RAM. */
-		return 0;
-
-	return 1;
-}
-
-static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
-{
-	u64 eaddr;
-	u64 raddr;
-	u64 asid;
-	u32 flags;
-	struct tlbe *tlbe;
-	unsigned int ra;
-	unsigned int rs;
-	unsigned int ws;
-	unsigned int index;
-
-	ra = get_ra(inst);
-	rs = get_rs(inst);
-	ws = get_ws(inst);
-
-	index = vcpu->arch.gpr[ra];
-	if (index > PPC44x_TLB_SIZE) {
-		printk("%s: index %d\n", __func__, index);
-		kvmppc_dump_vcpu(vcpu);
-		return EMULATE_FAIL;
-	}
-
-	tlbe = &vcpu->arch.guest_tlb[index];
-
-	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
-	if (tlbe->word0 & PPC44x_TLB_VALID) {
-		eaddr = get_tlb_eaddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
-	}
-
-	switch (ws) {
-	case PPC44x_TLB_PAGEID:
-		tlbe->tid = vcpu->arch.mmucr & 0xff;
-		tlbe->word0 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_XLAT:
-		tlbe->word1 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = vcpu->arch.gpr[rs];
-		break;
-
-	default:
-		return EMULATE_FAIL;
-	}
-
-	if (tlbe_is_host_safe(vcpu, tlbe)) {
-		eaddr = get_tlb_eaddr(tlbe);
-		raddr = get_tlb_raddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		flags = tlbe->word2 & 0xffff;
-
-		/* Create a 4KB mapping on the host. If the guest wanted a
-		 * large page, only the first 4KB is mapped here and the rest
-		 * are mapped on the fly. */
-		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
-	}
-
-	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
-			tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
-			handler);
-
-	return EMULATE_DONE;
-}
-
-static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.tcr & TCR_DIE) {
 		/* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pc = vcpu->arch.srr0;
-	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
 /* XXX to do:
  * lhax
  * lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
  *
  * XXX is_bigendian should depend on MMU mapping or MSR[LE]
  */
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	u32 inst = vcpu->arch.last_inst;
 	u32 ea;
 	int ra;
 	int rb;
-	int rc;
 	int rs;
 	int rt;
 	int sprn;
-	int dcrn;
 	enum emulation_result emulated = EMULATE_DONE;
 	int advance = 1;
 
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
 	switch (get_op(inst)) {
-	case 3:                                                 /* trap */
-		printk("trap!\n");
-		kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+	case 3:                                             /* trap */
+		vcpu->arch.esr |= ESR_PTR;
+		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
 
-	case 19:
-		switch (get_xop(inst)) {
-		case 50:                                        /* rfi */
-			kvmppc_emul_rfi(vcpu);
-			advance = 0;
-			break;
-
-		default:
-			emulated = EMULATE_FAIL;
-			break;
-		}
-		break;
-
 	case 31:
 		switch (get_xop(inst)) {
 
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 			break;
 
-		case 83:                                        /* mfmsr */
-			rt = get_rt(inst);
-			vcpu->arch.gpr[rt] = vcpu->arch.msr;
-			break;
-
 		case 87:                                        /* lbzx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 			break;
 
-		case 131:                                       /* wrtee */
-			rs = get_rs(inst);
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (vcpu->arch.gpr[rs] & MSR_EE);
-			break;
-
-		case 146:                                       /* mtmsr */
-			rs = get_rs(inst);
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
-			break;
-
 		case 151:                                       /* stwx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               4, 1);
 			break;
 
-		case 163:                                       /* wrteei */
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (inst & MSR_EE);
-			break;
-
 		case 215:                                       /* stbx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 323:                                       /* mfdcr */
-			dcrn = get_dcrn(inst);
-			rt = get_rt(inst);
-
-			/* The guest may access CPR0 registers to determine the timebase
-			 * frequency, and it must know the real host frequency because it
-			 * can directly access the timebase registers.
-			 *
-			 * It would be possible to emulate those accesses in userspace,
-			 * but userspace can really only figure out the end frequency.
-			 * We could decompose that into the factors that compute it, but
-			 * that's tricky math, and it's easier to just report the real
-			 * CPR0 values.
-			 */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
-				break;
-			case DCRN_CPR0_CONFIG_DATA:
-				local_irq_disable();
-				mtdcr(DCRN_CPR0_CONFIG_ADDR,
-				      vcpu->arch.cpr0_cfgaddr);
-				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
-				local_irq_enable();
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data =  0;
-				run->dcr.is_write = 0;
-				vcpu->arch.io_gpr = rt;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 339:                                       /* mfspr */
 			sprn = get_sprn(inst);
 			rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
-			case SPRN_MMUCR:
-				vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
-			case SPRN_PID:
-				vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
-			case SPRN_IVPR:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
-			case SPRN_CCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
-			case SPRN_CCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
 			case SPRN_PVR:
 				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
-			case SPRN_DEAR:
-				vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
-			case SPRN_ESR:
-				vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
-			case SPRN_DBCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
-			case SPRN_DBCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			/* Note: SPRG4-7 are user-readable, so we don't get
 			 * a trap. */
 
-			case SPRN_IVOR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
-
 			default:
-				printk("mfspr: unknown spr %x\n", sprn);
-				vcpu->arch.gpr[rt] = 0;
+				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+				if (emulated == EMULATE_FAIL) {
+					printk("mfspr: unknown spr %x\n", sprn);
+					vcpu->arch.gpr[rt] = 0;
+				}
 				break;
 			}
 			break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 451:                                       /* mtdcr */
-			dcrn = get_dcrn(inst);
-			rs = get_rs(inst);
-
-			/* emulate some access in kernel */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data = vcpu->arch.gpr[rs];
-				run->dcr.is_write = 1;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 467:                                       /* mtspr */
 			sprn = get_sprn(inst);
 			rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SRR1:
 				vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_MMUCR:
-				vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
-			case SPRN_PID:
-				kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
-			case SPRN_CCR0:
-				vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_CCR1:
-				vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DEAR:
-				vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
-			case SPRN_ESR:
-				vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR0:
-				vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR1:
-				vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
 
 			/* XXX We need to context-switch the timebase for
 			 * watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				kvmppc_emulate_dec(vcpu);
 				break;
 
-			case SPRN_TSR:
-				vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-
-			case SPRN_TCR:
-				vcpu->arch.tcr = vcpu->arch.gpr[rs];
-				kvmppc_emulate_dec(vcpu);
-				break;
-
 			case SPRN_SPRG0:
 				vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			case SPRN_SPRG3:
 				vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
 
-			/* Note: SPRG4-7 are user-readable. These values are
-			 * loaded into the real SPRGs when resuming the
-			 * guest. */
-			case SPRN_SPRG4:
-				vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG5:
-				vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG6:
-				vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG7:
-				vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
-			case SPRN_IVPR:
-				vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR0:
-				vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
-
 			default:
-				printk("mtspr: unknown spr %x\n", sprn);
-				emulated = EMULATE_FAIL;
+				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+				if (emulated == EMULATE_FAIL)
+					printk("mtspr: unknown spr %x\n", sprn);
 				break;
 			}
 			break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               4, 0);
 			break;
 
-		case 978:                                       /* tlbwe */
-			emulated = kvmppc_emul_tlbwe(vcpu, inst);
-			break;
-
-		case 914:       {                               /* tlbsx */
-			int index;
-			unsigned int as = get_mmucr_sts(vcpu);
-			unsigned int pid = get_mmucr_stid(vcpu);
-
-			rt = get_rt(inst);
-			ra = get_ra(inst);
-			rb = get_rb(inst);
-			rc = get_rc(inst);
-
-			ea = vcpu->arch.gpr[rb];
-			if (ra)
-				ea += vcpu->arch.gpr[ra];
-
-			index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
-			if (rc) {
-				if (index < 0)
-					vcpu->arch.cr &= ~0x20000000;
-				else
-					vcpu->arch.cr |= 0x20000000;
-			}
-			vcpu->arch.gpr[rt] = index;
-
-			}
-			break;
-
 		case 790:                                       /* lhbrx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               2, 0);
 			break;
 
-		case 966:                                       /* iccci */
-			break;
-
 		default:
-			printk("unknown: op %d xop %d\n", get_op(inst),
-				get_xop(inst));
+			/* Attempt core-specific emulation below. */
 			emulated = EMULATE_FAIL;
-			break;
 		}
 		break;
 
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		break;
 
 	default:
-		printk("unknown op %d\n", get_op(inst));
 		emulated = EMULATE_FAIL;
-		break;
 	}
 
-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+	if (emulated == EMULATE_FAIL) {
+		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+		if (emulated == EMULATE_FAIL) {
+			advance = 0;
+			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+		}
+	}
+
+	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
 
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe1..2822c8ccfaaf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
+#include "timing.h"
 #include "../mm/mmu_decl.h"
 
-
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
 
 void kvm_arch_check_processor_compat(void *rtn)
 {
-	int r;
-
-	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
-		r = 0;
-	else
-		r = -ENOTSUPP;
-
-	*(int *)rtn = r;
+	*(int *)rtn = kvmppc_core_check_processor_compat();
 }
 
 struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	int r;
 
 	switch (ext) {
-	case KVM_CAP_USER_MEMORY:
-		r = 1;
-		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err;
-
-	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = kvm_vcpu_init(vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
+	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
-
-free_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
-	return ERR_PTR(err);
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
+	kvmppc_remove_vcpu_debugfs(vcpu);
+	kvmppc_core_vcpu_free(vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-
-	return test_bit(priority, &vcpu->arch.pending_exceptions);
+	return kvmppc_core_pending_dec(vcpu);
 }
 
 static void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+	kvmppc_core_queue_dec(vcpu);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_core_destroy_mmu(vcpu);
 }
 
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvmppc_disable_debug_interrupts(void)
-{
-	mtmsr(mfmsr() & ~MSR_DE);
-}
-
-static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
-{
-	kvmppc_disable_debug_interrupts();
-
-	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
-	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
-	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
-	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
-	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
-	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
-	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
-	mtmsr(vcpu->arch.host_msr);
-}
-
-static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-	u32 dbcr0 = 0;
-
-	vcpu->arch.host_msr = mfmsr();
-	kvmppc_disable_debug_interrupts();
-
-	/* Save host debug register state. */
-	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
-	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
-	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
-	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
-	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
-	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
-	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
-	/* set registers up for guest */
-
-	if (dbg->bp[0]) {
-		mtspr(SPRN_IAC1, dbg->bp[0]);
-		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
-	}
-	if (dbg->bp[1]) {
-		mtspr(SPRN_IAC2, dbg->bp[1]);
-		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
-	}
-	if (dbg->bp[2]) {
-		mtspr(SPRN_IAC3, dbg->bp[2]);
-		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
-	}
-	if (dbg->bp[3]) {
-		mtspr(SPRN_IAC4, dbg->bp[3]);
-		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
-	}
-
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBCR1, 0);
-	mtspr(SPRN_DBCR2, 0);
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
 	if (vcpu->guest_debug.enabled)
-		kvmppc_load_guest_debug_registers(vcpu);
+		kvmppc_core_load_guest_debugstate(vcpu);
 
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
+	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_debug.enabled)
-		kvmppc_restore_host_debug_state(vcpu);
+		kvmppc_core_load_host_debugstate(vcpu);
 
 	/* Don't leave guest TLB entries resident when being de-scheduled. */
 	/* XXX It would be nice to differentiate between heavyweight exit and
 	 * sched_out here, since we could avoid the TLB flush for heavyweight
 	 * exits. */
 	_tlbil_all();
+	kvmppc_core_vcpu_put(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 	*gpr = run->dcr.data;
 }
 
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 
 	if (run->mmio.len > sizeof(*gpr)) {
 		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		vcpu->arch.dcr_needed = 0;
 	}
 
-	kvmppc_check_and_deliver_interrupts(vcpu);
+	kvmppc_core_deliver_interrupts(vcpu);
 
 	local_irq_disable();
 	kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+	kvmppc_core_queue_external(vcpu, irq);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 000000000000..47ee603f558e
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	/* pause guest execution to avoid concurrent updates */
+	local_irq_disable();
+	mutex_lock(&vcpu->mutex);
+
+	vcpu->arch.last_exit_type = 0xDEAD;
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		vcpu->arch.timing_count_type[i] = 0;
+		vcpu->arch.timing_max_duration[i] = 0;
+		vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+		vcpu->arch.timing_sum_duration[i] = 0;
+		vcpu->arch.timing_sum_quad_duration[i] = 0;
+	}
+	vcpu->arch.timing_last_exit = 0;
+	vcpu->arch.timing_exit.tv64 = 0;
+	vcpu->arch.timing_last_enter.tv64 = 0;
+
+	mutex_unlock(&vcpu->mutex);
+	local_irq_enable();
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+	u64 old;
+
+	do_div(duration, tb_ticks_per_usec);
+	if (unlikely(duration > 0xFFFFFFFF)) {
+		printk(KERN_ERR"%s - duration too big -> overflow"
+			" duration %lld type %d exit #%d\n",
+			__func__, duration, type,
+			vcpu->arch.timing_count_type[type]);
+		return;
+	}
+
+	vcpu->arch.timing_count_type[type]++;
+
+	/* sum */
+	old = vcpu->arch.timing_sum_duration[type];
+	vcpu->arch.timing_sum_duration[type] += duration;
+	if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old, vcpu->arch.timing_sum_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* square sum */
+	old = vcpu->arch.timing_sum_quad_duration[type];
+	vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+	if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of squared durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old,
+			vcpu->arch.timing_sum_quad_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* set min/max */
+	if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+		vcpu->arch.timing_min_duration[type] = duration;
+	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+		vcpu->arch.timing_max_duration[type] = duration;
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+	u64 exit = vcpu->arch.timing_last_exit;
+	u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+	/* save exit time, used next exit when the reenter time is known */
+	vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+	if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+		return; /* skip incomplete cycle (e.g. after reset) */
+
+	/* update statistics for average and standard deviation */
+	add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+	/* enter -> timing_last_exit is time spent in guest - log this too */
+	add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+			TIMEINGUEST);
+}
+
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+	[MMIO_EXITS] =              "MMIO",
+	[DCR_EXITS] =               "DCR",
+	[SIGNAL_EXITS] =            "SIGNAL",
+	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
+	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
+	[DTLB_REAL_MISS_EXITS] =    "DTLBREAL",
+	[DTLB_VIRT_MISS_EXITS] =    "DTLBVIRT",
+	[SYSCALL_EXITS] =           "SYSCALL",
+	[ISI_EXITS] =               "ISI",
+	[DSI_EXITS] =               "DSI",
+	[EMULATED_INST_EXITS] =     "EMULINST",
+	[EMULATED_MTMSRWE_EXITS] =  "EMUL_WAIT",
+	[EMULATED_WRTEE_EXITS] =    "EMUL_WRTEE",
+	[EMULATED_MTSPR_EXITS] =    "EMUL_MTSPR",
+	[EMULATED_MFSPR_EXITS] =    "EMUL_MFSPR",
+	[EMULATED_MTMSR_EXITS] =    "EMUL_MTMSR",
+	[EMULATED_MFMSR_EXITS] =    "EMUL_MFMSR",
+	[EMULATED_TLBSX_EXITS] =    "EMUL_TLBSX",
+	[EMULATED_TLBWE_EXITS] =    "EMUL_TLBWE",
+	[EMULATED_RFI_EXITS] =      "EMUL_RFI",
+	[DEC_EXITS] =               "DEC",
+	[EXT_INTR_EXITS] =          "EXTINT",
+	[HALT_WAKEUP] =             "HALT",
+	[USR_PR_INST] =             "USR_PR_INST",
+	[FP_UNAVAIL] =              "FP_UNAVAIL",
+	[DEBUG_EXITS] =             "DEBUG",
+	[TIMEINGUEST] =             "TIMEINGUEST"
+};
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+	struct kvm_vcpu *vcpu = m->private;
+	int i;
+
+	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
+
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		seq_printf(m, "%12s	%10d	%10lld	%10lld	%20lld	%20lld\n",
+			kvm_exit_names[i],
+			vcpu->arch.timing_count_type[i],
+			vcpu->arch.timing_min_duration[i],
+			vcpu->arch.timing_max_duration[i],
+			vcpu->arch.timing_sum_duration[i],
+			vcpu->arch.timing_sum_quad_duration[i]);
+	}
+	return 0;
+}
+
+/* Write 'c' to clear the timing statistics. */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+				       const char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	int err = -EINVAL;
+	char c;
+
+	if (count > 1) {
+		goto done;
+	}
+
+	if (get_user(c, user_buf)) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	if (c == 'c') {
+		struct seq_file *seqf = (struct seq_file *)file->private_data;
+		struct kvm_vcpu *vcpu = seqf->private;
+		/* Write does not affect our buffers previously generated with
+		 * show. seq_file is locked here to prevent races of init with
+		 * a show call */
+		mutex_lock(&seqf->lock);
+		kvmppc_init_timing_stats(vcpu);
+		mutex_unlock(&seqf->lock);
+		err = count;
+	}
+
+done:
+	return err;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static struct file_operations kvmppc_exit_timing_fops = {
+	.owner   = THIS_MODULE,
+	.open    = kvmppc_exit_timing_open,
+	.read    = seq_read,
+	.write   = kvmppc_exit_timing_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+	static char dbg_fname[50];
+	struct dentry *debugfs_file;
+
+	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
+		 current->pid, id);
+	debugfs_file = debugfs_create_file(dbg_fname, 0666,
+					kvm_debugfs_dir, vcpu,
+					&kvmppc_exit_timing_fops);
+
+	if (!debugfs_file) {
+		printk(KERN_ERR"%s: error creating debugfs file %s\n",
+			__func__, dbg_fname);
+		return;
+	}
+
+	vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.debugfs_exit_timing) {
+		debugfs_remove(vcpu->arch.debugfs_exit_timing);
+		vcpu->arch.debugfs_exit_timing = NULL;
+	}
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 000000000000..bb13b1f3cd5a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+	vcpu->arch.last_exit_type = type;
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+						unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* account the exit in kvm_stats */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+	/* type has to be known at build time for optimization */
+	BUILD_BUG_ON(__builtin_constant_p(type));
+	switch (type) {
+	case EXT_INTR_EXITS:
+		vcpu->stat.ext_intr_exits++;
+		break;
+	case DEC_EXITS:
+		vcpu->stat.dec_exits++;
+		break;
+	case EMULATED_INST_EXITS:
+		vcpu->stat.emulated_inst_exits++;
+		break;
+	case DCR_EXITS:
+		vcpu->stat.dcr_exits++;
+		break;
+	case DSI_EXITS:
+		vcpu->stat.dsi_exits++;
+		break;
+	case ISI_EXITS:
+		vcpu->stat.isi_exits++;
+		break;
+	case SYSCALL_EXITS:
+		vcpu->stat.syscall_exits++;
+		break;
+	case DTLB_REAL_MISS_EXITS:
+		vcpu->stat.dtlb_real_miss_exits++;
+		break;
+	case DTLB_VIRT_MISS_EXITS:
+		vcpu->stat.dtlb_virt_miss_exits++;
+		break;
+	case MMIO_EXITS:
+		vcpu->stat.mmio_exits++;
+		break;
+	case ITLB_REAL_MISS_EXITS:
+		vcpu->stat.itlb_real_miss_exits++;
+		break;
+	case ITLB_VIRT_MISS_EXITS:
+		vcpu->stat.itlb_virt_miss_exits++;
+		break;
+	case SIGNAL_EXITS:
+		vcpu->stat.signal_exits++;
+		break;
+	}
+}
+
+/* wrapper to set exit time and account for it in kvm_stats */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+	kvmppc_set_exit_type(vcpu, type);
+	kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 201c7a5486cb..9920d6a7cf29 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -512,6 +512,13 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
+	return 1UL << mmu_psize_to_shift(psize);
+}
+
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06ebb3f2f..f00f09a77f12 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 4314b39b6faf..ad123bced404 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -30,11 +30,11 @@
 #if defined(CONFIG_40x) || defined(CONFIG_8xx)
 static inline void _tlbil_all(void)
 {
-	asm volatile ("sync; tlbia; isync" : : : "memory")
+	asm volatile ("sync; tlbia; isync" : : : "memory");
 }
 static inline void _tlbil_pid(unsigned int pid)
 {
-	asm volatile ("sync; tlbia; isync" : : : "memory")
+	asm volatile ("sync; tlbia; isync" : : : "memory");
 }
 #else /* CONFIG_40x || CONFIG_8xx */
 extern void _tlbil_all(void);
@@ -47,7 +47,7 @@ extern void _tlbil_pid(unsigned int pid);
 #ifdef CONFIG_8xx
 static inline void _tlbil_va(unsigned long address, unsigned int pid)
 {
-	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory")
+	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
 }
 #else /* CONFIG_8xx */
 extern void _tlbil_va(unsigned long address, unsigned int pid);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf81049e1e51..7393bd76d698 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -822,42 +822,50 @@ static void __init dump_numa_memory_topology(void)
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
- * Returns the physical address of the memory.
+ * Returns the virtual address of the memory.
  */
-static void __init *careful_allocation(int nid, unsigned long size,
+static void __init *careful_zallocation(int nid, unsigned long size,
 				       unsigned long align,
 				       unsigned long end_pfn)
 {
+	void *ret;
 	int new_nid;
-	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	unsigned long ret_paddr;
+
+	ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
-	if (!ret)
-		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+	if (!ret_paddr)
+		ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
-	if (!ret)
-		panic("numa.c: cannot allocate %lu bytes on node %d",
+	if (!ret_paddr)
+		panic("numa.c: cannot allocate %lu bytes for node %d",
 		      size, nid);
 
+	ret = __va(ret_paddr);
+
 	/*
-	 * If the memory came from a previously allocated node, we must
-	 * retry with the bootmem allocator.
+	 * We initialize the nodes in numeric order: 0, 1, 2...
+	 * and hand over control from the LMB allocator to the
+	 * bootmem allocator.  If this function is called for
+	 * node 5, then we know that all nodes <5 are using the
+	 * bootmem allocator instead of the LMB allocator.
+	 *
+	 * So, check the nid from which this allocation came
+	 * and double check to see if we need to use bootmem
+	 * instead of the LMB.  We don't free the LMB memory
+	 * since it would be useless.
 	 */
-	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
 	if (new_nid < nid) {
-		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
+		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
 				size, align, 0);
 
-		if (!ret)
-			panic("numa.c: cannot allocate %lu bytes on node %d",
-			      size, new_nid);
-
-		ret = __pa(ret);
-
-		dbg("alloc_bootmem %lx %lx\n", ret, size);
+		dbg("alloc_bootmem %p %lx\n", ret, size);
 	}
 
-	return (void *)ret;
+	memset(ret, 0, size);
+	return ret;
 }
 
 static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -952,7 +960,7 @@ void __init do_init_bootmem(void)
 
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
-		unsigned long bootmem_paddr;
+		void *bootmem_vaddr;
 		unsigned long bootmap_pages;
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -964,11 +972,9 @@ void __init do_init_bootmem(void)
 		 * previous nodes' bootmem to be initialized and have
 		 * all reserved areas marked.
 		 */
-		NODE_DATA(nid) = careful_allocation(nid,
+		NODE_DATA(nid) = careful_zallocation(nid,
 					sizeof(struct pglist_data),
 					SMP_CACHE_BYTES, end_pfn);
-		NODE_DATA(nid) = __va(NODE_DATA(nid));
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
   		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -984,20 +990,20 @@ void __init do_init_bootmem(void)
   		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_paddr = (unsigned long)careful_allocation(nid,
+		bootmem_vaddr = careful_zallocation(nid,
 					bootmap_pages << PAGE_SHIFT,
 					PAGE_SIZE, end_pfn);
-		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
 
-		dbg("bootmap_paddr = %lx\n", bootmem_paddr);
+		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
 
-		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+		init_bootmem_node(NODE_DATA(nid),
+				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
 				  start_pfn, end_pfn);
 
 		free_bootmem_with_active_regions(nid, end_pfn);
 		/*
 		 * Be very careful about moving this around.  Future
-		 * calls to careful_allocation() depend on this getting
+		 * calls to careful_zallocation() depend on this getting
 		 * done correctly.
 		 */
 		mark_reserved_regions_for_nid(nid);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 38ff35f2142a..22972cd83cc9 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -266,7 +266,8 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
 		/* The PTE should never be already set nor present in the
 		 * hash table
 		 */
-		BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
+		BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
+		       flags);
 		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
 						     __pgprot(flags)));
 	}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 803a64c02b06..39ac22b13c73 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -189,8 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
 	_tlbil_pid(0);
 	preempt_enable();
-#endif
+#else
 	_tlbil_pid(0);
+#endif
 }
 EXPORT_SYMBOL(flush_tlb_kernel_range);
 
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 628009c01958..964b93974d89 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -30,6 +30,10 @@
 extern struct delayed_work spu_work;
 extern int spu_prof_running;
 
+#define TRACE_ARRAY_SIZE 1024
+
+extern spinlock_t oprof_spu_smpl_arry_lck;
+
 struct spu_overlay_info {	/* map of sections within an SPU overlay */
 	unsigned int vma;	/* SPU virtual memory address from elf */
 	unsigned int size;	/* size of section from elf */
@@ -79,7 +83,7 @@ struct spu_buffer {
  * the vma-to-fileoffset map.
  */
 struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
-					     u64 objectid);
+					     unsigned long objectid);
 unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
 			    unsigned int vma, const struct spu *aSpu,
 			    int *grd_val);
@@ -89,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
  * Entry point for SPU profiling.
  * cycles_reset is the SPU_CYCLES count value specified by the user.
  */
-int start_spu_profiling(unsigned int cycles_reset);
-
-void stop_spu_profiling(void);
+int start_spu_profiling_cycles(unsigned int cycles_reset);
+void start_spu_profiling_events(void);
 
+void stop_spu_profiling_cycles(void);
+void stop_spu_profiling_events(void);
 
 /* add the necessary profiling hooks */
 int spu_sync_start(void);
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index dd499c3e9da7..9305ddaac512 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -18,11 +18,21 @@
 #include <asm/cell-pmu.h>
 #include "pr_util.h"
 
-#define TRACE_ARRAY_SIZE 1024
 #define SCALE_SHIFT 14
 
 static u32 *samples;
 
+/* spu_prof_running is a flag used to indicate if spu profiling is enabled
+ * or not.  It is set by the routines start_spu_profiling_cycles() and
+ * start_spu_profiling_events().  The flag is cleared by the routines
+ * stop_spu_profiling_cycles() and stop_spu_profiling_events().  These
+ * routines are called via global_start() and global_stop() which are called in
+ * op_powerpc_start() and op_powerpc_stop().  These routines are called once
+ * per system as a result of the user starting/stopping oprofile.  Hence, only
+ * one CPU per user at a time will be changing  the value of spu_prof_running.
+ * In general, OProfile does not protect against multiple users trying to run
+ * OProfile at a time.
+ */
 int spu_prof_running;
 static unsigned int profiling_interval;
 
@@ -31,8 +41,8 @@ static unsigned int profiling_interval;
 
 #define SPU_PC_MASK	     0xFFFF
 
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
 
 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
 {
@@ -49,7 +59,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese
 	 * of precision.  This is close enough for the purpose at hand.
 	 *
 	 * The value of the timeout should be small enough that the hw
-	 * trace buffer will not get more then about 1/3 full for the
+	 * trace buffer will not get more than about 1/3 full for the
 	 * maximum user specified (the LFSR value) hw sampling frequency.
 	 * This is to ensure the trace buffer will never fill even if the
 	 * kernel thread scheduling varies under a heavy system load.
@@ -145,13 +155,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 		 * sample array must be loaded and then processed for a given
 		 * cpu.	 The sample array is not per cpu.
 		 */
-		spin_lock_irqsave(&sample_array_lock,
-				  sample_array_lock_flags);
+		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+				  oprof_spu_smpl_arry_lck_flags);
 		num_samples = cell_spu_pc_collection(cpu);
 
 		if (num_samples == 0) {
-			spin_unlock_irqrestore(&sample_array_lock,
-					       sample_array_lock_flags);
+			spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+					       oprof_spu_smpl_arry_lck_flags);
 			continue;
 		}
 
@@ -162,8 +172,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 					num_samples);
 		}
 
-		spin_unlock_irqrestore(&sample_array_lock,
-				       sample_array_lock_flags);
+		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+				       oprof_spu_smpl_arry_lck_flags);
 
 	}
 	smp_wmb();	/* insure spu event buffer updates are written */
@@ -182,13 +192,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 
 static struct hrtimer timer;
 /*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
  * NOTE:  SPU profiling is done system-wide, not per-CPU.
  *
  * cycles_reset is the count value specified by the user when
  * setting up OProfile to count SPU_CYCLES.
  */
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
 {
 	ktime_t kt;
 
@@ -212,10 +222,30 @@ int start_spu_profiling(unsigned int cycles_reset)
 	return 0;
 }
 
-void stop_spu_profiling(void)
+/*
+ * Entry point for SPU event profiling.
+ * NOTE:  SPU profiling is done system-wide, not per-CPU.
+ *
+ * cycles_reset is the count value specified by the user when
+ * setting up OProfile to count SPU_CYCLES.
+ */
+void start_spu_profiling_events(void)
+{
+	spu_prof_running = 1;
+	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+
+	return;
+}
+
+void stop_spu_profiling_cycles(void)
 {
 	spu_prof_running = 0;
 	hrtimer_cancel(&timer);
 	kfree(samples);
-	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+	pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
+}
+
+void stop_spu_profiling_events(void)
+{
+	spu_prof_running = 0;
 }
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 17807acb05d9..21f16edf6c8d 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -132,6 +132,28 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 	oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
 	oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
 	oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
+#ifdef CONFIG_OPROFILE_CELL
+	/* create a file the user tool can check to see what level of profiling
+	 * support exits with this kernel. Initialize bit mask to indicate
+	 * what support the kernel has:
+	 * bit 0      -  Supports SPU event profiling in addition to PPU
+	 *               event and cycles; and SPU cycle profiling
+	 * bits 1-31  -  Currently unused.
+	 *
+	 * If the file does not exist, then the kernel only supports SPU
+	 * cycle profiling, PPU event and cycle profiling.
+	 */
+	oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
+	sys.cell_support = 0x1; /* Note, the user OProfile tool must check
+				 * that this bit is set before attempting to
+				 * user SPU event profiling.  Older kernels
+				 * will not have this file, hence the user
+				 * tool is not allowed to do SPU event
+				 * profiling on older kernels.  Older kernels
+				 * will accept SPU events but collected data
+				 * is garbage.
+				 */
+#endif
 #endif
 
 	for (i = 0; i < model->num_counters; ++i) {
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 25a4ec2514a3..ae06c6236d9c 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -40,14 +40,15 @@
 #include "../platforms/cell/interrupt.h"
 #include "cell/pr_util.h"
 
-static void cell_global_stop_spu(void);
+#define PPU_PROFILING            0
+#define SPU_PROFILING_CYCLES     1
+#define SPU_PROFILING_EVENTS     2
 
-/*
- * spu_cycle_reset is the number of cycles between samples.
- * This variable is used for SPU profiling and should ONLY be set
- * at the beginning of cell_reg_setup; otherwise, it's read-only.
- */
-static unsigned int spu_cycle_reset;
+#define SPU_EVENT_NUM_START      4100
+#define SPU_EVENT_NUM_STOP       4399
+#define SPU_PROFILE_EVENT_ADDR          4363  /* spu, address trace, decimal */
+#define SPU_PROFILE_EVENT_ADDR_MASK_A   0x146 /* sub unit set to zero */
+#define SPU_PROFILE_EVENT_ADDR_MASK_B   0x186 /* sub unit set to zero */
 
 #define NUM_SPUS_PER_NODE    8
 #define SPU_CYCLES_EVENT_NUM 2	/*  event number for SPU_CYCLES */
@@ -66,6 +67,21 @@ static unsigned int spu_cycle_reset;
 
 #define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */
 
+/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle.
+ * To configure counter to send value every N cycles set counter to
+ * 2^32 - 1 - N.
+ */
+#define NUM_INTERVAL_CYC  0xFFFFFFFF - 10
+
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+static unsigned int profiling_mode;
+static int spu_evnt_phys_spu_indx;
+
 struct pmc_cntrl_data {
 	unsigned long vcntr;
 	unsigned long evnts;
@@ -105,6 +121,8 @@ struct pm_cntrl {
 	u16 trace_mode;
 	u16 freeze;
 	u16 count_mode;
+	u16 spu_addr_trace;
+	u8  trace_buf_ovflw;
 };
 
 static struct {
@@ -122,7 +140,7 @@ static struct {
 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
 
 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
-
+static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
 /*
@@ -152,6 +170,7 @@ static u32 hdw_thread;
 
 static u32 virt_cntr_inter_mask;
 static struct timer_list timer_virt_cntr;
+static struct timer_list timer_spu_event_swap;
 
 /*
  * pm_signal needs to be global since it is initialized in
@@ -165,7 +184,7 @@ static int spu_rtas_token;   /* token for SPU cycle profiling */
 static u32 reset_value[NR_PHYS_CTRS];
 static int num_counters;
 static int oprofile_running;
-static DEFINE_SPINLOCK(virt_cntr_lock);
+static DEFINE_SPINLOCK(cntr_lock);
 
 static u32 ctr_enabled;
 
@@ -336,13 +355,13 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
 	for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
 		if (bus_word & (1 << i)) {
 			pm_regs.debug_bus_control |=
-			    (bus_type << (30 - (2 * i)));
+				(bus_type << (30 - (2 * i)));
 
 			for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
 				if (input_bus[j] == 0xff) {
 					input_bus[j] = i;
 					pm_regs.group_control |=
-					    (i << (30 - (2 * j)));
+						(i << (30 - (2 * j)));
 
 					break;
 				}
@@ -367,12 +386,16 @@ static void write_pm_cntrl(int cpu)
 	if (pm_regs.pm_cntrl.stop_at_max == 1)
 		val |= CBE_PM_STOP_AT_MAX;
 
-	if (pm_regs.pm_cntrl.trace_mode == 1)
+	if (pm_regs.pm_cntrl.trace_mode != 0)
 		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
 
+	if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
+		val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
 	if (pm_regs.pm_cntrl.freeze == 1)
 		val |= CBE_PM_FREEZE_ALL_CTRS;
 
+	val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
+
 	/*
 	 * Routine set_count_mode must be called previously to set
 	 * the count mode based on the user selection of user and kernel.
@@ -441,7 +464,7 @@ static void cell_virtual_cntr(unsigned long data)
 	 * not both playing with the counters on the same node.
 	 */
 
-	spin_lock_irqsave(&virt_cntr_lock, flags);
+	spin_lock_irqsave(&cntr_lock, flags);
 
 	prev_hdw_thread = hdw_thread;
 
@@ -480,7 +503,7 @@ static void cell_virtual_cntr(unsigned long data)
 		cbe_disable_pm_interrupts(cpu);
 		for (i = 0; i < num_counters; i++) {
 			per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
-			    = cbe_read_ctr(cpu, i);
+				= cbe_read_ctr(cpu, i);
 
 			if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
 			    == 0xFFFFFFFF)
@@ -527,7 +550,7 @@ static void cell_virtual_cntr(unsigned long data)
 		cbe_enable_pm(cpu);
 	}
 
-	spin_unlock_irqrestore(&virt_cntr_lock, flags);
+	spin_unlock_irqrestore(&cntr_lock, flags);
 
 	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
 }
@@ -541,38 +564,146 @@ static void start_virt_cntrs(void)
 	add_timer(&timer_virt_cntr);
 }
 
-/* This function is called once for all cpus combined */
-static int cell_reg_setup(struct op_counter_config *ctr,
+static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
 			struct op_system_config *sys, int num_ctrs)
 {
-	int i, j, cpu;
-	spu_cycle_reset = 0;
+	spu_cycle_reset = ctr[0].count;
 
-	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
-		spu_cycle_reset = ctr[0].count;
+	/*
+	 * Each node will need to make the rtas call to start
+	 * and stop SPU profiling.  Get the token once and store it.
+	 */
+	spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+
+	if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+		printk(KERN_ERR
+		       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
+		       __func__);
+		return -EIO;
+	}
+	return 0;
+}
+
+/* Unfortunately, the hardware will only support event profiling
+ * on one SPU per node at a time.  Therefore, we must time slice
+ * the profiling across all SPUs in the node.  Note, we do this
+ * in parallel for each node.  The following routine is called
+ * periodically based on kernel timer to switch which SPU is
+ * being monitored in a round robbin fashion.
+ */
+static void spu_evnt_swap(unsigned long data)
+{
+	int node;
+	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
+	unsigned long flags;
+	int cpu;
+	int ret;
+	u32 interrupt_mask;
+
+
+	/* enable interrupts on cntr 0 */
+	interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
+
+	hdw_thread = 0;
+
+	/* Make sure spu event interrupt handler and spu event swap
+	 * don't access the counters simultaneously.
+	 */
+	spin_lock_irqsave(&cntr_lock, flags);
+
+	cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
+
+	if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
+		spu_evnt_phys_spu_indx = 0;
+
+	pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
+	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
+	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
+
+	/* switch the SPU being profiled on each node */
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		node = cbe_cpu_to_node(cpu);
+		cur_phys_spu = (node * NUM_SPUS_PER_NODE)
+			+ cur_spu_evnt_phys_spu_indx;
+		nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
+			+ spu_evnt_phys_spu_indx;
 
 		/*
-		 * Each node will need to make the rtas call to start
-		 * and stop SPU profiling.  Get the token once and store it.
+		 * stop counters, save counter values, restore counts
+		 * for previous physical SPU
 		 */
-		spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+		cbe_disable_pm(cpu);
+		cbe_disable_pm_interrupts(cpu);
 
-		if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
-			printk(KERN_ERR
-			       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
-			       __func__);
-			return -EIO;
-		}
+		spu_pm_cnt[cur_phys_spu]
+			= cbe_read_ctr(cpu, 0);
+
+		/* restore previous count for the next spu to sample */
+		/* NOTE, hardware issue, counter will not start if the
+		 * counter value is at max (0xFFFFFFFF).
+		 */
+		if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
+			cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
+		 else
+			 cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
+
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+		/* setup the debug bus measure the one event and
+		 * the two events to route the next SPU's PC on
+		 * the debug bus
+		 */
+		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
+		if (ret)
+			printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
+			       "SPU event swap\n", __func__);
+
+		/* clear the trace buffer, don't want to take PC for
+		 * previous SPU*/
+		cbe_write_pm(cpu, trace_address, 0);
+
+		enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
+
+		/* Enable interrupts on the CPU thread that is starting */
+		cbe_enable_pm_interrupts(cpu, hdw_thread,
+					 interrupt_mask);
+		cbe_enable_pm(cpu);
 	}
 
-	pm_rtas_token = rtas_token("ibm,cbe-perftools");
+	spin_unlock_irqrestore(&cntr_lock, flags);
 
+	/* swap approximately every 0.1 seconds */
+	mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
+}
+
+static void start_spu_event_swap(void)
+{
+	init_timer(&timer_spu_event_swap);
+	timer_spu_event_swap.function = spu_evnt_swap;
+	timer_spu_event_swap.data = 0UL;
+	timer_spu_event_swap.expires = jiffies + HZ / 25;
+	add_timer(&timer_spu_event_swap);
+}
+
+static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
+			struct op_system_config *sys, int num_ctrs)
+{
+	int i;
+
+	/* routine is called once for all nodes */
+
+	spu_evnt_phys_spu_indx = 0;
 	/*
-	 * For all events excetp PPU CYCLEs, each node will need to make
+	 * For all events except PPU CYCLEs, each node will need to make
 	 * the rtas cbe-perftools call to setup and reset the debug bus.
 	 * Make the token lookup call once and store it in the global
 	 * variable pm_rtas_token.
 	 */
+	pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
 	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
 		printk(KERN_ERR
 		       "%s: rtas token ibm,cbe-perftools unknown\n",
@@ -580,6 +711,58 @@ static int cell_reg_setup(struct op_counter_config *ctr,
 		return -EIO;
 	}
 
+	/* setup the pm_control register settings,
+	 * settings will be written per node by the
+	 * cell_cpu_setup() function.
+	 */
+	pm_regs.pm_cntrl.trace_buf_ovflw = 1;
+
+	/* Use the occurrence trace mode to have SPU PC saved
+	 * to the trace buffer.  Occurrence data in trace buffer
+	 * is not used.  Bit 2 must be set to store SPU addresses.
+	 */
+	pm_regs.pm_cntrl.trace_mode = 2;
+
+	pm_regs.pm_cntrl.spu_addr_trace = 0x1;  /* using debug bus
+						   event 2 & 3 */
+
+	/* setup the debug bus event array with the SPU PC routing events.
+	*  Note, pm_signal[0] will be filled in by set_pm_event() call below.
+	*/
+	pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
+	pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
+	pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
+	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
+
+	pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
+	pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
+	pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
+	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
+
+	/* Set the user selected spu event to profile on,
+	 * note, only one SPU profiling event is supported
+	 */
+	num_counters = 1;  /* Only support one SPU event at a time */
+	set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
+
+	reset_value[0] = 0xFFFFFFFF - ctr[0].count;
+
+	/* global, used by cell_cpu_setup */
+	ctr_enabled |= 1;
+
+	/* Initialize the count for each SPU to the reset value */
+	for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
+		spu_pm_cnt[i] = reset_value[0];
+
+	return 0;
+}
+
+static int cell_reg_setup_ppu(struct op_counter_config *ctr,
+			struct op_system_config *sys, int num_ctrs)
+{
+	/* routine is called once for all nodes */
+	int i, j, cpu;
+
 	num_counters = num_ctrs;
 
 	if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
@@ -589,14 +772,6 @@ static int cell_reg_setup(struct op_counter_config *ctr,
 		       __func__);
 		return -EIO;
 	}
-	pm_regs.group_control = 0;
-	pm_regs.debug_bus_control = 0;
-
-	/* setup the pm_control register */
-	memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl));
-	pm_regs.pm_cntrl.stop_at_max = 1;
-	pm_regs.pm_cntrl.trace_mode = 0;
-	pm_regs.pm_cntrl.freeze = 1;
 
 	set_count_mode(sys->enable_kernel, sys->enable_user);
 
@@ -665,6 +840,63 @@ static int cell_reg_setup(struct op_counter_config *ctr,
 }
 
 
+/* This function is called once for all cpus combined */
+static int cell_reg_setup(struct op_counter_config *ctr,
+			struct op_system_config *sys, int num_ctrs)
+{
+	int ret=0;
+	spu_cycle_reset = 0;
+
+	/* initialize the spu_arr_trace value, will be reset if
+	 * doing spu event profiling.
+	 */
+	pm_regs.group_control = 0;
+	pm_regs.debug_bus_control = 0;
+	pm_regs.pm_cntrl.stop_at_max = 1;
+	pm_regs.pm_cntrl.trace_mode = 0;
+	pm_regs.pm_cntrl.freeze = 1;
+	pm_regs.pm_cntrl.trace_buf_ovflw = 0;
+	pm_regs.pm_cntrl.spu_addr_trace = 0;
+
+	/*
+	 * For all events except PPU CYCLEs, each node will need to make
+	 * the rtas cbe-perftools call to setup and reset the debug bus.
+	 * Make the token lookup call once and store it in the global
+	 * variable pm_rtas_token.
+	 */
+	pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
+	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+		printk(KERN_ERR
+		       "%s: rtas token ibm,cbe-perftools unknown\n",
+		       __func__);
+		return -EIO;
+	}
+
+	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
+		profiling_mode = SPU_PROFILING_CYCLES;
+		ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
+	} else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
+		   (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
+		profiling_mode = SPU_PROFILING_EVENTS;
+		spu_cycle_reset = ctr[0].count;
+
+		/* for SPU event profiling, need to setup the
+		 * pm_signal array with the events to route the
+		 * SPU PC before making the FW call.  Note, only
+		 * one SPU event for profiling can be specified
+		 * at a time.
+		 */
+		cell_reg_setup_spu_events(ctr, sys, num_ctrs);
+	} else {
+		profiling_mode = PPU_PROFILING;
+		ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
+	}
+
+	return ret;
+}
+
+
 
 /* This function is called once for each cpu */
 static int cell_cpu_setup(struct op_counter_config *cntr)
@@ -672,8 +904,13 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
 	u32 cpu = smp_processor_id();
 	u32 num_enabled = 0;
 	int i;
+	int ret;
 
-	if (spu_cycle_reset)
+	/* Cycle based SPU profiling does not use the performance
+	 * counters.  The trace array is configured to collect
+	 * the data.
+	 */
+	if (profiling_mode == SPU_PROFILING_CYCLES)
 		return 0;
 
 	/* There is one performance monitor per processor chip (i.e. node),
@@ -686,7 +923,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
 	cbe_disable_pm(cpu);
 	cbe_disable_pm_interrupts(cpu);
 
-	cbe_write_pm(cpu, pm_interval, 0);
 	cbe_write_pm(cpu, pm_start_stop, 0);
 	cbe_write_pm(cpu, group_control, pm_regs.group_control);
 	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
@@ -703,7 +939,20 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
 	 * The pm_rtas_activate_signals will return -EIO if the FW
 	 * call failed.
 	 */
-	return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
+	if (profiling_mode == SPU_PROFILING_EVENTS) {
+		/* For SPU event profiling also need to setup the
+		 * pm interval timer
+		 */
+		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
+					       num_enabled+2);
+		/* store PC from debug bus to Trace buffer as often
+		 * as possible (every 10 cycles)
+		 */
+		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
+		return ret;
+	} else
+		return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
+						num_enabled);
 }
 
 #define ENTRIES	 303
@@ -885,7 +1134,122 @@ static struct notifier_block cpu_freq_notifier_block = {
 };
 #endif
 
-static int cell_global_start_spu(struct op_counter_config *ctr)
+/*
+ * Note the generic OProfile stop calls do not support returning
+ * an error on stop.  Hence, will not return an error if the FW
+ * calls fail on stop.	Failure to reset the debug bus is not an issue.
+ * Failure to disable the SPU profiling is not an issue.  The FW calls
+ * to enable the performance counters and debug bus will work even if
+ * the hardware was not cleanly reset.
+ */
+static void cell_global_stop_spu_cycles(void)
+{
+	int subfunc, rtn_value;
+	unsigned int lfsr_value;
+	int cpu;
+
+	oprofile_running = 0;
+	smp_wmb();
+
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
+				    CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		subfunc = 3;	/*
+				 * 2 - activate SPU tracing,
+				 * 3 - deactivate
+				 */
+		lfsr_value = 0x8f100000;
+
+		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
+				      subfunc, cbe_cpu_to_node(cpu),
+				      lfsr_value);
+
+		if (unlikely(rtn_value != 0)) {
+			printk(KERN_ERR
+			       "%s: rtas call ibm,cbe-spu-perftools " \
+			       "failed, return = %d\n",
+			       __func__, rtn_value);
+		}
+
+		/* Deactivate the signals */
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+	}
+
+	stop_spu_profiling_cycles();
+}
+
+static void cell_global_stop_spu_events(void)
+{
+	int cpu;
+	oprofile_running = 0;
+
+	stop_spu_profiling_events();
+	smp_wmb();
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		cbe_sync_irq(cbe_cpu_to_node(cpu));
+		/* Stop the counters */
+		cbe_disable_pm(cpu);
+		cbe_write_pm07_control(cpu, 0, 0);
+
+		/* Deactivate the signals */
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+		/* Deactivate interrupts */
+		cbe_disable_pm_interrupts(cpu);
+	}
+	del_timer_sync(&timer_spu_event_swap);
+}
+
+static void cell_global_stop_ppu(void)
+{
+	int cpu;
+
+	/*
+	 * This routine will be called once for the system.
+	 * There is one performance monitor per node, so we
+	 * only need to perform this function once per node.
+	 */
+	del_timer_sync(&timer_virt_cntr);
+	oprofile_running = 0;
+	smp_wmb();
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		cbe_sync_irq(cbe_cpu_to_node(cpu));
+		/* Stop the counters */
+		cbe_disable_pm(cpu);
+
+		/* Deactivate the signals */
+		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+		/* Deactivate interrupts */
+		cbe_disable_pm_interrupts(cpu);
+	}
+}
+
+static void cell_global_stop(void)
+{
+	if (profiling_mode == PPU_PROFILING)
+		cell_global_stop_ppu();
+	else if (profiling_mode == SPU_PROFILING_EVENTS)
+		cell_global_stop_spu_events();
+	else
+		cell_global_stop_spu_cycles();
+}
+
+static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
 {
 	int subfunc;
 	unsigned int lfsr_value;
@@ -951,18 +1315,18 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
 
 		/* start profiling */
 		ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
-		  cbe_cpu_to_node(cpu), lfsr_value);
+				cbe_cpu_to_node(cpu), lfsr_value);
 
 		if (unlikely(ret != 0)) {
 			printk(KERN_ERR
-			       "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-			       __func__, ret);
+			       "%s: rtas call ibm,cbe-spu-perftools failed, " \
+			       "return = %d\n", __func__, ret);
 			rtas_error = -EIO;
 			goto out;
 		}
 	}
 
-	rtas_error = start_spu_profiling(spu_cycle_reset);
+	rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
 	if (rtas_error)
 		goto out_stop;
 
@@ -970,11 +1334,74 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
 	return 0;
 
 out_stop:
-	cell_global_stop_spu();		/* clean up the PMU/debug bus */
+	cell_global_stop_spu_cycles();	/* clean up the PMU/debug bus */
 out:
 	return rtas_error;
 }
 
+static int cell_global_start_spu_events(struct op_counter_config *ctr)
+{
+	int cpu;
+	u32 interrupt_mask = 0;
+	int rtn = 0;
+
+	hdw_thread = 0;
+
+	/* spu event profiling, uses the performance counters to generate
+	 * an interrupt.  The hardware is setup to store the SPU program
+	 * counter into the trace array.  The occurrence mode is used to
+	 * enable storing data to the trace buffer.  The bits are set
+	 * to send/store the SPU address in the trace buffer.  The debug
+	 * bus must be setup to route the SPU program counter onto the
+	 * debug bus.  The occurrence data in the trace buffer is not used.
+	 */
+
+	/* This routine gets called once for the system.
+	 * There is one performance monitor per node, so we
+	 * only need to perform this function once per node.
+	 */
+
+	for_each_online_cpu(cpu) {
+		if (cbe_get_hw_thread_id(cpu))
+			continue;
+
+		/*
+		 * Setup SPU event-based profiling.
+		 * Set perf_mon_control bit 0 to a zero before
+		 * enabling spu collection hardware.
+		 *
+		 * Only support one SPU event on one SPU per node.
+		 */
+		if (ctr_enabled & 1) {
+			cbe_write_ctr(cpu, 0, reset_value[0]);
+			enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
+			interrupt_mask |=
+				CBE_PM_CTR_OVERFLOW_INTR(0);
+		} else {
+			/* Disable counter */
+			cbe_write_pm07_control(cpu, 0, 0);
+		}
+
+		cbe_get_and_clear_pm_interrupts(cpu);
+		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
+		cbe_enable_pm(cpu);
+
+		/* clear the trace buffer */
+		cbe_write_pm(cpu, trace_address, 0);
+	}
+
+	/* Start the timer to time slice collecting the event profile
+	 * on each of the SPUs.  Note, can collect profile on one SPU
+	 * per node at a time.
+	 */
+	start_spu_event_swap();
+	start_spu_profiling_events();
+	oprofile_running = 1;
+	smp_wmb();
+
+	return rtn;
+}
+
 static int cell_global_start_ppu(struct op_counter_config *ctr)
 {
 	u32 cpu, i;
@@ -994,8 +1421,7 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
 			if (ctr_enabled & (1 << i)) {
 				cbe_write_ctr(cpu, i, reset_value[i]);
 				enable_ctr(cpu, i, pm_regs.pm07_cntrl);
-				interrupt_mask |=
-				    CBE_PM_CTR_OVERFLOW_INTR(i);
+				interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
 			} else {
 				/* Disable counter */
 				cbe_write_pm07_control(cpu, i, 0);
@@ -1024,99 +1450,162 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
 
 static int cell_global_start(struct op_counter_config *ctr)
 {
-	if (spu_cycle_reset)
-		return cell_global_start_spu(ctr);
+	if (profiling_mode == SPU_PROFILING_CYCLES)
+		return cell_global_start_spu_cycles(ctr);
+	else if (profiling_mode == SPU_PROFILING_EVENTS)
+		return cell_global_start_spu_events(ctr);
 	else
 		return cell_global_start_ppu(ctr);
 }
 
-/*
- * Note the generic OProfile stop calls do not support returning
- * an error on stop.  Hence, will not return an error if the FW
- * calls fail on stop.	Failure to reset the debug bus is not an issue.
- * Failure to disable the SPU profiling is not an issue.  The FW calls
- * to enable the performance counters and debug bus will work even if
- * the hardware was not cleanly reset.
+
+/* The SPU interrupt handler
+ *
+ * SPU event profiling works as follows:
+ * The pm_signal[0] holds the one SPU event to be measured.  It is routed on
+ * the debug bus using word 0 or 1.  The value of pm_signal[1] and
+ * pm_signal[2] contain the necessary events to route the SPU program
+ * counter for the selected SPU onto the debug bus using words 2 and 3.
+ * The pm_interval register is setup to write the SPU PC value into the
+ * trace buffer at the maximum rate possible.  The trace buffer is configured
+ * to store the PCs, wrapping when it is full.  The performance counter is
+ * intialized to the max hardware count minus the number of events, N, between
+ * samples.  Once the N events have occured, a HW counter overflow occurs
+ * causing the generation of a HW counter interrupt which also stops the
+ * writing of the SPU PC values to the trace buffer.  Hence the last PC
+ * written to the trace buffer is the SPU PC that we want.  Unfortunately,
+ * we have to read from the beginning of the trace buffer to get to the
+ * last value written.  We just hope the PPU has nothing better to do then
+ * service this interrupt. The PC for the specific SPU being profiled is
+ * extracted from the trace buffer processed and stored.  The trace buffer
+ * is cleared, interrupts are cleared, the counter is reset to max - N.
+ * A kernel timer is used to periodically call the routine spu_evnt_swap()
+ * to switch to the next physical SPU in the node to profile in round robbin
+ * order.  This way data is collected for all SPUs on the node. It does mean
+ * that we need to use a relatively small value of N to ensure enough samples
+ * on each SPU are collected each SPU is being profiled 1/8 of the time.
+ * It may also be necessary to use a longer sample collection period.
  */
-static void cell_global_stop_spu(void)
+static void cell_handle_interrupt_spu(struct pt_regs *regs,
+				      struct op_counter_config *ctr)
 {
-	int subfunc, rtn_value;
-	unsigned int lfsr_value;
-	int cpu;
+	u32 cpu, cpu_tmp;
+	u64 trace_entry;
+	u32 interrupt_mask;
+	u64 trace_buffer[2];
+	u64 last_trace_buffer;
+	u32 sample;
+	u32 trace_addr;
+	unsigned long sample_array_lock_flags;
+	int spu_num;
+	unsigned long flags;
 
-	oprofile_running = 0;
+	/* Make sure spu event interrupt handler and spu event swap
+	 * don't access the counters simultaneously.
+	 */
+	cpu = smp_processor_id();
+	spin_lock_irqsave(&cntr_lock, flags);
 
-#ifdef CONFIG_CPU_FREQ
-	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
-				    CPUFREQ_TRANSITION_NOTIFIER);
-#endif
+	cpu_tmp = cpu;
+	cbe_disable_pm(cpu);
 
-	for_each_online_cpu(cpu) {
-		if (cbe_get_hw_thread_id(cpu))
-			continue;
+	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
 
-		subfunc = 3;	/*
-				 * 2 - activate SPU tracing,
-				 * 3 - deactivate
-				 */
-		lfsr_value = 0x8f100000;
+	sample = 0xABCDEF;
+	trace_entry = 0xfedcba;
+	last_trace_buffer = 0xdeadbeaf;
 
-		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
-				      subfunc, cbe_cpu_to_node(cpu),
-				      lfsr_value);
+	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
+		/* disable writes to trace buff */
+		cbe_write_pm(cpu, pm_interval, 0);
 
-		if (unlikely(rtn_value != 0)) {
-			printk(KERN_ERR
-			       "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-			       __func__, rtn_value);
+		/* only have one perf cntr being used, cntr 0 */
+		if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
+		    && ctr[0].enabled)
+			/* The SPU PC values will be read
+			 * from the trace buffer, reset counter
+			 */
+
+			cbe_write_ctr(cpu, 0, reset_value[0]);
+
+		trace_addr = cbe_read_pm(cpu, trace_address);
+
+		while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
+			/* There is data in the trace buffer to process
+			 * Read the buffer until you get to the last
+			 * entry.  This is the value we want.
+			 */
+
+			cbe_read_trace_buffer(cpu, trace_buffer);
+			trace_addr = cbe_read_pm(cpu, trace_address);
 		}
 
-		/* Deactivate the signals */
-		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-	}
+		/* SPU Address 16 bit count format for 128 bit
+		 * HW trace buffer is used for the SPU PC storage
+		 *    HDR bits          0:15
+		 *    SPU Addr 0 bits   16:31
+		 *    SPU Addr 1 bits   32:47
+		 *    unused bits       48:127
+		 *
+		 * HDR: bit4 = 1 SPU Address 0 valid
+		 * HDR: bit5 = 1 SPU Address 1 valid
+		 *  - unfortunately, the valid bits don't seem to work
+		 *
+		 * Note trace_buffer[0] holds bits 0:63 of the HW
+		 * trace buffer, trace_buffer[1] holds bits 64:127
+		 */
 
-	stop_spu_profiling();
-}
+		trace_entry = trace_buffer[0]
+			& 0x00000000FFFF0000;
 
-static void cell_global_stop_ppu(void)
-{
-	int cpu;
+		/* only top 16 of the 18 bit SPU PC address
+		 * is stored in trace buffer, hence shift right
+		 * by 16 -2 bits */
+		sample = trace_entry >> 14;
+		last_trace_buffer = trace_buffer[0];
 
-	/*
-	 * This routine will be called once for the system.
-	 * There is one performance monitor per node, so we
-	 * only need to perform this function once per node.
-	 */
-	del_timer_sync(&timer_virt_cntr);
-	oprofile_running = 0;
-	smp_wmb();
+		spu_num = spu_evnt_phys_spu_indx
+			+ (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
 
-	for_each_online_cpu(cpu) {
-		if (cbe_get_hw_thread_id(cpu))
-			continue;
+		/* make sure only one process at a time is calling
+		 * spu_sync_buffer()
+		 */
+		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+				  sample_array_lock_flags);
+		spu_sync_buffer(spu_num, &sample, 1);
+		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+				       sample_array_lock_flags);
 
-		cbe_sync_irq(cbe_cpu_to_node(cpu));
-		/* Stop the counters */
-		cbe_disable_pm(cpu);
+		smp_wmb();    /* insure spu event buffer updates are written
+			       * don't want events intermingled... */
 
-		/* Deactivate the signals */
-		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+		/* The counters were frozen by the interrupt.
+		 * Reenable the interrupt and restart the counters.
+		 */
+		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
+		cbe_enable_pm_interrupts(cpu, hdw_thread,
+					 virt_cntr_inter_mask);
 
-		/* Deactivate interrupts */
-		cbe_disable_pm_interrupts(cpu);
-	}
-}
+		/* clear the trace buffer, re-enable writes to trace buff */
+		cbe_write_pm(cpu, trace_address, 0);
+		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
 
-static void cell_global_stop(void)
-{
-	if (spu_cycle_reset)
-		cell_global_stop_spu();
-	else
-		cell_global_stop_ppu();
+		/* The writes to the various performance counters only writes
+		 * to a latch.  The new values (interrupt setting bits, reset
+		 * counter value etc.) are not copied to the actual registers
+		 * until the performance monitor is enabled.  In order to get
+		 * this to work as desired, the permormance monitor needs to
+		 * be disabled while writing to the latches.  This is a
+		 * HW design issue.
+		 */
+		write_pm_cntrl(cpu);
+		cbe_enable_pm(cpu);
+	}
+	spin_unlock_irqrestore(&cntr_lock, flags);
 }
 
-static void cell_handle_interrupt(struct pt_regs *regs,
-				struct op_counter_config *ctr)
+static void cell_handle_interrupt_ppu(struct pt_regs *regs,
+				      struct op_counter_config *ctr)
 {
 	u32 cpu;
 	u64 pc;
@@ -1132,7 +1621,7 @@ static void cell_handle_interrupt(struct pt_regs *regs,
 	 * routine are not running at the same time. See the
 	 * cell_virtual_cntr() routine for additional comments.
 	 */
-	spin_lock_irqsave(&virt_cntr_lock, flags);
+	spin_lock_irqsave(&cntr_lock, flags);
 
 	/*
 	 * Need to disable and reenable the performance counters
@@ -1185,7 +1674,16 @@ static void cell_handle_interrupt(struct pt_regs *regs,
 		 */
 		cbe_enable_pm(cpu);
 	}
-	spin_unlock_irqrestore(&virt_cntr_lock, flags);
+	spin_unlock_irqrestore(&cntr_lock, flags);
+}
+
+static void cell_handle_interrupt(struct pt_regs *regs,
+				  struct op_counter_config *ctr)
+{
+	if (profiling_mode == PPU_PROFILING)
+		cell_handle_interrupt_ppu(regs, ctr);
+	else
+		cell_handle_interrupt_spu(regs, ctr);
 }
 
 /*
@@ -1195,7 +1693,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
  */
 static int cell_sync_start(void)
 {
-	if (spu_cycle_reset)
+	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+	    (profiling_mode == SPU_PROFILING_EVENTS))
 		return spu_sync_start();
 	else
 		return DO_GENERIC_SYNC;
@@ -1203,7 +1702,8 @@ static int cell_sync_start(void)
 
 static int cell_sync_stop(void)
 {
-	if (spu_cycle_reset)
+	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+	    (profiling_mode == SPU_PROFILING_EVENTS))
 		return spu_sync_stop();
 	else
 		return 1;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index ae7c34f37e1c..98367a0255f3 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -42,7 +42,7 @@ static struct of_device_id mpc52xx_bus_ids[] __initdata = {
  * from interrupt context while node mapping (which calls ioremap())
  * cannot be used at such point.
  */
-static spinlock_t mpc52xx_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mpc52xx_lock);
 static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
 static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
 
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
index a428f8d1ac80..5177bdd2c62a 100644
--- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -42,7 +42,7 @@ static void __init mpc831x_rdb_setup_arch(void)
 	mpc831x_usb_cfg();
 }
 
-void __init mpc831x_rdb_init_IRQ(void)
+static void __init mpc831x_rdb_init_IRQ(void)
 {
 	struct device_node *np;
 
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index ec43477caa63..ec0b401bc9cf 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -49,8 +49,6 @@
 #define DBG(fmt...)
 #endif
 
-static u8 *bcsr_regs = NULL;
-
 /* ************************************************************************
  *
  * Setup the architecture
@@ -59,13 +57,14 @@ static u8 *bcsr_regs = NULL;
 static void __init mpc832x_sys_setup_arch(void)
 {
 	struct device_node *np;
+	u8 __iomem *bcsr_regs = NULL;
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc832x_sys_setup_arch()", 0);
 
 	/* Map BCSR area */
 	np = of_find_node_by_name(NULL, "bcsr");
-	if (np != 0) {
+	if (np) {
 		struct resource res;
 
 		of_address_to_resource(np, 0, &res);
@@ -93,9 +92,9 @@ static void __init mpc832x_sys_setup_arch(void)
 			!= NULL){
 		/* Reset the Ethernet PHYs */
 #define BCSR8_FETH_RST 0x50
-		bcsr_regs[8] &= ~BCSR8_FETH_RST;
+		clrbits8(&bcsr_regs[8], BCSR8_FETH_RST);
 		udelay(1000);
-		bcsr_regs[8] |= BCSR8_FETH_RST;
+		setbits8(&bcsr_regs[8], BCSR8_FETH_RST);
 		iounmap(bcsr_regs);
 		of_node_put(np);
 	}
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 0300268ce5b8..2a1295f19832 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -38,6 +38,7 @@
 #define DBG(fmt...)
 #endif
 
+#ifdef CONFIG_QUICC_ENGINE
 static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity)
 {
 	pr_debug("%s %d %d\n", __func__, cs, polarity);
@@ -77,8 +78,8 @@ static int __init mpc832x_spi_init(void)
 			    mpc83xx_spi_activate_cs,
 			    mpc83xx_spi_deactivate_cs);
 }
-
 machine_device_initcall(mpc832x_rdb, mpc832x_spi_init);
+#endif /* CONFIG_QUICC_ENGINE */
 
 /* ************************************************************************
  *
@@ -130,7 +131,7 @@ static int __init mpc832x_declare_of_platform_devices(void)
 }
 machine_device_initcall(mpc832x_rdb, mpc832x_declare_of_platform_devices);
 
-void __init mpc832x_rdb_init_IRQ(void)
+static void __init mpc832x_rdb_init_IRQ(void)
 {
 
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 9d46e5bdd101..09e9d6fb7411 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -18,6 +18,7 @@
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/reboot.h>
@@ -43,6 +44,7 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
+#include <sysdev/simple_gpio.h>
 #include <asm/qe.h>
 #include <asm/qe_ic.h>
 
@@ -55,8 +57,6 @@
 #define DBG(fmt...)
 #endif
 
-static u8 *bcsr_regs = NULL;
-
 /* ************************************************************************
  *
  * Setup the architecture
@@ -65,13 +65,14 @@ static u8 *bcsr_regs = NULL;
 static void __init mpc836x_mds_setup_arch(void)
 {
 	struct device_node *np;
+	u8 __iomem *bcsr_regs = NULL;
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc836x_mds_setup_arch()", 0);
 
 	/* Map BCSR area */
 	np = of_find_node_by_name(NULL, "bcsr");
-	if (np != 0) {
+	if (np) {
 		struct resource res;
 
 		of_address_to_resource(np, 0, &res);
@@ -93,6 +94,16 @@ static void __init mpc836x_mds_setup_arch(void)
 
 		for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
 			par_io_of_config(np);
+#ifdef CONFIG_QE_USB
+		/* Must fixup Par IO before QE GPIO chips are registered. */
+		par_io_config_pin(1,  2, 1, 0, 3, 0); /* USBOE  */
+		par_io_config_pin(1,  3, 1, 0, 3, 0); /* USBTP  */
+		par_io_config_pin(1,  8, 1, 0, 1, 0); /* USBTN  */
+		par_io_config_pin(1, 10, 2, 0, 3, 0); /* USBRXD */
+		par_io_config_pin(1,  9, 2, 1, 3, 0); /* USBRP  */
+		par_io_config_pin(1, 11, 2, 1, 3, 0); /* USBRN  */
+		par_io_config_pin(2, 20, 2, 0, 1, 0); /* CLK21  */
+#endif /* CONFIG_QE_USB */
 	}
 
 	if ((np = of_find_compatible_node(NULL, "network", "ucc_geth"))
@@ -151,6 +162,70 @@ static int __init mpc836x_declare_of_platform_devices(void)
 }
 machine_device_initcall(mpc836x_mds, mpc836x_declare_of_platform_devices);
 
+#ifdef CONFIG_QE_USB
+static int __init mpc836x_usb_cfg(void)
+{
+	u8 __iomem *bcsr;
+	struct device_node *np;
+	const char *mode;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr");
+	if (!np)
+		return -ENODEV;
+
+	bcsr = of_iomap(np, 0);
+	of_node_put(np);
+	if (!bcsr)
+		return -ENOMEM;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc8323-qe-usb");
+	if (!np) {
+		ret = -ENODEV;
+		goto err;
+	}
+
+#define BCSR8_TSEC1M_MASK	(0x3 << 6)
+#define BCSR8_TSEC1M_RGMII	(0x0 << 6)
+#define BCSR8_TSEC2M_MASK	(0x3 << 4)
+#define BCSR8_TSEC2M_RGMII	(0x0 << 4)
+	/*
+	 * Default is GMII (2), but we should set it to RGMII (0) if we use
+	 * USB (Eth PHY is in RGMII mode anyway).
+	 */
+	clrsetbits_8(&bcsr[8], BCSR8_TSEC1M_MASK | BCSR8_TSEC2M_MASK,
+			       BCSR8_TSEC1M_RGMII | BCSR8_TSEC2M_RGMII);
+
+#define BCSR13_USBMASK	0x0f
+#define BCSR13_nUSBEN	0x08 /* 1 - Disable, 0 - Enable			*/
+#define BCSR13_USBSPEED	0x04 /* 1 - Full, 0 - Low			*/
+#define BCSR13_USBMODE	0x02 /* 1 - Host, 0 - Function			*/
+#define BCSR13_nUSBVCC	0x01 /* 1 - gets VBUS, 0 - supplies VBUS 	*/
+
+	clrsetbits_8(&bcsr[13], BCSR13_USBMASK, BCSR13_USBSPEED);
+
+	mode = of_get_property(np, "mode", NULL);
+	if (mode && !strcmp(mode, "peripheral")) {
+		setbits8(&bcsr[13], BCSR13_nUSBVCC);
+		qe_usb_clock_set(QE_CLK21, 48000000);
+	} else {
+		setbits8(&bcsr[13], BCSR13_USBMODE);
+		/*
+		 * The BCSR GPIOs are used to control power and
+		 * speed of the USB transceiver. This is needed for
+		 * the USB Host only.
+		 */
+		simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
+	}
+
+	of_node_put(np);
+err:
+	iounmap(bcsr);
+	return ret;
+}
+machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg);
+#endif /* CONFIG_QE_USB */
+
 static void __init mpc836x_mds_init_IRQ(void)
 {
 	struct device_node *np;
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index a5273bb28e1b..b0090aac9642 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -51,8 +51,9 @@ static void __init mpc836x_rdk_setup_arch(void)
 	for_each_compatible_node(np, "pci", "fsl,mpc8349-pci")
 		mpc83xx_add_bridge(np);
 #endif
-
+#ifdef CONFIG_QUICC_ENGINE
 	qe_reset();
+#endif
 }
 
 static void __init mpc836x_rdk_init_IRQ(void)
@@ -71,13 +72,14 @@ static void __init mpc836x_rdk_init_IRQ(void)
 	 */
 	ipic_set_default_priority();
 	of_node_put(np);
-
+#ifdef CONFIG_QUICC_ENGINE
 	np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
 	if (!np)
 		return;
 
 	qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
 	of_node_put(np);
+#endif
 }
 
 /*
diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c
index 8bb13c807142..530ef990ca7c 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c
@@ -26,7 +26,6 @@
 #define BCSR12_USB_SER_MASK	0x8a
 #define BCSR12_USB_SER_PIN	0x80
 #define BCSR12_USB_SER_DEVICE	0x02
-extern int mpc837x_usb_cfg(void);
 
 static int mpc837xmds_usb_cfg(void)
 {
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index da030afa2e2c..1d096545322b 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -21,8 +21,6 @@
 
 #include "mpc83xx.h"
 
-extern int mpc837x_usb_cfg(void);
-
 /* ************************************************************************
  *
  * Setup the architecture
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index 2a7cbabb410a..83cfe51526ec 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -61,6 +61,7 @@
 
 extern void mpc83xx_restart(char *cmd);
 extern long mpc83xx_time_init(void);
+extern int mpc837x_usb_cfg(void);
 extern int mpc834x_usb_cfg(void);
 extern int mpc831x_usb_cfg(void);
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index a8301c8ad537..7326d904202c 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -148,6 +148,9 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
 /*
  * Setup the architecture
  */
+#ifdef CONFIG_SMP
+extern void __init mpc85xx_smp_init(void);
+#endif
 static void __init mpc85xx_ds_setup_arch(void)
 {
 #ifdef CONFIG_PCI
@@ -173,6 +176,10 @@ static void __init mpc85xx_ds_setup_arch(void)
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
 
+#ifdef CONFIG_SMP
+	mpc85xx_smp_init();
+#endif
+
 	printk("MPC85xx DS board from Freescale Semiconductor\n");
 }
 
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index d652c713f496..79a0df17078b 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -58,6 +58,7 @@ smp_85xx_kick_cpu(int nr)
 
 	if (cpu_rel_addr == NULL) {
 		printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
+		local_irq_restore(flags);
 		return;
 	}
 
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 47e956c871fe..47fe2bea9865 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -312,4 +312,15 @@ config MPC8xxx_GPIO
 	  Say Y here if you're going to use hardware that connects to the
 	  MPC831x/834x/837x/8572/8610 GPIOs.
 
+config SIMPLE_GPIO
+	bool "Support for simple, memory-mapped GPIO controllers"
+	depends on PPC
+	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Say Y here to support simple, memory-mapped GPIO controllers.
+	  These are usually BCSRs used to control board's switches, LEDs,
+	  chip-selects, Ethernet/USB PHY's power and various other small
+	  on-board peripherals.
+
 endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3d0c776f888d..e868b5c50723 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -231,7 +231,7 @@ config VIRT_CPU_ACCOUNTING
 	  If in doubt, say Y here.
 
 config SMP
-	depends on PPC_STD_MMU
+	depends on PPC_STD_MMU || FSL_BOOKE
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU. If you have
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 2e67bd840e01..35b1ec492715 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -44,8 +44,8 @@ static DEFINE_SPINLOCK(beat_htab_lock);
 
 static inline unsigned int beat_read_mask(unsigned hpte_group)
 {
-	unsigned long hpte_v[5];
 	unsigned long rmask = 0;
+	u64 hpte_v[5];
 
 	beat_read_htab_entries(0, hpte_group + 0, hpte_v);
 	if (!(hpte_v[0] & HPTE_V_BOLTED))
@@ -93,8 +93,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
 				  int psize, int ssize)
 {
 	unsigned long lpar_rc;
-	unsigned long slot;
-	unsigned long hpte_v, hpte_r;
+	u64 hpte_v, hpte_r, slot;
 
 	/* same as iseries */
 	if (vflags & HPTE_V_SECONDARY)
@@ -153,8 +152,9 @@ static long beat_lpar_hpte_remove(unsigned long hpte_group)
 
 static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
 {
-	unsigned long dword0, dword[5];
+	unsigned long dword0;
 	unsigned long lpar_rc;
+	u64 dword[5];
 
 	lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
 
@@ -170,7 +170,7 @@ static void beat_lpar_hptab_clear(void)
 	unsigned long size_bytes = 1UL << ppc64_pft_size;
 	unsigned long hpte_count = size_bytes >> 4;
 	int i;
-	unsigned long dummy0, dummy1;
+	u64 dummy0, dummy1;
 
 	/* TODO: Use bulk call */
 	for (i = 0; i < hpte_count; i++)
@@ -189,7 +189,8 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
 				    int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
-	unsigned long dummy0, dummy1, want_v;
+	u64 dummy0, dummy1;
+	unsigned long want_v;
 
 	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
 
@@ -255,7 +256,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 					  unsigned long ea,
 					  int psize, int ssize)
 {
-	unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1;
+	unsigned long lpar_rc, slot, vsid, va;
+	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
 	va = (vsid << 28) | (ea & 0x0fffffff);
@@ -276,7 +278,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
-	unsigned long dummy1, dummy2;
+	u64 dummy1, dummy2;
 	unsigned long flags;
 
 	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
@@ -315,8 +317,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
 				  int psize, int ssize)
 {
 	unsigned long lpar_rc;
-	unsigned long slot;
-	unsigned long hpte_v, hpte_r;
+	u64 hpte_v, hpte_r, slot;
 
 	/* same as iseries */
 	if (vflags & HPTE_V_SECONDARY)
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
index 6b418f6b6175..350735bc8888 100644
--- a/arch/powerpc/platforms/cell/beat_udbg.c
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
@@ -40,8 +40,8 @@ static void udbg_putc_beat(char c)
 }
 
 /* Buffered chars getc */
-static long inbuflen;
-static long inbuf[2];	/* must be 2 longs */
+static u64 inbuflen;
+static u64 inbuf[2];	/* must be 2 u64s */
 
 static int udbg_getc_poll_beat(void)
 {
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
index ec7c8f45a215..e6506cd0ff94 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -118,7 +118,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cur = cbe_freqs[cur_pmode].frequency;
 
 #ifdef CONFIG_SMP
-	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
+	cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
 #endif
 
 	cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
index 70fa7aef5edd..20472e487b6f 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -54,7 +54,7 @@ int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
 {
 	struct cbe_pmd_regs __iomem *pmd_regs;
 	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
-	u64 flags;
+	unsigned long flags;
 	u64 value;
 #ifdef DEBUG
 	long time;
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index a3c6c01bd6db..968c1c0b4d5b 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -110,7 +110,7 @@ static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
 		}
 
 		/* initialize spu_gov_info for all affected cpus */
-		for_each_cpu_mask(i, policy->cpus) {
+		for_each_cpu(i, policy->cpus) {
 			affected_info = &per_cpu(spu_gov_info, i);
 			affected_info->policy = policy;
 		}
@@ -127,7 +127,7 @@ static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
 		spu_gov_cancel_work(info);
 
 		/* clean spu_gov_info for all affected cpus */
-		for_each_cpu_mask (i, policy->cpus) {
+		for_each_cpu (i, policy->cpus) {
 			info = &per_cpu(spu_gov_info, i);
 			info->policy = NULL;
 		}
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d5bb22d6c09..28c04dab2633 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void)
 
 	iic = &__get_cpu_var(iic);
 	*(unsigned long *) &pending =
-		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+		in_be64((u64 __iomem *) &iic->regs->pending_destr);
 	if (!(pending.flags & CBE_IIC_IRQ_VALID))
 		return NO_IRQ;
 	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index b5f84e8f0899..059cad6c3f69 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -130,14 +130,14 @@ static const struct ppc_pci_io __devinitconst iowa_pci_io = {
 
 };
 
-static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
 						unsigned long flags)
 {
 	struct iowa_bus *bus;
 	void __iomem *res = __ioremap(addr, size, flags);
 	int busno;
 
-	bus = iowa_pci_find(0, addr);
+	bus = iowa_pci_find(0, (unsigned long)addr);
 	if (bus != NULL) {
 		busno = bus - iowa_busses;
 		PCI_SET_ADDR_TOKEN(res, busno + 1);
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 86db4dd170a0..88d94b59a7cb 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -150,8 +150,8 @@ static int cbe_nr_iommus;
 static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
 		long n_ptes)
 {
-	unsigned long __iomem *reg;
-	unsigned long val;
+	u64 __iomem *reg;
+	u64 val;
 	long n;
 
 	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
index 906a0a2a9fe1..1410443731eb 100644
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu)
 	u64 route;
 
 	if (nr_cpus_node(spu->node)) {
-		cpumask_t spumask = node_to_cpumask(spu->node);
-		cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+		const struct cpumask *spumask = cpumask_of_node(spu->node),
+			*cpumask = cpumask_of_node(cpu_to_node(cpu));
 
-		if (!cpus_intersects(spumask, cpumask))
+		if (!cpumask_intersects(spumask, cpumask))
 			return;
 	}
 
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 6296bfd9cb0b..e309ef70a531 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode)
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 out:
 	return inode;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2ad914c47493..6a0ad196aeb3 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx)
 static int __node_allowed(struct spu_context *ctx, int node)
 {
 	if (nr_cpus_node(node)) {
-		cpumask_t mask = node_to_cpumask(node);
+		const struct cpumask *mask = cpumask_of_node(node);
 
-		if (cpus_intersects(mask, ctx->cpus_allowed))
+		if (cpumask_intersects(mask, &ctx->cpus_allowed))
 			return 1;
 	}
 
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 15c62d3ca129..3bf908e2873a 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -314,7 +314,7 @@ extern char *isolated_loader;
  *	we need to call spu_release(ctx) before sleeping, and
  *	then spu_acquire(ctx) when awoken.
  *
- * 	Returns with state_mutex re-acquired when successfull or
+ * 	Returns with state_mutex re-acquired when successful or
  * 	with -ERESTARTSYS and the state_mutex dropped when interrupted.
  */
 
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index ed3753d8c109..7ddd0a2c8027 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -10,18 +10,21 @@ menu "iSeries device drivers"
 config VIODASD
 	tristate "iSeries Virtual I/O disk support"
 	depends on BLOCK
+	select VIOPATH
 	help
 	  If you are running on an iSeries system and you want to use
 	  virtual disks created and managed by OS/400, say Y.
 
 config VIOCD
 	tristate "iSeries Virtual I/O CD support"
+	select VIOPATH
 	help
 	  If you are running Linux on an IBM iSeries system and you want to
 	  read a CD drive owned by OS/400, say Y here.
 
 config VIOTAPE
 	tristate "iSeries Virtual Tape Support"
+	select VIOPATH
 	help
 	  If you are running Linux on an iSeries system and you want Linux
 	  to read and/or write a tape drive owned by OS/400, say Y here.
@@ -30,5 +33,3 @@ endmenu
 
 config VIOPATH
 	bool
-	depends on VIODASD || VIOCD || VIOTAPE || ISERIES_VETH
-	default y
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 70b688c1aefb..24519b96d6ad 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/kdev_t.h>
+#include <linux/kexec.h>
 #include <linux/major.h>
 #include <linux/root_dev.h>
 #include <linux/kernel.h>
@@ -638,6 +639,13 @@ static int __init iseries_probe(void)
 	return 1;
 }
 
+#ifdef CONFIG_KEXEC
+static int iseries_kexec_prepare(struct kimage *image)
+{
+	return -ENOSYS;
+}
+#endif
+
 define_machine(iseries) {
 	.name			= "iSeries",
 	.setup_arch		= iSeries_setup_arch,
@@ -658,6 +666,9 @@ define_machine(iseries) {
 	.probe			= iseries_probe,
 	.ioremap		= iseries_ioremap,
 	.iounmap		= iseries_iounmap,
+#ifdef CONFIG_KEXEC
+	.machine_kexec_prepare	= iseries_kexec_prepare,
+#endif
 	/* XXX Implement enable_pmcs for iSeries */
 };
 
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
index 58556b028a4c..be2527a516ea 100644
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ b/arch/powerpc/platforms/pasemi/cpufreq.c
@@ -112,7 +112,7 @@ static int get_gizmo_latency(void)
 
 static void set_astate(int cpu, unsigned int astate)
 {
-	u64 flags;
+	unsigned long flags;
 
 	/* Return if called before init has run */
 	if (unlikely(!sdcasr_mapbase))
@@ -213,7 +213,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	pr_debug("current astate is at %d\n",cur_astate);
 
 	policy->cur = pas_freqs[cur_astate].frequency;
-	policy->cpus = cpu_online_map;
+	cpumask_copy(policy->cpus, &cpu_online_map);
 
 	ppc_proc_freq = policy->cur * 1000ul;
 
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 217af321b0ca..a6152d922243 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -509,7 +509,7 @@ fallback:
  */
 int pasemi_dma_init(void)
 {
-	static spinlock_t init_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(init_lock);
 	struct pci_dev *iob_pdev;
 	struct pci_dev *pdev;
 	struct resource res;
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index 4dfb4bc242b5..beb38333b6d2 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -362,7 +362,7 @@ static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	/* secondary CPUs are tied to the primary one by the
 	 * cpufreq core if in the secondary policy we tell it that
 	 * it actually must be one policy together with all others. */
-	policy->cpus = cpu_online_map;
+	cpumask_copy(policy->cpus, &cpu_online_map);
 	cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
 
 	return cpufreq_frequency_table_cpuinfo(policy,
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 54b7b76ed4f0..04cdd32624d4 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -661,6 +661,7 @@ static void __init init_second_ohare(void)
 			pci_find_hose_for_OF_device(np);
 		if (!hose) {
 			printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+			of_node_put(np);
 			return;
 		}
 		early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
@@ -669,6 +670,7 @@ static void __init init_second_ohare(void)
 		early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
 	}
 	has_second_ohare = 1;
+	of_node_put(np);
 }
 
 /*
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 59eb840d8ce2..1810e4226e56 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -265,12 +265,15 @@ int __init via_calibrate_decr(void)
 	struct resource rsrc;
 
 	vias = of_find_node_by_name(NULL, "via-cuda");
-	if (vias == 0)
+	if (vias == NULL)
 		vias = of_find_node_by_name(NULL, "via-pmu");
-	if (vias == 0)
+	if (vias == NULL)
 		vias = of_find_node_by_name(NULL, "via");
-	if (vias == 0 || of_address_to_resource(vias, 0, &rsrc))
+	if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
+	        of_node_put(vias);
 		return 0;
+	}
+	of_node_put(vias);
 	via = ioremap(rsrc.start, rsrc.end - rsrc.start + 1);
 	if (via == NULL) {
 		printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
@@ -297,7 +300,7 @@ int __init via_calibrate_decr(void)
 	ppc_tb_freq = (dstart - dend) * 100 / 6;
 
 	iounmap(via);
-	
+
 	return 1;
 }
 #endif
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index dbc124e05646..ca71a12b764c 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -518,6 +518,41 @@ fail_device_register:
 	return result;
 }
 
+static int __init ps3_register_ramdisk_device(void)
+{
+	int result;
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *p;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	p->dev.match_id = PS3_MATCH_ID_GPU;
+	p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
+	p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+	result = ps3_system_bus_device_register(&p->dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_device_register;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+
+fail_device_register:
+	kfree(p);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
 /**
  * ps3_setup_dynamic_device - Setup a dynamic device from the repository
  */
@@ -946,6 +981,8 @@ static int __init ps3_register_devices(void)
 
 	ps3_register_lpm_devices();
 
+	ps3_register_ramdisk_device();
+
 	pr_debug(" <- %s:%d\n", __func__, __LINE__);
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index f7a69021b7bf..84e058f1e1cc 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void)
 
 		/* Reset affinity to all cpus */
 		irq_desc[virq].affinity = CPU_MASK_ALL;
-		desc->chip->set_affinity(virq, CPU_MASK_ALL);
+		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
 		spin_unlock_irqrestore(&desc->lock, flags);
 	}
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 5afce115ab1f..b33b28a6fe12 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_FSL_PCI)		+= fsl_pci.o $(fsl-msi-obj-y)
 obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
 obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
 obj-$(CONFIG_MPC8xxx_GPIO)	+= mpc8xxx_gpio.o
+obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_RAPIDIO)		+= fsl_rio.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index d5f9ae0f1b75..f611d0369cc8 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -29,7 +29,8 @@
 
 #if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx)
 /* atmu setup for fsl pci/pcie controller */
-void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
+static void __init setup_pci_atmu(struct pci_controller *hose,
+				  struct resource *rsrc)
 {
 	struct ccsr_pci __iomem *pci;
 	int i;
@@ -86,7 +87,7 @@ void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
 	out_be32(&pci->piw[2].piwar, PIWAR_2G);
 }
 
-void __init setup_pci_cmd(struct pci_controller *hose)
+static void __init setup_pci_cmd(struct pci_controller *hose)
 {
 	u16 cmd;
 	int cap_x;
@@ -130,7 +131,7 @@ static void __init quirk_fsl_pcie_header(struct pci_dev *dev)
 	return ;
 }
 
-int __init fsl_pcie_check_link(struct pci_controller *hose)
+static int __init fsl_pcie_check_link(struct pci_controller *hose)
 {
 	u32 val;
 	early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val);
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 60f7f227327c..9c744e4285a0 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -5,8 +5,13 @@
 #include <asm/mmu.h>
 
 extern phys_addr_t get_immrbase(void);
+#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
 extern u32 get_brgfreq(void);
 extern u32 get_baudrate(void);
+#else
+static inline u32 get_brgfreq(void) { return -1; }
+static inline u32 get_baudrate(void) { return -1; }
+#endif
 extern u32 fsl_get_sys_freq(void);
 
 struct spi_board_info;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index c82babb70074..3e0d89dcdba2 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 	} else {
 		cpumask_t tmp;
 
-		cpus_and(tmp, cpumask, cpu_online_map);
+		cpumask_and(&tmp, cpumask, cpu_online_mask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62a426d..3cef2af10f42 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 76ffbc48d4b9..41ac3dfac98e 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -22,5 +22,6 @@ config UCC
 
 config QE_USB
 	bool
+	default y if USB_GADGET_FSL_QE
 	help
-	  QE USB Host Controller support
+	  QE USB Controller support
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index 8e5a0bc36d0b..3485288dce31 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <linux/err.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
@@ -24,8 +25,14 @@ struct qe_gpio_chip {
 	struct of_mm_gpio_chip mm_gc;
 	spinlock_t lock;
 
+	unsigned long pin_flags[QE_PIO_PINS];
+#define QE_PIN_REQUESTED 0
+
 	/* shadowed data register to clear/set bits safely */
 	u32 cpdata;
+
+	/* saved_regs used to restore dedicated functions */
+	struct qe_pio_regs saved_regs;
 };
 
 static inline struct qe_gpio_chip *
@@ -40,6 +47,12 @@ static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 	struct qe_pio_regs __iomem *regs = mm_gc->regs;
 
 	qe_gc->cpdata = in_be32(&regs->cpdata);
+	qe_gc->saved_regs.cpdata = qe_gc->cpdata;
+	qe_gc->saved_regs.cpdir1 = in_be32(&regs->cpdir1);
+	qe_gc->saved_regs.cpdir2 = in_be32(&regs->cpdir2);
+	qe_gc->saved_regs.cppar1 = in_be32(&regs->cppar1);
+	qe_gc->saved_regs.cppar2 = in_be32(&regs->cppar2);
+	qe_gc->saved_regs.cpodr = in_be32(&regs->cpodr);
 }
 
 static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio)
@@ -103,6 +116,188 @@ static int qe_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 	return 0;
 }
 
+struct qe_pin {
+	/*
+	 * The qe_gpio_chip name is unfortunate, we should change that to
+	 * something like qe_pio_controller. Someday.
+	 */
+	struct qe_gpio_chip *controller;
+	int num;
+};
+
+/**
+ * qe_pin_request - Request a QE pin
+ * @np:		device node to get a pin from
+ * @index:	index of a pin in the device tree
+ * Context:	non-atomic
+ *
+ * This function return qe_pin so that you could use it with the rest of
+ * the QE Pin Multiplexing API.
+ */
+struct qe_pin *qe_pin_request(struct device_node *np, int index)
+{
+	struct qe_pin *qe_pin;
+	struct device_node *gc;
+	struct of_gpio_chip *of_gc = NULL;
+	struct of_mm_gpio_chip *mm_gc;
+	struct qe_gpio_chip *qe_gc;
+	int err;
+	int size;
+	const void *gpio_spec;
+	const u32 *gpio_cells;
+	unsigned long flags;
+
+	qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
+	if (!qe_pin) {
+		pr_debug("%s: can't allocate memory\n", __func__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
+					  &gc, &gpio_spec);
+	if (err) {
+		pr_debug("%s: can't parse gpios property\n", __func__);
+		goto err0;
+	}
+
+	if (!of_device_is_compatible(gc, "fsl,mpc8323-qe-pario-bank")) {
+		pr_debug("%s: tried to get a non-qe pin\n", __func__);
+		err = -EINVAL;
+		goto err1;
+	}
+
+	of_gc = gc->data;
+	if (!of_gc) {
+		pr_debug("%s: gpio controller %s isn't registered\n",
+			 np->full_name, gc->full_name);
+		err = -ENODEV;
+		goto err1;
+	}
+
+	gpio_cells = of_get_property(gc, "#gpio-cells", &size);
+	if (!gpio_cells || size != sizeof(*gpio_cells) ||
+			*gpio_cells != of_gc->gpio_cells) {
+		pr_debug("%s: wrong #gpio-cells for %s\n",
+			 np->full_name, gc->full_name);
+		err = -EINVAL;
+		goto err1;
+	}
+
+	err = of_gc->xlate(of_gc, np, gpio_spec, NULL);
+	if (err < 0)
+		goto err1;
+
+	mm_gc = to_of_mm_gpio_chip(&of_gc->gc);
+	qe_gc = to_qe_gpio_chip(mm_gc);
+
+	spin_lock_irqsave(&qe_gc->lock, flags);
+
+	if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) {
+		qe_pin->controller = qe_gc;
+		qe_pin->num = err;
+		err = 0;
+	} else {
+		err = -EBUSY;
+	}
+
+	spin_unlock_irqrestore(&qe_gc->lock, flags);
+
+	if (!err)
+		return qe_pin;
+err1:
+	of_node_put(gc);
+err0:
+	kfree(qe_pin);
+	pr_debug("%s failed with status %d\n", __func__, err);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(qe_pin_request);
+
+/**
+ * qe_pin_free - Free a pin
+ * @qe_pin:	pointer to the qe_pin structure
+ * Context:	any
+ *
+ * This function frees the qe_pin structure and makes a pin available
+ * for further qe_pin_request() calls.
+ */
+void qe_pin_free(struct qe_pin *qe_pin)
+{
+	struct qe_gpio_chip *qe_gc = qe_pin->controller;
+	unsigned long flags;
+	const int pin = qe_pin->num;
+
+	spin_lock_irqsave(&qe_gc->lock, flags);
+	test_and_clear_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[pin]);
+	spin_unlock_irqrestore(&qe_gc->lock, flags);
+
+	kfree(qe_pin);
+}
+EXPORT_SYMBOL(qe_pin_free);
+
+/**
+ * qe_pin_set_dedicated - Revert a pin to a dedicated peripheral function mode
+ * @qe_pin:	pointer to the qe_pin structure
+ * Context:	any
+ *
+ * This function resets a pin to a dedicated peripheral function that
+ * has been set up by the firmware.
+ */
+void qe_pin_set_dedicated(struct qe_pin *qe_pin)
+{
+	struct qe_gpio_chip *qe_gc = qe_pin->controller;
+	struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
+	struct qe_pio_regs *sregs = &qe_gc->saved_regs;
+	int pin = qe_pin->num;
+	u32 mask1 = 1 << (QE_PIO_PINS - (pin + 1));
+	u32 mask2 = 0x3 << (QE_PIO_PINS - (pin % (QE_PIO_PINS / 2) + 1) * 2);
+	bool second_reg = pin > (QE_PIO_PINS / 2) - 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qe_gc->lock, flags);
+
+	if (second_reg) {
+		clrsetbits_be32(&regs->cpdir2, mask2, sregs->cpdir2 & mask2);
+		clrsetbits_be32(&regs->cppar2, mask2, sregs->cppar2 & mask2);
+	} else {
+		clrsetbits_be32(&regs->cpdir1, mask2, sregs->cpdir1 & mask2);
+		clrsetbits_be32(&regs->cppar1, mask2, sregs->cppar1 & mask2);
+	}
+
+	if (sregs->cpdata & mask1)
+		qe_gc->cpdata |= mask1;
+	else
+		qe_gc->cpdata &= ~mask1;
+
+	out_be32(&regs->cpdata, qe_gc->cpdata);
+	clrsetbits_be32(&regs->cpodr, mask1, sregs->cpodr & mask1);
+
+	spin_unlock_irqrestore(&qe_gc->lock, flags);
+}
+EXPORT_SYMBOL(qe_pin_set_dedicated);
+
+/**
+ * qe_pin_set_gpio - Set a pin to the GPIO mode
+ * @qe_pin:	pointer to the qe_pin structure
+ * Context:	any
+ *
+ * This function sets a pin to the GPIO mode.
+ */
+void qe_pin_set_gpio(struct qe_pin *qe_pin)
+{
+	struct qe_gpio_chip *qe_gc = qe_pin->controller;
+	struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qe_gc->lock, flags);
+
+	/* Let's make it input by default, GPIO API is able to change that. */
+	__par_io_config_pin(regs, qe_pin->num, QE_PIO_DIR_IN, 0, 0, 0);
+
+	spin_unlock_irqrestore(&qe_gc->lock, flags);
+}
+EXPORT_SYMBOL(qe_pin_set_gpio);
+
 static int __init qe_add_gpiochips(void)
 {
 	struct device_node *np;
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
new file mode 100644
index 000000000000..43c4569e24b7
--- /dev/null
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -0,0 +1,155 @@
+/*
+ * Simple Memory-Mapped GPIOs
+ *
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
+#include <asm/prom.h>
+#include "simple_gpio.h"
+
+struct u8_gpio_chip {
+	struct of_mm_gpio_chip mm_gc;
+	spinlock_t lock;
+
+	/* shadowed data register to clear/set bits safely */
+	u8 data;
+};
+
+static struct u8_gpio_chip *to_u8_gpio_chip(struct of_mm_gpio_chip *mm_gc)
+{
+	return container_of(mm_gc, struct u8_gpio_chip, mm_gc);
+}
+
+static u8 u8_pin2mask(unsigned int pin)
+{
+	return 1 << (8 - 1 - pin);
+}
+
+static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+
+	return in_8(mm_gc->regs) & u8_pin2mask(gpio);
+}
+
+static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&u8_gc->lock, flags);
+
+	if (val)
+		u8_gc->data |= u8_pin2mask(gpio);
+	else
+		u8_gc->data &= ~u8_pin2mask(gpio);
+
+	out_8(mm_gc->regs, u8_gc->data);
+
+	spin_unlock_irqrestore(&u8_gc->lock, flags);
+}
+
+static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	return 0;
+}
+
+static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	u8_gpio_set(gc, gpio, val);
+	return 0;
+}
+
+static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+	struct u8_gpio_chip *u8_gc = to_u8_gpio_chip(mm_gc);
+
+	u8_gc->data = in_8(mm_gc->regs);
+}
+
+static int __init u8_simple_gpiochip_add(struct device_node *np)
+{
+	int ret;
+	struct u8_gpio_chip *u8_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct of_gpio_chip *of_gc;
+	struct gpio_chip *gc;
+
+	u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
+	if (!u8_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&u8_gc->lock);
+
+	mm_gc = &u8_gc->mm_gc;
+	of_gc = &mm_gc->of_gc;
+	gc = &of_gc->gc;
+
+	mm_gc->save_regs = u8_gpio_save_regs;
+	of_gc->gpio_cells = 2;
+	gc->ngpio = 8;
+	gc->direction_input = u8_gpio_dir_in;
+	gc->direction_output = u8_gpio_dir_out;
+	gc->get = u8_gpio_get;
+	gc->set = u8_gpio_set;
+
+	ret = of_mm_gpiochip_add(np, mm_gc);
+	if (ret)
+		goto err;
+	return 0;
+err:
+	kfree(u8_gc);
+	return ret;
+}
+
+void __init simple_gpiochip_init(const char *compatible)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, compatible) {
+		int ret;
+		struct resource r;
+
+		ret = of_address_to_resource(np, 0, &r);
+		if (ret)
+			goto err;
+
+		switch (resource_size(&r)) {
+		case 1:
+			ret = u8_simple_gpiochip_add(np);
+			if (ret)
+				goto err;
+			break;
+		default:
+			/*
+			 * Whenever you need support for GPIO bank width > 1,
+			 * please just turn u8_ code into huge macros, and
+			 * construct needed uX_ code with it.
+			 */
+			ret = -ENOSYS;
+			goto err;
+		}
+		continue;
+err:
+		pr_err("%s: registration failed, status %d\n",
+		       np->full_name, ret);
+	}
+}
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
new file mode 100644
index 000000000000..3a7b0c513c76
--- /dev/null
+++ b/arch/powerpc/sysdev/simple_gpio.h
@@ -0,0 +1,12 @@
+#ifndef __SYSDEV_SIMPLE_GPIO_H
+#define __SYSDEV_SIMPLE_GPIO_H
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_SIMPLE_GPIO
+extern void simple_gpiochip_init(const char *compatible);
+#else
+static inline void simple_gpiochip_init(const char *compatible) {}
+#endif /* CONFIG_SIMPLE_GPIO */
+
+#endif /* __SYSDEV_SIMPLE_GPIO_H */