554 files changed, 16159 insertions, 14713 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index 3680364b4048..d46bba801aac 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -100,7 +100,7 @@ Description: read only
 Hexadecimal value of the device ID found in this AFU
 configuration record.
-What: /sys/class/cxl/<afu>/cr<config num>/vendor
+What: /sys/class/cxl/<afu>/cr<config num>/class
 Date: February 2015
 Contact: linuxppc-dev@lists.ozlabs.org
 Description: read only
diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.txt
index 271e607304da..db242ea2bce8 100644
--- a/Documentation/blockdev/nbd.txt
+++ b/Documentation/blockdev/nbd.txt
@@ -1,17 +1,31 @@
- Network Block Device (TCP version)
-
- What is it: With this compiled in the kernel (or as a module), Linux
- can use a remote server as one of its block devices. So every time
- the client computer wants to read, e.g., /dev/nb0, it sends a
- request over TCP to the server, which will reply with the data read.
- This can be used for stations with low disk space (or even diskless)
- to borrow disk space from another computer.
- Unlike NFS, it is possible to put any filesystem on it, etc.
-
- For more information, or to download the nbd-client and nbd-server
- tools, go to http://nbd.sf.net/.
-
- The nbd kernel module need only be installed on the client
- system, as the nbd-server is completely in userspace. In fact,
- the nbd-server has been successfully ported to other operating
- systems, including Windows.
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to http://nbd.sf.net/.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+	Number of partitions per device (default: 0).
+
+nbds_max
+	Number of block devices that should be initialized (default: 16).
+
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 6e54a9d88b7a..3b5f5d1088c6 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -26,6 +26,13 @@ Required properties:
 
 Optional properties:
 
+- interrupt-affinity : Valid only when using SPIs, specifies a list of phandles
+                       to CPU nodes corresponding directly to the affinity of
+                       the SPIs listed in the interrupts property.
+
+                       This property should be present when there is more than
+                       a single SPI.
+
 - qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
   events.
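[Editor's note] The interrupt-affinity binding above pairs each SPI with the
CPU node it is wired to. As a hedged illustration of how a PMU driver might
consume the property (a sketch, not code from this series; the
pmu_parse_irq_affinity() name and its error handling are invented), the
phandle list can be walked with the standard of_* helpers:

	#include <linux/of.h>

	/* Resolve the CPU node tied to each SPI listed in "interrupts". */
	static int pmu_parse_irq_affinity(struct device_node *np, int nr_irqs)
	{
		int i;

		for (i = 0; i < nr_irqs; i++) {
			struct device_node *cpu;

			cpu = of_parse_phandle(np, "interrupt-affinity", i);
			if (!cpu)	/* expected once there is more than one SPI */
				return -EINVAL;

			/* ... translate 'cpu' into a logical CPU number here ... */
			of_node_put(cpu);
		}
		return 0;
	}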
diff --git a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
new file mode 100644
index 000000000000..4971f03f0b33
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
@@ -0,0 +1,43 @@
+ARM MHU Mailbox Driver
+======================
+
+The ARM Message-Handling-Unit (MHU) is a mailbox controller that has
+3 independent channels/links to communicate with remote processor(s).
+MHU links are hardwired on a platform. A link raises an interrupt for any
+received data. However, there is no specified way of knowing if the sent
+data has been read by the remote. This driver assumes the sender polls
+the STAT register and the remote clears it after having read the data.
+The last channel is specified to be a 'Secure' resource, hence it can't be
+used by Linux running NS.
+
+Mailbox Device Node:
+====================
+
+Required properties:
+--------------------
+- compatible:	Shall be "arm,mhu" & "arm,primecell"
+- reg:		Contains the mailbox register address range (base
+		address and length)
+- #mbox-cells	Shall be 1 - the index of the channel needed.
+- interrupts:	Contains the interrupt information corresponding to
+		each of the 3 links of MHU.
+
+Example:
+--------
+
+	mhu: mailbox@2b1f0000 {
+		#mbox-cells = <1>;
+		compatible = "arm,mhu", "arm,primecell";
+		reg = <0 0x2b1f0000 0x1000>;
+		interrupts = <0 36 4>, /* LP-NonSecure */
+			     <0 35 4>, /* HP-NonSecure */
+			     <0 37 4>; /* Secure */
+		clocks = <&clock 0 2 1>;
+		clock-names = "apb_pclk";
+	};
+
+	mhu_client: scb@2e000000 {
+		compatible = "fujitsu,mb86s70-scb-1.0";
+		reg = <0 0x2e000000 0x4000>;
+		mboxes = <&mhu 1>; /* HP-NonSecure */
+	};
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 7c3f187d48bf..0a926e2ba3ab 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -196,7 +196,7 @@ prototypes:
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+	int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
 	int (*launder_page)(struct page *);
 	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 207cdca68bed..5d833b32bbcd 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -590,7 +590,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.txt b/Documentation/powerpc/pci_iov_resource_on_powernv.txt
new file mode 100644
index 000000000000..b55c5cd83f8d
--- /dev/null
+++ b/Documentation/powerpc/pci_iov_resource_on_powernv.txt
@@ -0,0 +1,301 @@
+Wei Yang <weiyang@linux.vnet.ibm.com>
+Benjamin Herrenschmidt <benh@au1.ibm.com>
+Bjorn Helgaas <bhelgaas@google.com>
+26 Aug 2014
+
+This document describes the requirement from hardware for PCI MMIO resource
+sizing and assignment on PowerKVM and how generic PCI code handles this
+requirement. The first two sections describe the concepts of Partitionable
+Endpoints and the implementation on P8 (IODA2). The next two sections talk
+about considerations on enabling SR-IOV on IODA2.
+
+1. Introduction to Partitionable Endpoints
+
+A Partitionable Endpoint (PE) is a way to group the various resources
+associated with a device or a set of devices to provide isolation between
+partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
+to freeze a device that is causing errors in order to limit the possibility
+of propagation of bad data.
+
+There is thus, in HW, a table of PE states that contains a pair of "frozen"
+state bits (one for MMIO and one for DMA, they get set together but can be
+cleared independently) for each PE.
+
+When a PE is frozen, all stores in any direction are dropped and all loads
+return all 1's value. MSIs are also blocked. There's a bit more state that
+captures things like the details of the error that caused the freeze etc., but
+that's not critical.
+
+The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs.
+
+The following section provides a rough description of what we have on P8
+(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB
+is a completely separate HW entity that replicates the entire logic, so has
+its own set of PEs, etc.
+
+2. Implementation of Partitionable Endpoints on P8 (IODA2)
+
+P8 supports up to 256 Partitionable Endpoints per PHB.
+
+  * Inbound
+
+    For DMA, MSIs and inbound PCIe error messages, we have a table (in
+    memory but accessed in HW by the chip) that provides a direct
+    correspondence between a PCIe RID (bus/dev/fn) with a PE number.
+    We call this the RTT.
+
+    - For DMA we then provide an entire address space for each PE that can
+      contain two "windows", depending on the value of PCI address bit 59.
+      Each window can be configured to be remapped via a "TCE table" (IOMMU
+      translation table), which has various configurable characteristics
+      not described here.
+
+    - For MSIs, we have two windows in the address space (one at the top of
+      the 32-bit space and one much higher) which, via a combination of the
+      address and MSI value, will result in one of the 2048 interrupts per
+      bridge being triggered. There's a PE# in the interrupt controller
+      descriptor table as well which is compared with the PE# obtained from
+      the RTT to "authorize" the device to emit that specific interrupt.
+
+    - Error messages just use the RTT.
+
+  * Outbound. That's where the tricky part is.
+
+    Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
+    from the CPU address space to the PCI address space. There is one M32
+    window and sixteen M64 windows. They have different characteristics.
+    First what they have in common: they forward a configurable portion of
+    the CPU address space to the PCIe bus and must be naturally aligned
+    power of two in size. The rest is different:
+
+    - The M32 window:
+
+      * Is limited to 4GB in size.
+
+      * Drops the top bits of the address (above the size) and replaces
+        them with a configurable value. This is typically used to generate
+        32-bit PCIe accesses. We configure that window at boot from FW and
+        don't touch it from Linux; it's usually set to forward a 2GB
+        portion of address space from the CPU to PCIe
+        0x8000_0000..0xffff_ffff.
+        (Note: The top 64KB are actually
+        reserved for MSIs but this is not a problem at this point; we just
+        need to ensure Linux doesn't assign anything there, the M32 logic
+        ignores that however and will forward in that space if we try).
+
+      * It is divided into 256 segments of equal size. A table in the chip
+        maps each segment to a PE#. That allows portions of the MMIO space
+        to be assigned to PEs on a segment granularity. For a 2GB window,
+        the segment granularity is 2GB/256 = 8MB.
+
+      Now, this is the "main" window we use in Linux today (excluding
+      SR-IOV). We basically use the trick of forcing the bridge MMIO windows
+      onto a segment alignment/granularity so that the space behind a bridge
+      can be assigned to a PE.
+
+      Ideally we would like to be able to have individual functions in PEs
+      but that would mean using a completely different address allocation
+      scheme where individual function BARs can be "grouped" to fit in one or
+      more segments.
+
+    - The M64 windows:
+
+      * Must be at least 256MB in size.
+
+      * Do not translate addresses (the address on PCIe is the same as the
+        address on the PowerBus). There is a way to also set the top 14
+        bits which are not conveyed by PowerBus but we don't use this.
+
+      * Can be configured to be segmented. When not segmented, we can
+        specify the PE# for the entire window. When segmented, a window
+        has 256 segments; however, there is no table for mapping a segment
+        to a PE#. The segment number *is* the PE#.
+
+      * Support overlaps. If an address is covered by multiple windows,
+        there's a defined ordering for which window applies.
+
+      We have code (fairly new compared to the M32 stuff) that exploits that
+      for large BARs in 64-bit space:
+
+      We configure an M64 window to cover the entire region of address space
+      that has been assigned by FW for the PHB (about 64GB, ignore the space
+      for the M32, it comes out of a different "reserve"). We configure it
+      as segmented.
+
+      Then we do the same thing as with M32, using the bridge alignment
+      trick, to match to those giant segments.
+
+      Since we cannot remap, we have two additional constraints:
+
+      - We do the PE# allocation *after* the 64-bit space has been assigned
+        because the addresses we use directly determine the PE#. We then
+        update the M32 PE# for the devices that use both 32-bit and 64-bit
+        spaces or assign the remaining PE# to 32-bit only devices.
+
+      - We cannot "group" segments in HW, so if a device ends up using more
+        than one segment, we end up with more than one PE#. There is a HW
+        mechanism to make the freeze state cascade to "companion" PEs but
+        that only works for PCIe error messages (typically used so that if
+        you freeze a switch, it freezes all its children). So we do it in
+        SW. We lose a bit of effectiveness of EEH in that case, but that's
+        the best we found. So when any of the PEs freezes, we freeze the
+        other ones for that "domain". We thus introduce the concept of
+        "master PE" which is the one used for DMA, MSIs, etc., and "secondary
+        PEs" that are used for the remaining M64 segments.
+
+      We would like to investigate using additional M64 windows in "single
+      PE" mode to overlay over specific BARs to work around some of that, for
+      example for devices with very large BARs, e.g., GPUs. It would make
+      sense, but we haven't done it yet.
+
+3. Considerations for SR-IOV on PowerKVM
+
+  * SR-IOV Background
+
+    The PCIe SR-IOV feature allows a single Physical Function (PF) to
+    support several Virtual Functions (VFs).
+    Registers in the PF's SR-IOV
+    Capability control the number of VFs and whether they are enabled.
+
+    When VFs are enabled, they appear in Configuration Space like normal
+    PCI devices, but the BARs in VF config space headers are unusual. For
+    a non-VF device, software uses BARs in the config space header to
+    discover the BAR sizes and assign addresses for them. For VF devices,
+    software uses VF BAR registers in the *PF* SR-IOV Capability to
+    discover sizes and assign addresses. The BARs in the VF's config space
+    header are read-only zeros.
+
+    When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
+    base address for all the corresponding VF(n) BARs. For example, if the
+    PF SR-IOV Capability is programmed to enable eight VFs, and it has a
+    1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
+    This region is divided into eight contiguous 1MB regions, each of which
+    is a BAR0 for one of the VFs. Note that even though the VF BAR
+    describes an 8MB region, the alignment requirement is for a single VF,
+    i.e., 1MB in this example.
+
+    There are several strategies for isolating VFs in PEs:
+
+    - M32 window: There's one M32 window, and it is split into 256
+      equally-sized segments. The finest granularity possible is a 256MB
+      window with 1MB segments. VF BARs that are 1MB or larger could be
+      mapped to separate PEs in this window. Each segment can be
+      individually mapped to a PE via the lookup table, so this is quite
+      flexible, but it works best when all the VF BARs are the same size. If
+      they are different sizes, the entire window has to be small enough that
+      the segment size matches the smallest VF BAR, which means larger VF
+      BARs span several segments.
+
+    - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
+      to a single PE, so it could only isolate one VF.
+
+    - Single segmented M64 windows: A segmented M64 window could be used just
+      like the M32 window, but the segments can't be individually mapped to
+      PEs (the segment number is the PE#), so there isn't as much
+      flexibility. A VF with multiple BARs would have to be in a "domain" of
+      multiple PEs, which is not as well isolated as a single PE.
+
+    - Multiple segmented M64 windows: As usual, each window is split into 256
+      equally-sized segments, and the segment number is the PE#. But if we
+      use several M64 windows, they can be set to different base addresses
+      and different segment sizes. If we have VFs that each have a 1MB BAR
+      and a 32MB BAR, we could use one M64 window to assign 1MB segments and
+      another M64 window to assign 32MB segments.
+
+    Finally, we plan to use M64 windows for SR-IOV, as described in more
+    detail in the next two sections. For a given VF BAR, we need to
+    effectively reserve the entire 256 segments (256 * VF BAR size) and
+    position the VF BAR to start at the beginning of a free range of
+    segments/PEs inside that M64 window.
+
+    The goal is of course to be able to give a separate PE for each VF.
+
+    The IODA2 platform has 16 M64 windows, which are used to map MMIO
+    range to PE#. Each M64 window defines one MMIO range and this range is
+    divided into 256 segments, with each segment corresponding to one PE.
+
+    We decide to leverage this M64 window to map VFs to individual PEs, since
+    SR-IOV VF BARs are all the same size.
+
+    But doing so introduces another problem: total_VFs is usually smaller
+    than the number of M64 window segments, so if we map one VF BAR directly
+    to one M64 window, some part of the M64 window will map to another
+    device's MMIO range.
+
+    IODA supports 256 PEs, so segmented windows contain 256 segments, so if
+    total_VFs is less than 256, we have the situation in Figure 1.0, where
+    segments [total_VFs, 255] of the M64 window may map to some MMIO range on
+    other devices:
+
+     0      1                     total_VFs - 1
+    +------+------+-     -+------+------+
+    |      |      |  ...  |      |      |
+    +------+------+-     -+------+------+
+
+                  VF(n) BAR space
+
+     0      1                     total_VFs - 1                255
+    +------+------+-     -+------+------+-     -+------+------+
+    |      |      |  ...  |      |      |  ...  |      |      |
+    +------+------+-     -+------+------+-     -+------+------+
+
+                  M64 window
+
+    Figure 1.0 Direct map VF(n) BAR space
+
+    Our current solution is to allocate 256 segments even if the VF(n) BAR
+    space doesn't need that much, as shown in Figure 1.1:
+
+     0      1                     total_VFs - 1                255
+    +------+------+-     -+------+------+-     -+------+------+
+    |      |      |  ...  |      |      |  ...  |      |      |
+    +------+------+-     -+------+------+-     -+------+------+
+
+                  VF(n) BAR space + extra
+
+     0      1                     total_VFs - 1                255
+    +------+------+-     -+------+------+-     -+------+------+
+    |      |      |  ...  |      |      |  ...  |      |      |
+    +------+------+-     -+------+------+-     -+------+------+
+
+                  M64 window
+
+    Figure 1.1 Map VF(n) BAR space + extra
+
+    Allocating the extra space ensures that the entire M64 window will be
+    assigned to this one SR-IOV device and none of the space will be
+    available for other devices. Note that this only expands the space
+    reserved in software; there are still only total_VFs VFs, and they only
+    respond to segments [0, total_VFs - 1]. There's nothing in hardware that
+    responds to segments [total_VFs, 255].
+
+4. Implications for the Generic PCI Code
+
+The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
+aligned to the size of an individual VF BAR.
+
+In IODA2, the MMIO address determines the PE#. If the address is in an M32
+window, we can set the PE# by updating the table that translates segments
+to PE#s. Similarly, if the address is in an unsegmented M64 window, we can
+set the PE# for the window. But if it's in a segmented M64 window, the
+segment number is the PE#.
+
+Therefore, the only way to control the PE# for a VF is to change the base
+of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact
+amount of space required for the VF(n) BAR space, the VF BAR value is fixed
+and cannot be changed.
+
+On the other hand, if the PCI core allocates additional space, the VF BAR
+value can be changed as long as the entire VF(n) BAR space remains inside
+the space allocated by the core.
+
+Ideally the segment size will be the same as an individual VF BAR size.
+Then each VF will be in its own PE. The VF BARs (and therefore the PE#s)
+are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we
+allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
+
+If the segment size is smaller than the VF BAR size, it will take several
+segments to cover a VF BAR, and a VF will be in several PEs. This is
+possible, but the isolation isn't as good, and it reduces the number of PE#
+choices because instead of consuming only numVFs segments, the VF(n) BAR
+space will consume (numVFs * n) segments. That means there aren't as many
+available segments for adjusting the base of the VF(n) BAR space.
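[Editor's note] Two relationships carry most of the reasoning above: VF(n)'s
BAR sits at a fixed stride from the VF BAR base, and in a segmented M64
window the address alone determines the segment number and therefore the
PE#. A standalone sketch of that arithmetic (illustrative only; the function
names are invented and nothing here is kernel code):

	#include <stdint.h>

	/* VF(n) BARs are contiguous: one VF-BAR-sized stride per VF. */
	static uint64_t vf_bar_addr(uint64_t vf_bar_base, uint64_t vf_bar_size, int n)
	{
		return vf_bar_base + (uint64_t)n * vf_bar_size;
	}

	/* In a segmented M64 window the segment number *is* the PE#. */
	static int pe_of_addr(uint64_t addr, uint64_t m64_base, uint64_t seg_size)
	{
		return (addr - m64_base) / seg_size;	/* 0..255 on IODA2 */
	}

	/*
	 * Hence the only knob for choosing VF0's PE#: slide the whole
	 * VF(n) BAR space by whole segments within the over-allocated
	 * 256-segment window.
	 */
	static uint64_t base_for_vf0_pe(uint64_t m64_base, uint64_t seg_size, int pe)
	{
		return m64_base + (uint64_t)pe * seg_size;
	}

With seg_size equal to the VF BAR size, these reproduce the rule stated
above: if VF0 lands in PE(x), VF(n) lands in PE(x+n).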
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
index 9791e98ab49c..ba0a2a4a54ba 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 ========
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
 
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state. The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc. If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel. However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware. The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure. No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
 
-Simple syscalls (e.g. sigprocmask()) "could" be OK. Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use. Best to queue them up till after the transaction is over.
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library. Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 Signals
@@ -174,10 +175,9 @@ These are defined in <asm/reg.h>, and distinguish different reasons why the
 kernel aborted a transaction:
 
 TM_CAUSE_RESCHED       Thread was rescheduled.
-TM_CAUSE_TLBI          Software TLB invalide.
+TM_CAUSE_TLBI          Software TLB invalid.
 TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
-TM_CAUSE_SYSCALL       Currently unused; future syscalls that must abort
-                       transactions for consistency will use this.
+TM_CAUSE_SYSCALL       Syscall from active transaction.
 TM_CAUSE_SIGNAL        Signal delivered.
 TM_CAUSE_MISC          Currently unused.
 TM_CAUSE_ALIGNMENT     Alignment fault.
@@ -185,7 +185,7 @@ kernel aborted a transaction:
 These can be checked by the user program's abort handler as TEXASR[0:7]. If
 bit 7 is set, it indicates that the error is considered persistent. For example
-a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.q
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
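[Editor's note] The failure-code table above is what a user program's abort
handler actually inspects. A minimal userspace sketch of the retry decision
(assumptions of the sketch: the TM_CAUSE_* values come from <asm/reg.h> as
the text says, and TEXASR[0:7] in IBM bit numbering is the most significant
byte of the 64-bit register):

	#include <stdint.h>

	#define TM_CAUSE_PERSISTENT	0x01	/* bit 7 of the cause byte */

	/* Return nonzero if retrying the transaction could succeed. */
	static int tm_should_retry(uint64_t texasr)
	{
		uint8_t cause = texasr >> 56;	/* TEXASR[0:7] */

		/*
		 * Persistent causes (e.g. TM_CAUSE_SYSCALL |
		 * TM_CAUSE_PERSISTENT, or TM_CAUSE_ALIGNMENT) will fail
		 * again; transient ones (e.g. TM_CAUSE_RESCHED) may not.
		 */
		return !(cause & TM_CAUSE_PERSISTENT);
	}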
GDB === diff --git a/MAINTAINERS b/MAINTAINERS index d24e8a38ff63..f7bbaece5649 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8108,7 +8108,7 @@ S: Maintained F: drivers/net/wireless/rt2x00/ RAMDISK RAM BLOCK DEVICE DRIVER -M: Nick Piggin <npiggin@kernel.dk> +M: Jens Axboe <axboe@kernel.dk> S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c @@ -8655,11 +8655,9 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" <JBottomley@parallels.com> +M: "James E.J. Bottomley" <JBottomley@odin.com> L: linux-scsi@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-pending-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git S: Maintained F: drivers/scsi/ F: include/scsi/ diff --git a/arch/Kconfig b/arch/Kconfig index e1068987bad1..a65eafb24997 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -32,7 +32,7 @@ config HAVE_OPROFILE config OPROFILE_NMI_TIMER def_bool y - depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI + depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !PPC64 config KPROBES bool "Kprobes" diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 4cf48c3aca13..405aa1883307 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -269,6 +269,16 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); +static inline bool __kvm_cpu_uses_extended_idmap(void) +{ + return false; +} + +static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, + pgd_t *hyp_pgd, + pgd_t *merged_hyp_pgd, + unsigned long hyp_idmap_start) { } + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 7a301be9ac67..8b60fde5ce48 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -11,7 +11,7 @@ #ifdef CONFIG_ARM_KERNMEM_PERMS #include <asm/pgtable.h> #endif - + #define PROC_INFO \ . = ALIGN(4); \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ @@ -23,7 +23,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - . = ALIGN(32); \ + . = ALIGN(PAGE_SIZE); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -343,8 +343,11 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") + /* - * The HYP init code can't be more than a page long. + * The HYP init code can't be more than a page long, + * and should not cross a page boundary. * The above comment applies as well. 
*/ -ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") +ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, + "HYP init code too big or misaligned") diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 15b050d46fc9..1d5accbd3dcf 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -35,9 +35,9 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; static pgd_t *boot_hyp_pgd; static pgd_t *hyp_pgd; +static pgd_t *merged_hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); -static void *init_bounce_page; static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; @@ -405,9 +405,6 @@ void free_boot_hyp_pgd(void) if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - free_page((unsigned long)init_bounce_page); - init_bounce_page = NULL; - mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -438,6 +435,11 @@ void free_hyp_pgds(void) free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; } + if (merged_hyp_pgd) { + clear_page(merged_hyp_pgd); + free_page((unsigned long)merged_hyp_pgd); + merged_hyp_pgd = NULL; + } mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -1622,12 +1624,18 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - return virt_to_phys(hyp_pgd); + if (__kvm_cpu_uses_extended_idmap()) + return virt_to_phys(merged_hyp_pgd); + else + return virt_to_phys(hyp_pgd); } phys_addr_t kvm_mmu_get_boot_httbr(void) { - return virt_to_phys(boot_hyp_pgd); + if (__kvm_cpu_uses_extended_idmap()) + return virt_to_phys(merged_hyp_pgd); + else + return virt_to_phys(boot_hyp_pgd); } phys_addr_t kvm_get_idmap_vector(void) @@ -1643,39 +1651,11 @@ int kvm_mmu_init(void) hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); - if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { - /* - * Our init code is crossing a page boundary. Allocate - * a bounce page, copy the code over and use that. - */ - size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; - phys_addr_t phys_base; - - init_bounce_page = (void *)__get_free_page(GFP_KERNEL); - if (!init_bounce_page) { - kvm_err("Couldn't allocate HYP init bounce page\n"); - err = -ENOMEM; - goto out; - } - - memcpy(init_bounce_page, __hyp_idmap_text_start, len); - /* - * Warning: the code we just copied to the bounce page - * must be flushed to the point of coherency. - * Otherwise, the data may be sitting in L2, and HYP - * mode won't be able to observe it as it runs with - * caches off at that point. - */ - kvm_flush_dcache_to_poc(init_bounce_page, len); - - phys_base = kvm_virt_to_phys(init_bounce_page); - hyp_idmap_vector += phys_base - hyp_idmap_start; - hyp_idmap_start = phys_base; - hyp_idmap_end = phys_base + len; - - kvm_info("Using HYP init bounce page @%lx\n", - (unsigned long)phys_base); - } + /* + * We rely on the linker script to ensure at build time that the HYP + * init code does not cross a page boundary. 
+ */ + BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); @@ -1698,6 +1678,17 @@ int kvm_mmu_init(void) goto out; } + if (__kvm_cpu_uses_extended_idmap()) { + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!merged_hyp_pgd) { + kvm_err("Failed to allocate extra HYP pgd\n"); + goto out; + } + __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, + hyp_idmap_start); + return 0; + } + /* Map the very same page at the trampoline VA */ err = __create_hyp_mappings(boot_hyp_pgd, TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 263a2044c65b..224081ccc92f 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -53,105 +53,33 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); static __read_mostly int xen_events_irq = -1; -/* map fgmfn of domid to lpfn in the current domain */ -static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, - unsigned int domid) +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) { - int rc; - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - xen_ulong_t idx = fgmfn; - xen_pfn_t gpfn = lpfn; - int err = 0; - - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); - - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - if (rc || err) { - pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n", - rc, err, lpfn, fgmfn); - return 1; - } - return 0; -} - -struct remap_data { - xen_pfn_t fgmfn; /* foreign domain's gmfn */ - pgprot_t prot; - domid_t domid; - struct vm_area_struct *vma; - int index; - struct page **pages; - struct xen_remap_mfn_info *info; -}; - -static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) -{ - struct remap_data *info = data; - struct page *page = info->pages[info->index++]; - unsigned long pfn = page_to_pfn(page); - pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); - - if (map_foreign_page(pfn, info->fgmfn, info->domid)) - return -EFAULT; - set_pte_at(info->vma->vm_mm, addr, ptep, pte); - - return 0; + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, + prot, domid, pages); } +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); +/* Not used by XENFEAT_auto_translated guests. 
*/ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t mfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) + unsigned long addr, + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) { - int err; - struct remap_data data; - - /* TBD: Batching, current sole caller only does page at a time */ - if (nr > 1) - return -EINVAL; - - data.fgmfn = mfn; - data.prot = prot; - data.domid = domid; - data.vma = vma; - data.index = 0; - data.pages = pages; - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, - remap_pte_fn, &data); - return err; + return -ENOSYS; } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int nr, struct page **pages) { - int i; - - for (i = 0; i < nr; i++) { - struct xen_remove_from_physmap xrp; - unsigned long rc, pfn; - - pfn = page_to_pfn(pages[i]); - - xrp.domid = DOMID_SELF; - xrp.gpfn = pfn; - rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - if (rc) { - pr_warn("Failed to unmap pfn:%lx rc:%ld\n", - pfn, rc); - return rc; - } - } - return 0; + return xen_xlate_unmap_gfn_range(vma, nr, pages); } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 34f487d5d84e..b8d96f1554af 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -368,6 +368,27 @@ config ARM64_ERRATUM_832075 If unsure, say Y. +config ARM64_ERRATUM_845719 + bool "Cortex-A53: 845719: a load might read incorrect data" + depends on COMPAT + default y + help + This option adds an alternative code sequence to work around ARM + erratum 845719 on Cortex-A53 parts up to r0p4. + + When running a compat (AArch32) userspace on an affected Cortex-A53 + part, a load at EL0 from a virtual address that matches the bottom 32 + bits of the virtual address used by a recent load at (AArch64) EL1 + might return incorrect data. + + The workaround is to write the contextidr_el1 register on exception + return to a 32-bit task. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + endmenu @@ -455,8 +476,8 @@ config SCHED_SMT places. If unsure say N here. config NR_CPUS - int "Maximum number of CPUs (2-64)" - range 2 64 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP # These have to remain sorted largest to smallest default "64" @@ -470,6 +491,10 @@ config HOTPLUG_CPU source kernel/Kconfig.preempt +config UP_LATE_INIT + def_bool y + depends on !SMP + config HZ int default 100 @@ -670,7 +695,7 @@ source "fs/Kconfig.binfmt" config COMPAT bool "Kernel support for 32-bit EL0" - depends on !ARM64_64K_PAGES + depends on !ARM64_64K_PAGES || EXPERT select COMPAT_BINFMT_ELF select HAVE_UID16 select OLD_SIGSUSPEND3 @@ -681,6 +706,10 @@ config COMPAT the user helper functions, VFP support and the ptrace interface are handled appropriately by the kernel. + If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you + will only be able to execute AArch32 binaries that were compiled with + 64k aligned segments. + If you want to execute 32-bit userspace applications, say Y. 
config SYSVIPC_COMPAT diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 69ceedc982a5..4d2a925998f9 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -48,7 +48,7 @@ core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) -libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ +core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make KBUILD_IMAGE := Image.gz diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index af6a452b1aac..4e03d8dd23f6 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -31,8 +31,12 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_EXYNOS7=y CONFIG_ARCH_FSL_LS2085A=y CONFIG_ARCH_MEDIATEK=y +CONFIG_ARCH_SEATTLE=y +CONFIG_ARCH_TEGRA=y +CONFIG_ARCH_TEGRA_132_SOC=y CONFIG_ARCH_THUNDER=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y @@ -62,6 +66,7 @@ CONFIG_BPF_JIT=y # CONFIG_WIRELESS is not set CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y +# CONFIG_TEGRA_AHB is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -81,6 +86,7 @@ CONFIG_NETDEVICES=y CONFIG_TUN=y CONFIG_VIRTIO_NET=y CONFIG_NET_XGENE=y +CONFIG_SKY2=y CONFIG_SMC91X=y CONFIG_SMSC911X=y # CONFIG_WLAN is not set @@ -100,6 +106,8 @@ CONFIG_SPI=y CONFIG_SPI_PL022=y CONFIG_GPIO_PL061=y CONFIG_GPIO_XGENE=y +CONFIG_POWER_RESET_XGENE=y +CONFIG_POWER_RESET_SYSCON=y # CONFIG_HWMON is not set CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y @@ -112,10 +120,10 @@ CONFIG_LOGO=y CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760_HCD=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y +CONFIG_USB_ISP1760=y CONFIG_USB_ULPI=y CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y @@ -125,6 +133,7 @@ CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_RTC_DRV_XGENE=y +CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y # CONFIG_IOMMU_SUPPORT is not set @@ -143,8 +152,10 @@ CONFIG_CUSE=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y +CONFIG_EFIVAR_FS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_NLS_CODEPAGE_437=y @@ -159,7 +170,6 @@ CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set -CONFIG_KEYS=y CONFIG_SECURITY=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_ARM64_CRYPTO=y diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 432e4841cd81..a2a7fbcacc14 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final) 0: mov v4.16b, v3.16b 1: ld1 {v5.2d}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b - aese v1.16b, v4.16b aesmc v0.16b, v0.16b + aese v1.16b, v4.16b aesmc v1.16b, v1.16b 2: ld1 {v3.2d}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b - aese v1.16b, v5.16b aesmc v0.16b, v0.16b + aese v1.16b, v5.16b aesmc v1.16b, v1.16b 3: ld1 {v4.2d}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b - aese v1.16b, v3.16b aesmc v0.16b, v0.16b + aese v1.16b, v3.16b aesmc v1.16b, v1.16b bpl 1b aese v0.16b, v4.16b @@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final) ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese 
v0.16b, v4.16b - aese v1.16b, v4.16b aesmc v0.16b, v0.16b + aese v1.16b, v4.16b aesmc v1.16b, v1.16b 3: ld1 {v3.2d}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b - aese v1.16b, v5.16b aesmc v0.16b, v0.16b + aese v1.16b, v5.16b aesmc v1.16b, v1.16b 4: ld1 {v4.2d}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b - aese v1.16b, v3.16b aesmc v0.16b, v0.16b + aese v1.16b, v3.16b aesmc v1.16b, v1.16b ld1 {v5.2d}, [x10], #16 /* load next round key */ bpl 2b diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 685a18f731eb..78f3cfe92c08 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -45,18 +45,14 @@ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 aes\de \i0\().16b, \k\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\de \i3\().16b, \k\().16b - .endif - .endif aes\mc \i0\().16b, \i0\().16b .ifnb \i1 + aes\de \i1\().16b, \k\().16b aes\mc \i1\().16b, \i1\().16b .ifnb \i3 + aes\de \i2\().16b, \k\().16b aes\mc \i2\().16b, \i2\().16b + aes\de \i3\().16b, \k\().16b aes\mc \i3\().16b, \i3\().16b .endif .endif diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 750bac4e637e..144b64ad96c3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -159,4 +159,52 @@ lr .req x30 // link register orr \rd, \lbits, \hbits, lsl #32 .endm +/* + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where + * <symbol> is within the range +/- 4 GB of the PC. + */ + /* + * @dst: destination register (64 bit wide) + * @sym: name of the symbol + * @tmp: optional scratch register to be used if <dst> == sp, which + * is not allowed in an adrp instruction + */ + .macro adr_l, dst, sym, tmp= + .ifb \tmp + adrp \dst, \sym + add \dst, \dst, :lo12:\sym + .else + adrp \tmp, \sym + add \dst, \tmp, :lo12:\sym + .endif + .endm + + /* + * @dst: destination register (32 or 64 bit wide) + * @sym: name of the symbol + * @tmp: optional 64-bit scratch register to be used if <dst> is a + * 32-bit wide register, in which case it cannot be used to hold + * the address + */ + .macro ldr_l, dst, sym, tmp= + .ifb \tmp + adrp \dst, \sym + ldr \dst, [\dst, :lo12:\sym] + .else + adrp \tmp, \sym + ldr \dst, [\tmp, :lo12:\sym] + .endif + .endm + + /* + * @src: source register (32 or 64 bit wide) + * @sym: name of the symbol + * @tmp: mandatory 64-bit scratch register to calculate the address + * while <src> needs to be preserved. 
+ */ + .macro str_l, src, sym, tmp + adrp \tmp, \sym + str \src, [\tmp, :lo12:\sym] + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b6c16d5f622f..82cb9f98ba1a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -23,11 +23,24 @@ #define ARM64_WORKAROUND_CLEAN_CACHE 0 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 +#define ARM64_WORKAROUND_845719 2 -#define ARM64_NCAPS 2 +#define ARM64_NCAPS 3 #ifndef __ASSEMBLY__ +struct arm64_cpu_capabilities { + const char *desc; + u16 capability; + bool (*matches)(const struct arm64_cpu_capabilities *); + union { + struct { /* To be used for erratum handling only */ + u32 midr_model; + u32 midr_range_min, midr_range_max; + }; + }; +}; + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); static inline bool cpu_have_feature(unsigned int num) @@ -51,7 +64,10 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } +void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + const char *info); void check_local_cpu_errata(void); +void check_local_cpu_features(void); bool cpu_supports_mixed_endian_el0(void); bool system_supports_mixed_endian_el0(void); diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h deleted file mode 100644 index e3bd983d3661..000000000000 --- a/arch/arm64/include/asm/cputable.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * arch/arm64/include/asm/cputable.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#ifndef __ASM_CPUTABLE_H -#define __ASM_CPUTABLE_H - -struct cpu_info { - unsigned int cpu_id_val; - unsigned int cpu_id_mask; - const char *cpu_name; - unsigned long (*cpu_setup)(void); -}; - -extern struct cpu_info *lookup_processor_type(unsigned int); - -#endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 6932bb57dba0..9437e3dc5833 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -97,7 +97,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask) static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) - return 0; + return false; return addr + size - 1 <= *dev->dma_mask; } diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index defa0ff98250..926495686554 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -33,6 +33,7 @@ enum fixed_addresses { FIX_HOLE, FIX_EARLYCON_MEM_BASE, + FIX_TEXT_POKE0, __end_of_permanent_fixed_addresses, /* @@ -49,7 +50,6 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, - FIX_TEXT_POKE0, __end_of_fixed_addresses }; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index d2f49423c5dc..f81b328d9cf4 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -285,6 +285,7 @@ bool aarch64_insn_is_nop(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 36250705dc4c..61505676d085 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -68,6 +68,8 @@ #include <asm/pgalloc.h> #include <asm/cachetype.h> #include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) @@ -269,5 +271,36 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); +static inline bool __kvm_cpu_uses_extended_idmap(void) +{ + return __cpu_uses_extended_idmap(); +} + +static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, + pgd_t *hyp_pgd, + pgd_t *merged_hyp_pgd, + unsigned long hyp_idmap_start) +{ + int idmap_idx; + + /* + * Use the first entry to access the HYP mappings. It is + * guaranteed to be free, otherwise we wouldn't use an + * extended idmap. + */ + VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); + merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE); + + /* + * Create another extended level entry that points to the boot HYP map, + * which contains an ID mapping of the HYP init code. We essentially + * merge the boot and runtime HYP maps by doing so, but they don't + * overlap anyway, so this is fine. 
+ */ + idmap_idx = hyp_idmap_start >> VA_BITS; + VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); + merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 101a42bde728..8ec41e5f56f0 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void) : "r" (ttbr)); } +/* + * TCR.T0SZ value to use when the ID map is active. Usually equals + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in + * physical memory, in which case it will be smaller. + */ +extern u64 idmap_t0sz; + +static inline bool __cpu_uses_extended_idmap(void) +{ + return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) && + unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); +} + +static inline void __cpu_set_tcr_t0sz(u64 t0sz) +{ + unsigned long tcr; + + if (__cpu_uses_extended_idmap()) + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); +} + +/* + * Set TCR.T0SZ to the value appropriate for activating the identity map. + */ +static inline void cpu_set_idmap_tcr_t0sz(void) +{ + __cpu_set_tcr_t0sz(idmap_t0sz); +} + +/* + * Set TCR.T0SZ to its default value (based on VA_BITS) + */ +static inline void cpu_set_default_tcr_t0sz(void) +{ + __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); +} + static inline void switch_new_context(struct mm_struct *mm) { unsigned long flags; diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 8fc8fa280e92..7d9c7e4a424b 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,7 +33,9 @@ * image. Both require pgd, pud (4 levels only) and pmd tables to (section) * map the kernel. With the 64K page configuration, swapper and idmap need to * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so 3 pages are reserved in all cases. */ #ifdef CONFIG_ARM64_64K_PAGES #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) @@ -42,7 +44,7 @@ #endif #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 80f3d241cff8..59bfae75dc98 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -143,7 +143,12 @@ /* * TCR flags. 
*/ -#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0)) +#define TCR_T0SZ_OFFSET 0 +#define TCR_T1SZ_OFFSET 16 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) +#define TCR_TxSZ_WIDTH 6 #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h index e6f087806aaf..b7710a59672c 100644 --- a/arch/arm64/include/asm/pmu.h +++ b/arch/arm64/include/asm/pmu.h @@ -44,6 +44,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + int *irq_affinity; const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 941c375616e2..220633b791b8 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -45,15 +45,6 @@ do { \ cpu_do_switch_mm(virt_to_phys(pgd),mm); \ } while (0) -#define cpu_get_pgd() \ -({ \ - unsigned long pg; \ - asm("mrs %0, ttbr0_el1\n" \ - : "=r" (pg)); \ - pg &= ~0xffff000000003ffful; \ - (pgd_t *)phys_to_virt(pg); \ -}) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 20e9591a60cf..d2c37a1df0eb 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -127,7 +127,11 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -#define cpu_relax() barrier() +static inline void cpu_relax(void) +{ + asm volatile("yield" ::: "memory"); +} + #define cpu_relax_lowlatency() cpu_relax() /* Thread switching */ diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index 59e282311b58..8dcd61e32176 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -40,4 +40,6 @@ static inline u32 mpidr_hash_size(void) extern u64 __cpu_logical_map[NR_CPUS]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] +void __init do_post_cpus_up_work(void); + #endif /* __ASM_SMP_PLAT_H */ diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 27224426e0bf..cef934a90f17 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -406,7 +406,7 @@ __SYSCALL(__NR_vfork, sys_vfork) #define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ __SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ #define __NR_mmap2 192 -__SYSCALL(__NR_mmap2, sys_mmap_pgoff) +__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper) #define __NR_truncate64 193 __SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper) #define __NR_ftruncate64 194 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 5ee07eee80c2..b12e15b80516 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -12,12 +12,12 @@ CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg # Object file lists. 
-arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ +arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ - alternative.o cacheinfo.o + cpufeature.o alternative.o cacheinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ad7821d64a1d..21033bba9390 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,6 +24,7 @@ #include <asm/cacheflush.h> #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/insn.h> #include <linux/stop_machine.h> extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; @@ -33,6 +34,48 @@ struct alt_region { struct alt_instr *end; }; +/* + * Decode the imm field of a b/bl instruction, and return the byte + * offset as a signed value (so it can be used when computing a new + * branch target). + */ +static s32 get_branch_offset(u32 insn) +{ + s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn); + + /* sign-extend the immediate before turning it into a byte offset */ + return (imm << 6) >> 4; +} + +static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr) +{ + u32 insn; + + aarch64_insn_read(altinsnptr, &insn); + + /* Stop the world on instructions we don't support... */ + BUG_ON(aarch64_insn_is_cbz(insn)); + BUG_ON(aarch64_insn_is_cbnz(insn)); + BUG_ON(aarch64_insn_is_bcond(insn)); + /* ... and there is probably more. */ + + if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { + enum aarch64_insn_branch_type type; + unsigned long target; + + if (aarch64_insn_is_b(insn)) + type = AARCH64_INSN_BRANCH_NOLINK; + else + type = AARCH64_INSN_BRANCH_LINK; + + target = (unsigned long)altinsnptr + get_branch_offset(insn); + insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr, + target, type); + } + + return insn; +} + static int __apply_alternatives(void *alt_region) { struct alt_instr *alt; @@ -40,16 +83,24 @@ static int __apply_alternatives(void *alt_region) u8 *origptr, *replptr; for (alt = region->begin; alt < region->end; alt++) { + u32 insn; + int i; + if (!cpus_have_cap(alt->cpufeature)) continue; - BUG_ON(alt->alt_len > alt->orig_len); + BUG_ON(alt->alt_len != alt->orig_len); pr_info_once("patching kernel code\n"); origptr = (u8 *)&alt->orig_offset + alt->orig_offset; replptr = (u8 *)&alt->alt_offset + alt->alt_offset; - memcpy(origptr, replptr, alt->alt_len); + + for (i = 0; i < alt->alt_len; i += sizeof(insn)) { + insn = get_alt_insn(origptr + i, replptr + i); + aarch64_insn_write(origptr + i, insn); + } + flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + alt->alt_len)); } diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 56cadd3606bf..da675cc5dfae 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,7 +24,6 @@ #include <linux/kvm_host.h> #include <asm/thread_info.h> #include <asm/memory.h> -#include <asm/cputable.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/vdso_datapage.h> @@ -70,9 +69,6 @@ int main(void) BLANK(); DEFINE(PAGE_SZ, PAGE_SIZE); BLANK(); - DEFINE(CPU_INFO_SZ, sizeof(struct cpu_info)); - DEFINE(CPU_INFO_SETUP, offsetof(struct cpu_info, cpu_setup)); - BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, 
DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index fa62637e63a8..6ffd91438560 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,8 +16,6 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#define pr_fmt(fmt) "alternatives: " fmt - #include <linux/types.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -26,27 +24,11 @@ #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -/* - * Add a struct or another datatype to the union below if you need - * different means to detect an affected CPU. - */ -struct arm64_cpu_capabilities { - const char *desc; - u16 capability; - bool (*is_affected)(struct arm64_cpu_capabilities *); - union { - struct { - u32 midr_model; - u32 midr_range_min, midr_range_max; - }; - }; -}; - #define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ MIDR_ARCHITECTURE_MASK) static bool __maybe_unused -is_affected_midr_range(struct arm64_cpu_capabilities *entry) +is_affected_midr_range(const struct arm64_cpu_capabilities *entry) { u32 midr = read_cpuid_id(); @@ -59,12 +41,12 @@ is_affected_midr_range(struct arm64_cpu_capabilities *entry) } #define MIDR_RANGE(model, min, max) \ - .is_affected = is_affected_midr_range, \ + .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ .midr_range_max = max -struct arm64_cpu_capabilities arm64_errata[] = { +const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ defined(CONFIG_ARM64_ERRATUM_824069) @@ -88,7 +70,16 @@ struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12), + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_845719 + { + /* Cortex-A53 r0p[01234] */ + .desc = "ARM erratum 845719", + .capability = ARM64_WORKAROUND_845719, + MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), }, #endif { @@ -97,15 +88,5 @@ struct arm64_cpu_capabilities arm64_errata[] = { void check_local_cpu_errata(void) { - struct arm64_cpu_capabilities *cpus = arm64_errata; - int i; - - for (i = 0; cpus[i].desc; i++) { - if (!cpus[i].is_affected(&cpus[i])) - continue; - - if (!cpus_have_cap(cpus[i].capability)) - pr_info("enabling workaround for %s\n", cpus[i].desc); - cpus_set_cap(cpus[i].capability); - } + check_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c new file mode 100644 index 000000000000..3d9967e43d89 --- /dev/null +++ b/arch/arm64/kernel/cpufeature.c @@ -0,0 +1,47 @@ +/* + * Contains CPU feature definitions + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
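Editor's note: the `(imm << 6) >> 4` in get_branch_offset() above packs two operations into one. The b/bl immediate is a signed 26-bit word offset, so shifting it to the top of a 32-bit value and arithmetic-shifting back down both sign-extends it and leaves a net << 2, i.e. a byte offset. A standalone model (helper name invented; like the kernel, it relies on arithmetic right shift of negative values):

    #include <assert.h>
    #include <stdint.h>

    static int32_t branch_byte_offset(uint32_t imm26)
    {
            /* bit 25 (the sign) lands in bit 31; >> 4 sign-extends
             * while keeping the value scaled by 4 (words -> bytes) */
            return ((int32_t)(imm26 << 6)) >> 4;
    }

    int main(void)
    {
            assert(branch_byte_offset(1) == 4);                   /* +1 word */
            assert(branch_byte_offset(0x2000000) == -(1 << 27));  /* most negative */
            return 0;
    }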
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define pr_fmt(fmt) "alternatives: " fmt + +#include <linux/types.h> +#include <asm/cpu.h> +#include <asm/cpufeature.h> + +static const struct arm64_cpu_capabilities arm64_features[] = { + {}, +}; + +void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + const char *info) +{ + int i; + + for (i = 0; caps[i].desc; i++) { + if (!caps[i].matches(&caps[i])) + continue; + + if (!cpus_have_cap(caps[i].capability)) + pr_info("%s %s\n", info, caps[i].desc); + cpus_set_cap(caps[i].capability); + } +} + +void check_local_cpu_features(void) +{ + check_cpu_capabilities(arm64_features, "detected feature"); +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 929855691dae..75d5a867e7fb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -236,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); + check_local_cpu_features(); update_cpu_features(info); } diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c deleted file mode 100644 index fd3993cb060f..000000000000 --- a/arch/arm64/kernel/cputable.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/arm64/kernel/cputable.c - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
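Editor's note: check_cpu_capabilities() above walks any table terminated by a NULL desc, so CPU errata and CPU features now share one detection loop. A hypothetical entry, purely to show the shape (the struct is a trimmed mirror of the kernel's, and the description, capability number and callback are invented):

    #include <stdbool.h>
    #include <stdint.h>

    /* trimmed mirror of struct arm64_cpu_capabilities in <asm/cpufeature.h> */
    struct arm64_cpu_capabilities {
            const char *desc;
            uint16_t capability;
            bool (*matches)(const struct arm64_cpu_capabilities *);
            uint32_t midr_model, midr_range_min, midr_range_max;
    };

    static bool has_example_feature(const struct arm64_cpu_capabilities *entry)
    {
            return false;   /* a real matches() probes MIDR or an ID register */
    }

    static const struct arm64_cpu_capabilities example_features[] = {
            {
                    .desc = "example feature",      /* hypothetical */
                    .capability = 0,                /* would be an ARM64_* constant */
                    .matches = has_example_feature,
            },
            {},     /* desc == NULL ends the walk in check_cpu_capabilities() */
    };

Note also that the errata hunk above corrects the Cortex-A57 upper bound: the MIDR variant field sits at bits [23:20] of MIDR_EL1, so r1p2 encodes as (1 << MIDR_VARIANT_SHIFT) | 2 rather than 0x12.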
- */ - -#include <linux/init.h> - -#include <asm/cputable.h> - -extern unsigned long __cpu_setup(void); - -struct cpu_info cpu_table[] = { - { - .cpu_id_val = 0x000f0000, - .cpu_id_mask = 0x000f0000, - .cpu_name = "AArch64 Processor", - .cpu_setup = __cpu_setup, - }, - { /* Empty */ }, -}; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index cf21bb3bf752..959fe8733560 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -21,8 +21,10 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/alternative-asm.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> #include <asm/thread_info.h> @@ -120,6 +122,24 @@ ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 + +#ifdef CONFIG_ARM64_ERRATUM_845719 + alternative_insn \ + "nop", \ + "tbz x22, #4, 1f", \ + ARM64_WORKAROUND_845719 +#ifdef CONFIG_PID_IN_CONTEXTIDR + alternative_insn \ + "nop; nop", \ + "mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:", \ + ARM64_WORKAROUND_845719 +#else + alternative_insn \ + "nop", \ + "msr contextidr_el1, xzr; 1:", \ + ARM64_WORKAROUND_845719 +#endif +#endif .endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index 9a8f6ae2530e..bd9bfaa9269b 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -19,9 +19,12 @@ */ #include <linux/linkage.h> +#include <linux/const.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/errno.h> +#include <asm/page.h> /* * System call wrappers for the AArch32 compatibility layer. @@ -54,6 +57,21 @@ ENTRY(compat_sys_fstatfs64_wrapper) ENDPROC(compat_sys_fstatfs64_wrapper) /* + * Note: off_4k (w5) is always in units of 4K. If we can't do the + * requested offset because it is not page-aligned, we return -EINVAL. + */ +ENTRY(compat_sys_mmap2_wrapper) +#if PAGE_SHIFT > 12 + tst w5, #~PAGE_MASK >> 12 + b.ne 1f + lsr w5, w5, #PAGE_SHIFT - 12 +#endif + b sys_mmap_pgoff +1: mov x0, #-EINVAL + ret +ENDPROC(compat_sys_mmap2_wrapper) + +/* * Wrappers for AArch32 syscalls that either take 64-bit parameters * in registers or that take 32-bit parameters which require sign * extension. 
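Editor's note: for kernels with PAGE_SHIFT > 12 (e.g. 64 KB pages), compat mmap2's 4 KB-unit offset must be converted to kernel-page units, rejecting any offset that would lose bits. A rough C rendering of the wrapper's fixup (sketch only, helper name invented; the authoritative version is the assembly above):

    #include <errno.h>
    #include <stdint.h>

    /* page_shift: log2 of the kernel page size; off_4k: the AArch32
     * mmap2 offset, always in 4 KB units. Returns the offset in
     * kernel pages, or -EINVAL if it is not page-aligned. */
    static int64_t mmap2_pgoff(unsigned int page_shift, uint64_t off_4k)
    {
            if (page_shift > 12) {
                    if (off_4k & ((UINT64_C(1) << (page_shift - 12)) - 1))
                            return -EINVAL; /* would silently round otherwise */
                    return off_4k >> (page_shift - 12);
            }
            return off_4k;  /* 4 KB pages: units already agree */
    }

    /* e.g. with 64 KB pages: mmap2_pgoff(16, 0x10) == 0x1,
     * while mmap2_pgoff(16, 0x11) == -EINVAL */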
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 07f930540f4a..19f915e8f6e0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,7 +36,7 @@ #include <asm/page.h> #include <asm/virt.h> -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) +#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 #error TEXT_OFFSET must be at least 4KB aligned @@ -46,13 +46,6 @@ #error TEXT_OFFSET must be less than 2MB #endif - .macro pgtbl, ttb0, ttb1, virt_to_phys - ldr \ttb1, =swapper_pg_dir - ldr \ttb0, =idmap_pg_dir - add \ttb1, \ttb1, \virt_to_phys - add \ttb0, \ttb0, \virt_to_phys - .endm - #ifdef CONFIG_ARM64_64K_PAGES #define BLOCK_SHIFT PAGE_SHIFT #define BLOCK_SIZE PAGE_SIZE @@ -63,7 +56,7 @@ #define TABLE_SHIFT PUD_SHIFT #endif -#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_START _text #define KERNEL_END _end /* @@ -240,40 +233,43 @@ section_table: #endif ENTRY(stext) - mov x21, x0 // x21=FDT + bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - /* - * __error_p may end up out of range for cbz if text areas are - * aligned up to section sizes. - */ - cbnz x23, 1f // invalid processor (x23=0)? - b __error_p -1: + bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm64/mm/proc.S for details. x23 = base of - * cpu_info structure selected by lookup_processor_type above. + * The following calls CPU setup code, see arch/arm64/mm/proc.S for + * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, __switch_data // address to jump to after + ldr x27, =__mmap_switched // address to jump to after // MMU has been enabled - adrp lr, __enable_mmu // return (PIC) address - add lr, lr, #:lo12:__enable_mmu - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - br x12 // initialise processor + adr_l lr, __enable_mmu // return (PIC) address + b __cpu_setup // initialise processor ENDPROC(stext) /* + * Preserve the arguments passed by the bootloader in x0 .. x3 + */ +preserve_boot_args: + mov x21, x0 // x21=FDT + + adr_l x0, boot_args // record the contents of + stp x21, x1, [x0] // x0 .. x3 at kernel entry + stp x2, x3, [x0, #16] + + dmb sy // needed before dc ivac with + // MMU off + + add x1, x0, #0x20 // 4 x 8 bytes + b __inval_cache_range // tail call +ENDPROC(preserve_boot_args) + +/* * Determine validity of the x21 FDT pointer. * The dtb must be 8-byte aligned and live in the first 512M of memory. */ @@ -356,7 +352,8 @@ ENDPROC(__vet_fdt) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir mov x27, lr /* @@ -385,12 +382,50 @@ __create_page_tables: * Create the identity mapping. 
*/ mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) + adrp x3, KERNEL_START // __pa(KERNEL_START) + +#ifndef CONFIG_ARM64_VA_BITS_48 +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be + * created that covers system RAM if that is located sufficiently high + * in the physical address space. So for the ID map, use an extended + * virtual range in that case, by configuring an additional translation + * level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" +#endif + + /* + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of KERNEL_END. + */ + adrp x5, KERNEL_END + clz x5, x5 + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + b.ge 1f // .. then skip additional level + + adr_l x6, idmap_t0sz + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 +1: +#endif + create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) + adr_l x6, KERNEL_END // __pa(KERNEL_END) create_block_map x0, x7, x3, x5, x6 /* @@ -399,7 +434,7 @@ __create_page_tables: mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END + ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -426,6 +461,7 @@ __create_page_tables: */ mov x0, x25 add x1, x26, #SWAPPER_DIR_SIZE + dmb sy bl __inval_cache_range mov lr, x27 @@ -433,37 +469,22 @@ __create_page_tables: ENDPROC(__create_page_tables) .ltorg - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - /* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. + * The following fragment of code is executed with the MMU enabled. */ + .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr x3, __switch_data + 8 + adr_l x6, __bss_start + adr_l x7, __bss_stop - ldp x6, x7, [x3], #16 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS b 1b 2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET + adr_l sp, initial_sp, x4 + str_l x21, __fdt_pointer, x5 // Save FDT pointer + str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 b start_kernel ENDPROC(__mmap_switched) @@ -566,8 +587,7 @@ ENDPROC(el2_setup) * in x20. See arch/arm64/include/asm/virt.h for more info. 
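Editor's note: the clz trick above works because TTBR0 covers 2^(64 - T0SZ) bytes, so the largest T0SZ that still reaches a given physical address is exactly that address's count of leading zero bits. A standalone check (function name invented; input assumed non-zero since clz of 0 is undefined):

    #include <stdint.h>

    static unsigned int max_t0sz_for(uint64_t kernel_end_pa)
    {
            return __builtin_clzll(kernel_end_pa);  /* GCC/Clang builtin */
    }

    /* RAM ending at 1 << 39 gives T0SZ = 24, i.e. a 40-bit input range --
     * one bit wider than the default T0SZ of 25 for a VA_BITS = 39 kernel,
     * which is exactly the case where the extra translation level above
     * gets configured. */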
*/ ENTRY(set_cpu_boot_mode_flag) - ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode - add x1, x1, x28 + adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 @@ -588,29 +608,21 @@ ENDPROC(set_cpu_boot_mode_flag) .align L1_CACHE_SHIFT ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 - .long 0 + .long BOOT_CPU_MODE_EL1 .popsection #ifdef CONFIG_SMP - .align 3 -1: .quad . - .quad secondary_holding_pen_release - /* * This provides a "holding pen" for platforms to hold all secondary * cores until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 - adr x1, 1b - ldp x2, x3, [x1] - sub x1, x1, x2 - add x3, x3, x1 + adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] cmp x4, x0 b.eq secondary_startup @@ -624,7 +636,6 @@ ENDPROC(secondary_holding_pen) */ ENTRY(secondary_entry) bl el2_setup // Drop to EL1 - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry) @@ -633,16 +644,9 @@ ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. */ - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? - - pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - blr x12 // initialise processor + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + bl __cpu_setup // initialise processor ldr x21, =secondary_data ldr x27, =__secondary_switched // address to jump to after enabling the MMU @@ -658,11 +662,12 @@ ENDPROC(__secondary_switched) #endif /* CONFIG_SMP */ /* - * Setup common bits before finally enabling the MMU. Essentially this is just - * loading the page table pointer and vector base registers. + * Enable the MMU. * - * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on - * the MMU. + * x0 = SCTLR_EL1 value for turning on the MMU. + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion */ __enable_mmu: ldr x5, =vectors @@ -670,89 +675,7 @@ __enable_mmu: msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible memory - * space. You will not be able to trace execution through this. - * - * x0 = system control register - * x27 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - * - * We align the entire function to the smallest power of two larger than it to - * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET - * close to the end of a 512MB or 1GB block we might require an additional - * table to map the entire function. - */ - .align 4 -__turn_mmu_on: msr sctlr_el1, x0 isb br x27 -ENDPROC(__turn_mmu_on) - -/* - * Calculate the start of physical memory. - */ -__calc_phys_offset: - adr x0, 1f - ldp x1, x2, [x0] - sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET - add x24, x2, x28 // x24 = PHYS_OFFSET - ret -ENDPROC(__calc_phys_offset) - - .align 3 -1: .quad . - .quad PAGE_OFFSET - -/* - * Exception handling. Something went wrong and we can't proceed.
We ought to - * tell the user, but since we don't have any guarantee that we're even - * running on the right architecture, we do virtually nothing. - */ -__error_p: -ENDPROC(__error_p) - -__error: -1: nop - b 1b -ENDPROC(__error) - -/* - * This function gets the processor ID in w0 and searches the cpu_table[] for - * a match. It returns a pointer to the struct cpu_info it found. The - * cpu_table[] must end with an empty (all zeros) structure. - * - * This routine can be called via C code and it needs to work with the MMU - * both disabled and enabled (the offset is calculated automatically). - */ -ENTRY(lookup_processor_type) - adr x1, __lookup_processor_type_data - ldp x2, x3, [x1] - sub x1, x1, x2 // get offset between VA and PA - add x3, x3, x1 // convert VA to PA -1: - ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask - cbz w5, 2f // end of list? - and w6, w6, w0 - cmp w5, w6 - b.eq 3f - add x3, x3, #CPU_INFO_SZ - b 1b -2: - mov x3, #0 // unknown processor -3: - mov x0, x3 - ret -ENDPROC(lookup_processor_type) - - .align 3 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .quad . - .quad cpu_table - .size __lookup_processor_type_data, . - __lookup_processor_type_data +ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index c8eca88f12e6..924902083e47 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -265,23 +265,13 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) return aarch64_insn_patch_text_sync(addrs, insns, cnt); } -u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, - u32 insn, u64 imm) +static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, + u32 *maskp, int *shiftp) { - u32 immlo, immhi, lomask, himask, mask; + u32 mask; int shift; switch (type) { - case AARCH64_INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; case AARCH64_INSN_IMM_26: mask = BIT(26) - 1; shift = 0; @@ -320,9 +310,68 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, shift = 16; break; default: - pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", - type); - return 0; + return -EINVAL; + } + + *maskp = mask; + *shiftp = shift; + + return 0; +} + +#define ADR_IMM_HILOSPLIT 2 +#define ADR_IMM_SIZE SZ_2M +#define ADR_IMM_LOMASK ((1 << ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_HIMASK ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_LOSHIFT 29 +#define ADR_IMM_HISHIFT 5 + +u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn) +{ + u32 immlo, immhi, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + shift = 0; + immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK; + immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK; + insn = (immhi << ADR_IMM_HILOSPLIT) | immlo; + mask = ADR_IMM_SIZE - 1; + break; + default: + if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { + pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + } + + return (insn >> shift) & mask; +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + shift = 0; + immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT; + imm >>= ADR_IMM_HILOSPLIT; + immhi = 
(imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT; + imm = immlo | immhi; + mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) | + (ADR_IMM_HIMASK << ADR_IMM_HISHIFT)); + break; + default: + if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } } /* Update the immediate field. */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 25a5308744b1..195991dadc37 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,8 +25,10 @@ #include <linux/irq.h> #include <linux/kernel.h> #include <linux/export.h> +#include <linux/of.h> #include <linux/perf_event.h> #include <linux/platform_device.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/uaccess.h> @@ -322,22 +324,31 @@ out: } static int -validate_event(struct pmu_hw_events *hw_events, - struct perf_event *event) +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct arm_pmu *armpmu; struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; if (is_software_event(event)) return 1; + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != pmu) + return 0; + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; + armpmu = to_arm_pmu(event->pmu); return armpmu->get_event_idx(hw_events, &fake_event) >= 0; } @@ -355,15 +366,15 @@ validate_group(struct perf_event *event) memset(fake_used_mask, 0, sizeof(fake_used_mask)); fake_pmu.used_mask = fake_used_mask; - if (!validate_event(&fake_pmu, leader)) + if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) + if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } - if (!validate_event(&fake_pmu, event)) + if (!validate_event(event->pmu, &fake_pmu, event)) return -EINVAL; return 0; @@ -396,7 +407,12 @@ armpmu_release_hardware(struct arm_pmu *armpmu) free_percpu_irq(irq, &cpu_hw_events); } else { for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + int cpu = i; + + if (armpmu->irq_affinity) + cpu = armpmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq > 0) @@ -450,19 +466,24 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); } else { for (i = 0; i < irqs; ++i) { + int cpu = i; + err = 0; irq = platform_get_irq(pmu_device, i); if (irq <= 0) continue; + if (armpmu->irq_affinity) + cpu = armpmu->irq_affinity[i]; + /* * If we have a single PMU interrupt that we can't shift, * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. 
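Editor's note: the ADR immediate is the one irregular case, which is why it bypasses aarch64_get_imm_shift_mask(): its 21 bits are split as immlo in insn[30:29] and immhi in insn[23:5]. A round-trip sketch of the two paths above as standalone C (function names invented):

    #include <assert.h>
    #include <stdint.h>

    #define ADR_IMM_HILOSPLIT 2
    #define ADR_IMM_SIZE      (1u << 21)    /* SZ_2M */
    #define ADR_IMM_LOMASK    ((1u << ADR_IMM_HILOSPLIT) - 1)
    #define ADR_IMM_HIMASK    ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
    #define ADR_IMM_LOSHIFT   29
    #define ADR_IMM_HISHIFT   5

    static uint32_t adr_decode(uint32_t insn)
    {
            uint32_t immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
            uint32_t immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;

            return (immhi << ADR_IMM_HILOSPLIT) | immlo;
    }

    static uint32_t adr_encode(uint32_t insn, uint32_t imm)
    {
            uint32_t immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
            uint32_t immhi = ((imm >> ADR_IMM_HILOSPLIT) & ADR_IMM_HIMASK)
                             << ADR_IMM_HISHIFT;
            uint32_t mask = (ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
                            (ADR_IMM_HIMASK << ADR_IMM_HISHIFT);

            return (insn & ~mask) | immlo | immhi;
    }

    int main(void)
    {
            /* any 21-bit immediate must survive an encode/decode round trip
             * (0x10000000 stands in for the fixed ADR opcode bits) */
            assert(adr_decode(adr_encode(0x10000000, 0x12345)) == 0x12345);
            return 0;
    }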
*/ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); + irq, cpu); continue; } @@ -476,7 +497,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + cpumask_set_cpu(cpu, &armpmu->active_irqs); } } @@ -1289,9 +1310,46 @@ static const struct of_device_id armpmu_of_device_ids[] = { static int armpmu_device_probe(struct platform_device *pdev) { + int i, *irqs; + if (!cpu_pmu) return -ENODEV; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(dn), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + } + + if (i == pdev->num_resources) + cpu_pmu->irq_affinity = irqs; + else + kfree(irqs); + cpu_pmu->plat_device = pdev; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index e8420f635bd4..51ef97274b52 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -50,7 +50,6 @@ #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/elf.h> -#include <asm/cputable.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/sections.h> @@ -62,9 +61,7 @@ #include <asm/memblock.h> #include <asm/psci.h> #include <asm/efi.h> - -unsigned int processor_id; -EXPORT_SYMBOL(processor_id); +#include <asm/virt.h> unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -83,7 +80,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -static const char *cpu_name; phys_addr_t __fdt_pointer __initdata; /* @@ -119,6 +115,11 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +/* + * The recorded values of x0 .. x3 upon kernel entry. 
+ */ +u64 __cacheline_aligned boot_args[4]; + void __init smp_setup_processor_id(void) { u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; @@ -207,24 +208,38 @@ static void __init smp_build_mpidr_hash(void) } #endif +static void __init hyp_mode_check(void) +{ + if (is_hyp_mode_available()) + pr_info("CPU: All CPU(s) started at EL2\n"); + else if (is_hyp_mode_mismatched()) + WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC, + "CPU: CPUs started in inconsistent modes"); + else + pr_info("CPU: All CPU(s) started at EL1\n"); +} + +void __init do_post_cpus_up_work(void) +{ + hyp_mode_check(); + apply_alternatives_all(); +} + +#ifdef CONFIG_UP_LATE_INIT +void __init up_late_init(void) +{ + do_post_cpus_up_work(); +} +#endif /* CONFIG_UP_LATE_INIT */ + static void __init setup_processor(void) { - struct cpu_info *cpu_info; u64 features, block; u32 cwg; int cls; - cpu_info = lookup_processor_type(read_cpuid_id()); - if (!cpu_info) { - printk("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); - } - - cpu_name = cpu_info->cpu_name; - - printk("CPU: %s [%08x] revision %d\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15); + printk("CPU: AArch64 Processor [%08x] revision %d\n", + read_cpuid_id(), read_cpuid_id() & 15); sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; @@ -402,6 +417,12 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + if (boot_args[1] || boot_args[2] || boot_args[3]) { + pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" + "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" + "This indicates a broken bootloader or old kernel\n", + boot_args[1], boot_args[2], boot_args[3]); + } } static int __init arm64_device_init(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 328b8ce4b007..ffe8e1b814e0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void) */ cpu_set_reserved_ttbr0(); flush_tlb_all(); + cpu_set_default_tcr_t0sz(); preempt_disable(); trace_hardirqs_off(); @@ -309,7 +310,7 @@ void cpu_die(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); - apply_alternatives_all(); + do_post_cpus_up_work(); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index 2d5ab3c90b82..a40b1343b819 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -37,6 +37,7 @@ asmlinkage long compat_sys_readahead_wrapper(void); asmlinkage long compat_sys_fadvise64_64_wrapper(void); asmlinkage long compat_sys_sync_file_range2_wrapper(void); asmlinkage long compat_sys_fallocate_wrapper(void); +asmlinkage long compat_sys_mmap2_wrapper(void); #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5d9d2dca530d..a2c29865c3fe 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -23,10 +23,14 @@ jiffies = jiffies_64; #define HYPERVISOR_TEXT \ /* \ - * Force the alignment to be compatible with \ - * the vectors requirements \ + * Align to 4 KB so that \ + * a) the HYP vector table is at its minimum \ + * alignment of 2048 bytes \ + * b) the HYP init code will not cross a page \ + * boundary if its size does not exceed \ + * 4 KB (see related ASSERT() below) \ */ \ - . = ALIGN(2048); \ + . 
= ALIGN(SZ_4K); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ @@ -163,10 +167,11 @@ SECTIONS } /* - * The HYP init code can't be more than a page long. + * The HYP init code can't be more than a page long, + * and should not cross a page boundary. */ -ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), - "HYP init code too big") +ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, + "HYP init code too big or misaligned") /* * If padding is applied before .head.text, virt<->phys conversions will fail. diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index c3191168a994..178ba2248a98 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -20,6 +20,7 @@ #include <asm/assembler.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <asm/pgtable-hwdef.h> .text .pushsection .hyp.idmap.text, "ax" @@ -65,6 +66,25 @@ __do_hyp_init: and x4, x4, x5 ldr x5, =TCR_EL2_FLAGS orr x4, x4, x5 + +#ifndef CONFIG_ARM64_VA_BITS_48 + /* + * If we are running with VA_BITS < 48, we may be running with an extra + * level of translation in the ID map. This is only the case if system + * RAM is out of range for the currently configured page size and number + * of translation levels, in which case we will also need the extra + * level for the HYP ID map, or we won't be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need the extra level in *both* our translation + * tables. + * + * So use the same T0SZ value we use for the ID map. + */ + ldr_l x5, idmap_t0sz + bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH +#endif msr tcr_el2, x4 ldr x4, =VTCR_EL2_FLAGS @@ -91,6 +111,10 @@ __do_hyp_init: msr sctlr_el2, x4 isb + /* Skip the trampoline dance if we merged the boot and runtime PGDs */ + cmp x0, x1 + b.eq merged + /* MMU is now enabled. Get ready for the trampoline dance */ ldr x4, =TRAMPOLINE_VA adr x5, target @@ -105,6 +129,7 @@ target: /* We're now in the trampoline code, switch page tables */ tlbi alle2 dsb sy +merged: /* Set the stack and new vectors */ kern_hyp_va x2 mov sp, x2 diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 79e01163a981..5b8b664422d3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,6 +40,8 @@ #include "mm.h" +u64 idmap_t0sz = TCR_T0SZ(VA_BITS); + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. 
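Editor's note: the reworked vmlinux.lds ASSERT above encodes "does not cross a page boundary" rather than merely "fits in one page": rounding the start down to its 4 KB page and bounding the end against that same page catches both oversized and misaligned HYP init text. A quick model of the predicate (function name invented):

    #include <assert.h>
    #include <stdint.h>

    #define SZ_4K 0x1000u

    static int hyp_idmap_fits(uint64_t start, uint64_t end)
    {
            /* end is exclusive, as with the linker-provided symbols */
            return end - (start & ~(uint64_t)(SZ_4K - 1)) <= SZ_4K;
    }

    int main(void)
    {
            assert(hyp_idmap_fits(0x1000, 0x1800));   /* within one page */
            assert(!hyp_idmap_fits(0x1f00, 0x2100));  /* crosses 0x2000 */
            return 0;
    }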
@@ -454,6 +456,7 @@ void __init paging_init(void) */ cpu_set_reserved_ttbr0(); flush_tlb_all(); + cpu_set_default_tcr_t0sz(); } /* @@ -461,8 +464,10 @@ void __init paging_init(void) */ void setup_mm_for_reboot(void) { - cpu_switch_mm(idmap_pg_dir, &init_mm); + cpu_set_reserved_ttbr0(); flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); + cpu_switch_mm(idmap_pg_dir, &init_mm); } /* @@ -627,10 +632,7 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long addr = __fix_to_virt(idx); pte_t *pte; - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); pte = fixmap_pte(addr); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 1d3ec3ddd84b..e47ed1c5dce1 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -73,7 +73,6 @@ int set_memory_ro(unsigned long addr, int numpages) __pgprot(PTE_RDONLY), __pgprot(PTE_WRITE)); } -EXPORT_SYMBOL_GPL(set_memory_ro); int set_memory_rw(unsigned long addr, int numpages) { @@ -81,7 +80,6 @@ int set_memory_rw(unsigned long addr, int numpages) __pgprot(PTE_WRITE), __pgprot(PTE_RDONLY)); } -EXPORT_SYMBOL_GPL(set_memory_rw); int set_memory_nx(unsigned long addr, int numpages) { diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 005d29e2977d..4c4d93c4bf65 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -52,3 +52,13 @@ mov \reg, #4 // bytes per word lsl \reg, \reg, \tmp // actual cache line size .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 28eebfb6af76..cdd754e19b9b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -156,6 +156,7 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 msr ttbr0_el1, x1 msr ttbr1_el1, x7 + tcr_set_idmap_t0sz x8, x7 msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 @@ -233,6 +234,8 @@ ENTRY(__cpu_setup) */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + tcr_set_idmap_t0sz x10, x9 + /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in * TCR_EL1. 
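Editor's note: tcr_set_idmap_t0sz splices idmap_t0sz into a TCR_EL1 image with a single bfi, so both __cpu_setup and cpu_do_resume pick up the enlarged ID-map range when VA_BITS < 48. The equivalent C, as a sketch (field constants as defined earlier in this patch):

    #include <stdint.h>

    #define TCR_T0SZ_OFFSET 0
    #define TCR_TxSZ_WIDTH  6

    /* bfi valreg, tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH */
    static uint64_t tcr_set_idmap_t0sz(uint64_t tcr, uint64_t idmap_t0sz)
    {
            const uint64_t mask = ((UINT64_C(1) << TCR_TxSZ_WIDTH) - 1)
                                  << TCR_T0SZ_OFFSET;

            /* insert the low 6 bits of idmap_t0sz into the T0SZ field,
             * leaving every other TCR field untouched */
            return (tcr & ~mask) | ((idmap_t0sz << TCR_T0SZ_OFFSET) & mask);
    }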
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile index e72eb3417239..6b0be670ddfa 100644 --- a/arch/c6x/Makefile +++ b/arch/c6x/Makefile @@ -8,7 +8,7 @@ KBUILD_DEFCONFIG := dsk6455_defconfig -cflags-y += -mno-dsbt -msdata=none +cflags-y += -mno-dsbt -msdata=none -D__linux__ cflags-$(CONFIG_C6X_BIG_KERNEL) += -mlong-calls diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 2de73391b81e..ae0a51f5376c 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -41,6 +41,7 @@ generic-y += resource.h generic-y += scatterlist.h generic-y += segment.h generic-y += sembuf.h +generic-y += serial.h generic-y += shmbuf.h generic-y += shmparam.h generic-y += siginfo.h diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 88bd0d899bdb..bbd7774e4d4e 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -17,6 +17,14 @@ #define dma_supported(d, m) 1 +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t addr, + unsigned long offset, + size_t size, + enum dma_data_direction dir) +{ +} + static inline int dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h new file mode 100644 index 000000000000..a1858bd5f6c8 --- /dev/null +++ b/arch/c6x/include/asm/flat.h @@ -0,0 +1,12 @@ +#ifndef __ASM_C6X_FLAT_H +#define __ASM_C6X_FLAT_H + +#define flat_argvp_envp_on_stack() 0 +#define flat_old_ram_flag(flags) (flags) +#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) +#define flat_get_addr_from_rp(rp, relval, flags, p) get_unaligned(rp) +#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val, rp) +#define flat_get_relocate_addr(rel) (rel) +#define flat_set_persistent(relval, p) 0 + +#endif /* __ASM_C6X_FLAT_H */ diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h index 696804475f55..852afb209afb 100644 --- a/arch/c6x/include/asm/setup.h +++ b/arch/c6x/include/asm/setup.h @@ -12,6 +12,7 @@ #define _ASM_C6X_SETUP_H #include <uapi/asm/setup.h> +#include <linux/types.h> #ifndef __ASSEMBLY__ extern int c6x_add_memory(phys_addr_t start, unsigned long size); diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 757128868d43..72e17f7ebd6f 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -26,7 +26,8 @@ #include <linux/cpu.h> #include <linux/fs.h> #include <linux/of.h> - +#include <linux/console.h> +#include <linux/screen_info.h> #include <asm/sections.h> #include <asm/div64.h> @@ -38,6 +39,8 @@ static const char *c6x_soc_name; +struct screen_info screen_info; + int c6x_num_cores; EXPORT_SYMBOL_GPL(c6x_num_cores); @@ -60,6 +63,7 @@ unsigned char c6x_fuse_mac[6]; unsigned long memory_start; unsigned long memory_end; +EXPORT_SYMBOL(memory_end); unsigned long ram_start; unsigned long ram_end; @@ -265,8 +269,8 @@ int __init c6x_add_memory(phys_addr_t start, unsigned long size) */ notrace void __init machine_init(unsigned long dt_ptr) { - const void *dtb = __va(dt_ptr); - const void *fdt = _fdt_start; + void *dtb = __va(dt_ptr); + void *fdt = _fdt_start; /* interrupts must be masked */ set_creg(IER, 2); diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c index 356ee84cad95..04845aaf5985 100644 --- a/arch/c6x/kernel/time.c +++ b/arch/c6x/kernel/time.c @@ -49,7 +49,7 @@ u64 sched_clock(void) return (tsc * sched_clock_multiplier) >> SCHED_CLOCK_SHIFT; } -void time_init(void) +void __init 
time_init(void) { u64 tmp = (u64)NSEC_PER_SEC << SCHED_CLOCK_SHIFT; diff --git a/arch/c6x/platforms/cache.c b/arch/c6x/platforms/cache.c index 86318a16a252..46fd2d530271 100644 --- a/arch/c6x/platforms/cache.c +++ b/arch/c6x/platforms/cache.c @@ -350,6 +350,7 @@ void L1P_cache_block_invalidate(unsigned int start, unsigned int end) (unsigned int *) end, IMCR_L1PIBAR, IMCR_L1PIWC); } +EXPORT_SYMBOL(L1P_cache_block_invalidate); void L1D_cache_block_invalidate(unsigned int start, unsigned int end) { @@ -371,6 +372,7 @@ void L1D_cache_block_writeback(unsigned int start, unsigned int end) (unsigned int *) end, IMCR_L1DWBAR, IMCR_L1DWWC); } +EXPORT_SYMBOL(L1D_cache_block_writeback); /* * L2 block operations diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9b780e0d2c18..190cc48abc0c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM select HAVE_GENERIC_RCU_GUP + select HAVE_PERF_EVENTS_NMI if PPC64 config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -189,9 +190,6 @@ config ARCH_MAY_HAVE_PC_FDC bool default PCI -config PPC_OF - def_bool y - config PPC_UDBG_16550 bool default n diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ec2e40f2cc11..0efa8f90a8f1 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -117,7 +117,7 @@ config BDI_SWITCH config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" - depends on PPC_OF && PPC_BOOK3S + depends on PPC_BOOK3S help Say Y here to see progress messages from the boot firmware in text mode. Requires either BootX or Open Firmware. @@ -193,13 +193,6 @@ config PPC_EARLY_DEBUG_PAS_REALMODE Select this to enable early debugging for PA Semi. Output will be on UART0. -config PPC_EARLY_DEBUG_BEAT - bool "Beat HV Console" - depends on PPC_CELLEB - select PPC_UDBG_BEAT - help - Select this to enable early debugging for Celleb with Beat. 
- config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" depends on 44x diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index fc502e042438..07a480861f78 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -248,10 +248,10 @@ boot := arch/$(ARCH)/boot ifeq ($(CONFIG_RELOCATABLE),y) quiet_cmd_relocs_check = CALL $< - cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux" + cmd_relocs_check = $(CONFIG_SHELL) $< "$(OBJDUMP)" "$(obj)/vmlinux" PHONY += relocs_check -relocs_check: arch/powerpc/relocs_check.pl vmlinux +relocs_check: arch/powerpc/relocs_check.sh vmlinux $(call cmd,relocs_check) zImage: relocs_check diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 8a5bc1cfc6aa..73eddda53b8e 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -110,7 +110,6 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S -src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S src-wlib := $(sort $(src-wlib-y)) @@ -215,7 +214,6 @@ image-$(CONFIG_PPC_POWERNV) += zImage.pseries image-$(CONFIG_PPC_MAPLE) += zImage.maple image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 -image-$(CONFIG_PPC_CELLEB) += zImage.pseries image-$(CONFIG_PPC_CELL_QPACE) += zImage.pseries image-$(CONFIG_PPC_CHRP) += zImage.chrp image-$(CONFIG_PPC_EFIKA) += zImage.chrp @@ -317,7 +315,7 @@ endif # Allow extra targets to be added to the defconfig image-y += $(subst ",,$(CONFIG_EXTRA_TARGETS)) -initrd- := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-)) +initrd- := $(patsubst zImage%, zImage.initrd%, $(image-)) initrd-y := $(patsubst zImage%, zImage.initrd%, \ $(patsubst dtbImage%, dtbImage.initrd%, \ $(patsubst simpleImage%, simpleImage.initrd%, \ diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 14de4f8778a7..12866ccb5694 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -155,29 +155,29 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ ld r9,(p_rela-p_base)(r10) add r9,r9,r10 - li r7,0 + li r13,0 li r8,0 -9: ld r6,0(r11) /* get tag */ - cmpdi r6,0 +9: ld r12,0(r11) /* get tag */ + cmpdi r12,0 beq 12f /* end of list */ - cmpdi r6,RELA + cmpdi r12,RELA bne 10f - ld r7,8(r11) /* get RELA pointer in r7 */ + ld r13,8(r11) /* get RELA pointer in r13 */ b 11f -10: addis r6,r6,(-RELACOUNT)@ha - cmpdi r6,RELACOUNT@l +10: addis r12,r12,(-RELACOUNT)@ha + cmpdi r12,RELACOUNT@l bne 11f ld r8,8(r11) /* get RELACOUNT value in r8 */ 11: addi r11,r11,16 b 9b 12: - cmpdi r13,0 /* check we have both RELA and RELACOUNT */ + cmpdi r13,0 /* check we have both RELA and RELACOUNT */ cmpdi cr1,r8,0 beq 3f beq cr1,3f /* Calculate the runtime offset. */ - subf r7,r7,r9 + subf r13,r13,r9 /* Run through the list of relocations and process the * R_PPC64_RELATIVE ones.
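Editor's note: the loop above applies R_PPC64_RELATIVE entries so the boot wrapper can run at whatever address it was loaded at. A C model of what the assembly does (sketch; names invented, with 'delta' standing for the runtime-minus-link-time offset the code computes into r13):

    #include <stdint.h>

    typedef struct {
            uint64_t r_offset;      /* where to apply the fixup     */
            uint64_t r_info;        /* relocation type (symbol 0)   */
            int64_t  r_addend;      /* link-time value              */
    } elf64_rela;

    #define R_PPC64_RELATIVE 22

    static void apply_relative_relocs(const elf64_rela *rela, uint64_t count,
                                      uint64_t delta)
    {
            for (uint64_t i = 0; i < count; i++) {
                    /* like the assembly, stop at the first non-RELATIVE entry */
                    if (rela[i].r_info != R_PPC64_RELATIVE)
                            break;
                    /* stdx r0,r13,r12: *(delta + r_offset) = r_addend + delta */
                    *(uint64_t *)(rela[i].r_offset + delta) =
                            (uint64_t)rela[i].r_addend + delta;
            }
    }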
*/ @@ -185,10 +185,10 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ 13: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ cmpdi r0,22 /* R_PPC64_RELATIVE */ bne 3f - ld r6,0(r9) /* reloc->r_offset */ + ld r12,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ - add r0,r0,r7 - stdx r0,r7,r6 + add r0,r0,r13 + stdx r0,r13,r12 addi r9,r9,24 bdnz 13b @@ -218,7 +218,7 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ beq 6f ld r1,0(r8) li r0,0 - stdu r0,-16(r1) /* establish a stack frame */ + stdu r0,-112(r1) /* establish a stack frame */ 6: #endif /* __powerpc64__ */ /* Call platform_init() */ diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts deleted file mode 100644 index 2aa5cd318ce8..000000000000 --- a/arch/powerpc/boot/dts/b4860emu.dts +++ /dev/null @@ -1,223 +0,0 @@ -/* - * B4860 emulator Device Tree Source - * - * Copyright 2013 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * This software is provided by Freescale Semiconductor "as is" and any - * express or implied warranties, including, but not limited to, the implied - * warranties of merchantability and fitness for a particular purpose are - * disclaimed. In no event shall Freescale Semiconductor be liable for any - * direct, indirect, incidental, special, exemplary, or consequential damages - * (including, but not limited to, procurement of substitute goods or services; - * loss of use, data, or profits; or business interruption) however caused and - * on any theory of liability, whether in contract, strict liability, or tort - * (including negligence or otherwise) arising in any way out of the use of - * this software, even if advised of the possibility of such damage. 
- */ - -/dts-v1/; - -/include/ "fsl/e6500_power_isa.dtsi" - -/ { - compatible = "fsl,B4860"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - aliases { - ccsr = &soc; - - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; - dma0 = &dma0; - dma1 = &dma1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: PowerPC,e6500@0 { - device_type = "cpu"; - reg = <0 1>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x80000000>; - }; - cpu1: PowerPC,e6500@2 { - device_type = "cpu"; - reg = <2 3>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x80000000>; - }; - cpu2: PowerPC,e6500@4 { - device_type = "cpu"; - reg = <4 5>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x80000000>; - }; - cpu3: PowerPC,e6500@6 { - device_type = "cpu"; - reg = <6 7>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x80000000>; - }; - }; -}; - -/ { - model = "fsl,B4860QDS"; - compatible = "fsl,B4860EMU", "fsl,B4860QDS"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - ifc: localbus@ffe124000 { - reg = <0xf 0xfe124000 0 0x2000>; - ranges = <0 0 0xf 0xe8000000 0x08000000 - 2 0 0xf 0xff800000 0x00010000 - 3 0 0xf 0xffdf0000 0x00008000>; - - nor@0,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "cfi-flash"; - reg = <0x0 0x0 0x8000000>; - bank-width = <2>; - device-width = <1>; - }; - }; - - memory { - device_type = "memory"; - }; - - soc: soc@ffe000000 { - ranges = <0x00000000 0xf 0xfe000000 0x1000000>; - reg = <0xf 0xfe000000 0 0x00001000>; - }; -}; - -&ifc { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; - interrupts = <25 2 0 0>; -}; - -&soc { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "simple-bus"; - - soc-sram-error { - compatible = "fsl,soc-sram-error"; - interrupts = <16 2 1 2>; - }; - - corenet-law@0 { - compatible = "fsl,corenet-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <32>; - }; - - ddr1: memory-controller@8000 { - compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller"; - reg = <0x8000 0x1000>; - interrupts = <16 2 1 8>; - }; - - ddr2: memory-controller@9000 { - compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller"; - reg = <0x9000 0x1000>; - interrupts = <16 2 1 9>; - }; - - cpc: l3-cache-controller@10000 { - compatible = "fsl,b4-l3-cache-controller", "cache"; - reg = <0x10000 0x1000 - 0x11000 0x1000>; - interrupts = <16 2 1 4>; - }; - - corenet-cf@18000 { - compatible = "fsl,corenet2-cf", "fsl,corenet-cf"; - reg = <0x18000 0x1000>; - interrupts = <16 2 1 0>; - fsl,ccf-num-csdids = <32>; - fsl,ccf-num-snoopids = <32>; - }; - - iommu@20000 { - compatible = "fsl,pamu-v1.0", "fsl,pamu"; - reg = <0x20000 0x4000>; - fsl,portid-mapping = <0x8000>; - #address-cells = <1>; - #size-cells = <1>; - interrupts = < - 24 2 0 0 - 16 2 1 1>; - pamu0: pamu@0 { - reg = <0 0x1000>; - fsl,primary-cache-geometry = <8 1>; - fsl,secondary-cache-geometry = <32 2>; - }; - }; - -/include/ "fsl/qoriq-mpic.dtsi" - - guts: global-utilities@e0000 { - compatible = "fsl,b4-device-config"; - reg = <0xe0000 0xe00>; - fsl,has-rstcr; - fsl,liodn-bits = <12>; - }; - -/include/ "fsl/qoriq-clockgen2.dtsi" - global-utilities@e1000 { - compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; - }; - -/include/ "fsl/qoriq-dma-0.dtsi" - dma@100300 { - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */ - }; - -/include/ "fsl/qoriq-dma-1.dtsi" - 
dma@101300 { - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */ - }; - -/include/ "fsl/qoriq-i2c-0.dtsi" -/include/ "fsl/qoriq-i2c-1.dtsi" -/include/ "fsl/qoriq-duart-0.dtsi" -/include/ "fsl/qoriq-duart-1.dtsi" - - L2: l2-cache-controller@c20000 { - compatible = "fsl,b4-l2-cache-controller"; - reg = <0xc20000 0x1000>; - next-level-cache = <&cpc>; - }; -}; diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index e5bde0b85135..24ed80dc2120 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -1,7 +1,7 @@ /* * B4420DS Device Tree Source * - * Copyright 2012 Freescale Semiconductor, Inc. + * Copyright 2012 - 2014 Freescale Semiconductor, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -97,10 +97,25 @@ device_type = "memory"; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + }; + dcsr: dcsr@f00000000 { ranges = <0x00000000 0xf 0x00000000 0x01052000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 65100b9636b7..f35e9e0a5445 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -1,7 +1,7 @@ /* * B4860 Silicon/SoC Device Tree Source (post include) * - * Copyright 2012 Freescale Semiconductor Inc. + * Copyright 2012 - 2014 Freescale Semiconductor Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -109,6 +109,64 @@ }; }; +&bportals { + bman-portal@38000 { + compatible = "fsl,bman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <133 2 0 0>; + }; + bman-portal@3c000 { + compatible = "fsl,bman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <135 2 0 0>; + }; + bman-portal@40000 { + compatible = "fsl,bman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <137 2 0 0>; + }; + bman-portal@44000 { + compatible = "fsl,bman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <139 2 0 0>; + }; + bman-portal@48000 { + compatible = "fsl,bman-portal"; + reg = <0x48000 0x4000>, <0x1012000 0x1000>; + interrupts = <141 2 0 0>; + }; + bman-portal@4c000 { + compatible = "fsl,bman-portal"; + reg = <0x4c000 0x4000>, <0x1013000 0x1000>; + interrupts = <143 2 0 0>; + }; + bman-portal@50000 { + compatible = "fsl,bman-portal"; + reg = <0x50000 0x4000>, <0x1014000 0x1000>; + interrupts = <145 2 0 0>; + }; + bman-portal@54000 { + compatible = "fsl,bman-portal"; + reg = <0x54000 0x4000>, <0x1015000 0x1000>; + interrupts = <147 2 0 0>; + }; + bman-portal@58000 { + compatible = "fsl,bman-portal"; + reg = <0x58000 0x4000>, <0x1016000 0x1000>; + interrupts = <149 2 0 0>; + }; + bman-portal@5c000 { + compatible = "fsl,bman-portal"; + reg = <0x5c000 0x4000>, <0x1017000 0x1000>; + interrupts = <151 2 0 0>; + }; + bman-portal@60000 { + compatible = "fsl,bman-portal"; + reg = <0x60000 0x4000>, <0x1018000 0x1000>; + interrupts = <153 2 0 0>; + }; +}; + &soc { ddr2: memory-controller@9000 { compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller"; diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 1a54ba71f685..73136c0029d2 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -1,7 +1,7 @@ /* * B4420 Silicon/SoC Device Tree Source (post include) * - * Copyright 2012 Freescale Semiconductor, Inc. + * Copyright 2012 - 2014 Freescale Semiconductor, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,11 @@ * this software, even if advised of the possibility of such damage. 
*/ +&bman_fbpr { + compatible = "fsl,bman-fbpr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -128,6 +133,83 @@ }; }; +&bportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + bman-portal@0 { + compatible = "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + compatible = "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <107 2 0 0>; + }; + bman-portal@8000 { + compatible = "fsl,bman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <109 2 0 0>; + }; + bman-portal@c000 { + compatible = "fsl,bman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <111 2 0 0>; + }; + bman-portal@10000 { + compatible = "fsl,bman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <113 2 0 0>; + }; + bman-portal@14000 { + compatible = "fsl,bman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <115 2 0 0>; + }; + bman-portal@18000 { + compatible = "fsl,bman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <117 2 0 0>; + }; + bman-portal@1c000 { + compatible = "fsl,bman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <119 2 0 0>; + }; + bman-portal@20000 { + compatible = "fsl,bman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <121 2 0 0>; + }; + bman-portal@24000 { + compatible = "fsl,bman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <123 2 0 0>; + }; + bman-portal@28000 { + compatible = "fsl,bman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <125 2 0 0>; + }; + bman-portal@2c000 { + compatible = "fsl,bman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <127 2 0 0>; + }; + bman-portal@30000 { + compatible = "fsl,bman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <129 2 0 0>; + }; + bman-portal@34000 { + compatible = "fsl,bman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <131 2 0 0>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -261,6 +343,11 @@ /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-sec5.3-0.dtsi" +/include/ "qoriq-bman1.dtsi" + bman: bman@31a000 { + interrupts = <16 2 1 29>; + }; + L2: l2-cache-controller@c20000 { compatible = "fsl,b4-l2-cache-controller"; reg = <0xc20000 0x1000>; diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi index 81437fdf1db4..7780f21430cb 100644 --- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi @@ -1,7 +1,7 @@ /* * P1023/P1017 Silicon/SoC Device Tree Source (post include) * - * Copyright 2011 Freescale Semiconductor Inc. + * Copyright 2011 - 2014 Freescale Semiconductor Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
index 81437fdf1db4..7780f21430cb 100644
--- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P1023/P1017 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -97,6 +102,28 @@
 	};
 };
 
+&bportals {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>;
+		interrupts = <30 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <32 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <34 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -221,6 +248,14 @@
 /include/ "pq3-mpic.dtsi"
 /include/ "pq3-mpic-timer-B.dtsi"
 
+	bman: bman@8a000 {
+		compatible = "fsl,bman";
+		reg = <0x8a000 0x1000>;
+		interrupts = <16 2 0 0>;
+		fsl,bman-portals = <&bportals>;
+		memory-region = <&bman_fbpr>;
+	};
+
 	global-utilities@e0000 {
 		compatible = "fsl,p1023-guts";
 		reg = <0xe0000 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index efd74db4f9b0..f2feacfd9a25 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P2041/P2040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -216,6 +221,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -407,4 +414,6 @@
 	crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
index d7425ef1ae41..d6fea37395ad 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P3041 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -243,6 +248,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -434,4 +441,6 @@
 	crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
index 7005a4a4cef0..89482c9b2301 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P4080/P4040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
 	compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -243,6 +248,8 @@
 	};
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -490,4 +497,6 @@
 	crypto: crypto@300000 {
 		fsl,iommu-parent = <&pamu1>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 55834211bd28..6e04851e2fc9 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P5020/5010 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
 	compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -240,6 +245,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -421,6 +428,8 @@
 		fsl,iommu-parent = <&pamu1>;
 	};
 
+/include/ "qoriq-bman1.dtsi"
+
 /include/ "qoriq-raid1.0-0.dtsi"
 	raideng@320000 {
 		fsl,iommu-parent = <&pamu1>;
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
index 6e4cd6ce363c..5e44dfa1e1a5 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * P5040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * software, even if advised of the possibility of such damage.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
 	compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus";
 	interrupts = <25 2 0 0>;
@@ -195,6 +200,8 @@
 	};
 };
 
+/include/ "qoriq-bman1-portals.dtsi"
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -399,4 +406,6 @@
 	crypto@300000 {
 		fsl,iommu-parent = <&pamu4>;
 	};
+
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 15ae462e758f..5cc01be5b152 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T1040 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -218,6 +223,63 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -401,4 +463,5 @@
 		fsl,liodn-reg = <&guts 0x554>;	/* SATA2LIODNR */
 	};
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 };
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
index 1ce91e3485a9..86bdaf6cbd14 100644
--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T2081 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -224,6 +229,103 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -400,6 +502,7 @@
 		phy_type = "utmi";
 	};
 /include/ "qoriq-sec5.2-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 
 L2_1: l2-cache-controller@c20000 {	/* Cluster 0 L2 cache */
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index 0e96fcabe812..4d4f25895d8c 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * T4240 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,11 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -294,6 +299,263 @@
 	};
 };
 
+&bportals {
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+	bman-portal@48000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <141 2 0 0>;
+	};
+	bman-portal@4c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <143 2 0 0>;
+	};
+	bman-portal@50000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <145 2 0 0>;
+	};
+	bman-portal@54000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <147 2 0 0>;
+	};
+	bman-portal@58000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <149 2 0 0>;
+	};
+	bman-portal@5c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <151 2 0 0>;
+	};
+	bman-portal@60000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <153 2 0 0>;
+	};
+	bman-portal@64000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+		interrupts = <155 2 0 0>;
+	};
+	bman-portal@68000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+		interrupts = <157 2 0 0>;
+	};
+	bman-portal@6c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+		interrupts = <159 2 0 0>;
+	};
+	bman-portal@70000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+		interrupts = <161 2 0 0>;
+	};
+	bman-portal@74000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+		interrupts = <163 2 0 0>;
+	};
+	bman-portal@78000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+		interrupts = <165 2 0 0>;
+	};
+	bman-portal@7c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+		interrupts = <167 2 0 0>;
+	};
+	bman-portal@80000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+		interrupts = <169 2 0 0>;
+	};
+	bman-portal@84000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+		interrupts = <171 2 0 0>;
+	};
+	bman-portal@88000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+		interrupts = <173 2 0 0>;
+	};
+	bman-portal@8c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+		interrupts = <175 2 0 0>;
+	};
+	bman-portal@90000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+		interrupts = <385 2 0 0>;
+	};
+	bman-portal@94000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+		interrupts = <387 2 0 0>;
+	};
+	bman-portal@98000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+		interrupts = <389 2 0 0>;
+	};
+	bman-portal@9c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+		interrupts = <391 2 0 0>;
+	};
+	bman-portal@a0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+		interrupts = <393 2 0 0>;
+	};
+	bman-portal@a4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+		interrupts = <395 2 0 0>;
+	};
+	bman-portal@a8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+		interrupts = <397 2 0 0>;
+	};
+	bman-portal@ac000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+		interrupts = <399 2 0 0>;
+	};
+	bman-portal@b0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+		interrupts = <401 2 0 0>;
+	};
+	bman-portal@b4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+		interrupts = <403 2 0 0>;
+	};
+	bman-portal@b8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+		interrupts = <405 2 0 0>;
+	};
+	bman-portal@bc000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+		interrupts = <407 2 0 0>;
+	};
+	bman-portal@c0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+		interrupts = <409 2 0 0>;
+	};
+	bman-portal@c4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+		interrupts = <411 2 0 0>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
@@ -486,6 +748,7 @@
 /include/ "qoriq-sata2-0.dtsi"
 /include/ "qoriq-sata2-1.dtsi"
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-bman1.dtsi"
 
 L2_1: l2-cache-controller@c20000 {
 	compatible = "fsl,t4240-l2-cache-controller";
diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts
index 89b4119f3b19..97e6d11d1e6d 100644
--- a/arch/powerpc/boot/dts/kmcoge4.dts
+++ b/arch/powerpc/boot/dts/kmcoge4.dts
@@ -25,10 +25,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/oca4080.dts
index 3d4c751d1608..eb76caae11d9 100644
--- a/arch/powerpc/boot/dts/oca4080.dts
+++ b/arch/powerpc/boot/dts/oca4080.dts
@@ -49,10 +49,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p1023rdb.dts b/arch/powerpc/boot/dts/p1023rdb.dts
index 0a06a88ddbd5..9236e3742a23 100644
--- a/arch/powerpc/boot/dts/p1023rdb.dts
+++ b/arch/powerpc/boot/dts/p1023rdb.dts
@@ -1,7 +1,7 @@
 /*
  * P1023 RDB Device Tree Source
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Author: Chunhe Lan <Chunhe.Lan@freescale.com>
  *
@@ -47,6 +47,21 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
+	bportals: bman-portals@ff200000 {
+		ranges = <0x0 0xf 0xff200000 0x200000>;
+	};
+
 	soc: soc@ff600000 {
 		ranges = <0x0 0x0 0xff600000 0x200000>;
 
@@ -228,7 +243,6 @@
 				  0x0 0x100000>;
 		};
 	};
-
 };
 
 /include/ "fsl/p1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts
index d97ad74c7279..c1e69dc7188e 100644
--- a/arch/powerpc/boot/dts/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/p2041rdb.dts
@@ -1,7 +1,7 @@
 /*
  * P2041RDB Device Tree Source
  *
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts
index 394ea9c943c9..2192fe94866d 100644
--- a/arch/powerpc/boot/dts/p3041ds.dts
+++ b/arch/powerpc/boot/dts/p3041ds.dts
@@ -1,7 +1,7 @@
 /*
  * P3041DS Device Tree Source
  *
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 1cf6148b8b05..fad441654642 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -1,7 +1,7 @@
 /*
  * P4080DS Device Tree Source
  *
- * Copyright 2009-2011 Freescale Semiconductor Inc.
+ * Copyright 2009 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts
index b7f3057cd894..7382636dc560 100644
--- a/arch/powerpc/boot/dts/p5020ds.dts
+++ b/arch/powerpc/boot/dts/p5020ds.dts
@@ -1,7 +1,7 @@
 /*
  * P5020DS Device Tree Source
  *
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts
index 7e04bf487c04..35dabf5b6098 100644
--- a/arch/powerpc/boot/dts/p5040ds.dts
+++ b/arch/powerpc/boot/dts/p5040ds.dts
@@ -1,7 +1,7 @@
 /*
  * P5040DS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/t104xqds.dtsi
index 234f4b596c5b..f7e9bfbeefc7 100644
--- a/arch/powerpc/boot/dts/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/t104xqds.dtsi
@@ -1,7 +1,7 @@
 /*
  * T104xQDS Device Tree Source
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -77,6 +88,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi
index 187add885cae..76e07a3f2ca8 100644
--- a/arch/powerpc/boot/dts/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/t104xrdb.dtsi
@@ -33,6 +33,16 @@
  */
 
 / {
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
 
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
@@ -69,6 +79,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi
index 59061834d54e..c42e07f4f648 100644
--- a/arch/powerpc/boot/dts/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/t208xqds.dtsi
@@ -1,7 +1,7 @@
 /*
  * T2080/T2081 QDS Device Tree Source
  *
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,6 +39,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -78,6 +89,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
@@ -137,7 +152,7 @@
 			rtc@68 {
 				compatible = "dallas,ds3232";
 				reg = <0x68>;
-				interrupts = <0x1 0x1 0 0>;
+				interrupts = <0xb 0x1 0 0>;
 			};
 		};
diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi
index 1481e192e783..e1463b165d0e 100644
--- a/arch/powerpc/boot/dts/t208xrdb.dtsi
+++ b/arch/powerpc/boot/dts/t208xrdb.dtsi
@@ -39,6 +39,17 @@
 	#size-cells = <2>;
 	interrupt-parent = <&mpic>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	ifc: localbus@ffe124000 {
 		reg = <0xf 0xfe124000 0 0x2000>;
 		ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -79,6 +90,10 @@
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t4240qds.dts b/arch/powerpc/boot/dts/t4240qds.dts
index 97683f6a2936..6df77766410b 100644
--- a/arch/powerpc/boot/dts/t4240qds.dts
+++ b/arch/powerpc/boot/dts/t4240qds.dts
@@ -1,7 +1,7 @@
 /*
  * T4240QDS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -100,10 +100,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts
index 53761d4e8c51..46049cf37f02 100644
--- a/arch/powerpc/boot/dts/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/t4240rdb.dts
@@ -69,10 +69,25 @@
 		device_type = "memory";
 	};
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+	};
+
 	dcsr: dcsr@f00000000 {
 		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
 	};
 
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
 	soc: soc@ffe000000 {
 		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
 		reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c
index bb8b9b3505ee..535e8fd8900d 100644
--- a/arch/powerpc/boot/libfdt-wrapper.c
+++ b/arch/powerpc/boot/libfdt-wrapper.c
@@ -44,12 +44,12 @@
 
 #define offset_devp(off)	\
 	({ \
-		int _offset = (off); \
+		unsigned long _offset = (off); \
 		check_err(_offset) ? NULL : (void *)(_offset+1); \
 	})
 
-#define devp_offset_find(devp)	(((int)(devp))-1)
-#define devp_offset(devp)	(devp ? ((int)(devp))-1 : 0)
+#define devp_offset_find(devp)	(((unsigned long)(devp))-1)
+#define devp_offset(devp)	(devp ? ((unsigned long)(devp))-1 : 0)
 
 static void *fdt;
 static void *buf; /* = NULL */
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index c89fdb1b80e1..8dcd744e5728 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -4,15 +4,17 @@
 #include <types.h>
 #include <string.h>
 
+#include "of.h"
+
 typedef u32 uint32_t;
 typedef u64 uint64_t;
 typedef unsigned long uintptr_t;
 
-#define fdt16_to_cpu(x)		(x)
-#define cpu_to_fdt16(x)		(x)
-#define fdt32_to_cpu(x)		(x)
-#define cpu_to_fdt32(x)		(x)
-#define fdt64_to_cpu(x)		(x)
-#define cpu_to_fdt64(x)		(x)
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
 
 #endif /* _ARCH_POWERPC_BOOT_LIBFDT_ENV_H */
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index c8c1750aba0c..5603320dce07 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -24,11 +24,19 @@ void of_console_init(void);
 typedef u32 __be32;
 
 #ifdef __LITTLE_ENDIAN__
+#define cpu_to_be16(x) swab16(x)
+#define be16_to_cpu(x) swab16(x)
 #define cpu_to_be32(x) swab32(x)
 #define be32_to_cpu(x) swab32(x)
+#define cpu_to_be64(x) swab64(x)
+#define be64_to_cpu(x) swab64(x)
 #else
+#define cpu_to_be16(x) (x)
+#define be16_to_cpu(x) (x)
 #define cpu_to_be32(x) (x)
 #define be32_to_cpu(x) (x)
+#define cpu_to_be64(x) (x)
+#define be64_to_cpu(x) (x)
 #endif
 
 #define PROM_ERROR (-1u)
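The old identity definitions were only ever correct because the boot wrapper ran big-endian: a flattened device tree stores every cell big-endian regardless of the host, so a little-endian wrapper must byte-swap. A standalone userspace demonstration of the failure mode (plain C, not wrapper code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t swab32_demo(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
	       ((x & 0x00ff0000u) >> 8) | ((x & 0xff000000u) >> 24);
}

int main(void)
{
	/* #address-cells = <2> as stored in the blob: big-endian bytes */
	const unsigned char raw[4] = { 0x00, 0x00, 0x00, 0x02 };
	uint32_t cell;

	memcpy(&cell, raw, sizeof(cell));
	/* On LE, the identity macro reads 0x2000000; the swap reads 2. */
	printf("identity: %#x, byte-swapped: %#x\n", cell, swab32_demo(cell));
	return 0;
}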
diff --git a/arch/powerpc/boot/planetcore.c b/arch/powerpc/boot/planetcore.c
index 0d8558a475bb..75117e63e6db 100644
--- a/arch/powerpc/boot/planetcore.c
+++ b/arch/powerpc/boot/planetcore.c
@@ -131,36 +131,3 @@ void planetcore_set_stdout_path(const char *table)
 
 	setprop_str(chosen, "linux,stdout-path", path);
 }
-
-void planetcore_set_serial_speed(const char *table)
-{
-	void *chosen, *stdout;
-	u64 baud;
-	u32 baud32;
-	int len;
-
-	chosen = finddevice("/chosen");
-	if (!chosen)
-		return;
-
-	len = getprop(chosen, "linux,stdout-path", prop_buf, MAX_PROP_LEN);
-	if (len <= 0)
-		return;
-
-	stdout = finddevice(prop_buf);
-	if (!stdout) {
-		printf("planetcore_set_serial_speed: "
-		       "Bad /chosen/linux,stdout-path.\r\n");
-
-		return;
-	}
-
-	if (!planetcore_get_decimal(table, PLANETCORE_KEY_SERIAL_BAUD,
-	                            &baud)) {
-		printf("planetcore_set_serial_speed: No SB tag.\r\n");
-		return;
-	}
-
-	baud32 = baud;
-	setprop(stdout, "current-speed", &baud32, 4);
-}
diff --git a/arch/powerpc/boot/planetcore.h b/arch/powerpc/boot/planetcore.h
index 0d4094f1771c..d53c733cc463 100644
--- a/arch/powerpc/boot/planetcore.h
+++ b/arch/powerpc/boot/planetcore.h
@@ -43,7 +43,4 @@ void planetcore_set_mac_addrs(const char *table);
  */
 void planetcore_set_stdout_path(const char *table);
 
-/* Sets the current-speed property in the serial node. */
-void planetcore_set_serial_speed(const char *table);
-
 #endif
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ae0f88ec4a32..3f50c27ed8f8 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -277,7 +277,7 @@ treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
 epapr)
-    platformo="$object/epapr.o $object/epapr-wrapper.o"
+    platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
     ;;
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 9788b3c2d563..9227b517560a 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -28,7 +28,6 @@ CONFIG_PS3_ROM=m
 CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
 CONFIG_RTAS_FLASH=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
@@ -113,7 +112,6 @@ CONFIG_IDE=y
 CONFIG_BLK_DEV_GENERIC=y
 CONFIG_BLK_DEV_AEC62XX=y
 CONFIG_BLK_DEV_SIIMAGE=y
-CONFIG_BLK_DEV_CELLEB=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=m
 CONFIG_CHR_DEV_SG=y
@@ -156,7 +154,6 @@ CONFIG_SERIAL_TXX9_NR_UARTS=2
 CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
 CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_DEVICE_INTERFACE=m
 CONFIG_IPMI_SI=m
diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig
deleted file mode 100644
index ff454dcd2dd3..000000000000
--- a/arch/powerpc/configs/celleb_defconfig
+++ /dev/null
@@ -1,152 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_TUNE_CELL=y
-CONFIG_ALTIVEC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4
-CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_PPC_POWERNV is not set
-# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_CELLEB=y
-CONFIG_SPU_FS=y
-# CONFIG_CBE_THERM is not set
-CONFIG_UDBG_RTAS_CONSOLE=y
-# CONFIG_RTAS_PROC is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_KEXEC=y
-CONFIG_NUMA=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_SYN_COOKIES=y
-CONFIG_IPV6=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_CELLEB=y
-CONFIG_SCSI=m
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=m
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_NETDEVICES=y
-CONFIG_SPIDER_NET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_TXX9_NR_UARTS=3
-CONFIG_SERIAL_TXX9_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-# CONFIG_HWMON is not set
-CONFIG_WATCHDOG=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_STORAGE=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_LIBCRC32C=m
-CONFIG_DEBUG_FS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index ca7957b09a3c..37659937bd12 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -99,6 +99,8 @@ CONFIG_E1000E=y
 CONFIG_AT803X_PHY=y
 CONFIG_VITESSE_PHY=y
 CONFIG_FIXED_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -114,11 +116,14 @@ CONFIG_NVRAM=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_INA2XX=y
 CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_MON=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 04737aaa8b6b..33cd1df818ad 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -12,6 +12,10 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_KALLSYMS_ALL=y
@@ -75,6 +79,10 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
 CONFIG_EEPROM_LEGACY=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
 CONFIG_ATA=y
 CONFIG_SATA_FSL=y
 CONFIG_SATA_SIL24=y
@@ -85,6 +93,8 @@ CONFIG_FSL_XGMAC_MDIO=y
 CONFIG_E1000E=y
 CONFIG_VITESSE_PHY=y
 CONFIG_FIXED_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -99,11 +109,14 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_INA2XX=y
 CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_MON=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index 8535c343dd57..6ecf7bdbc2f9 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -150,8 +150,7 @@ CONFIG_SPI=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
 CONFIG_GPIO_MPC8XXX=y
-CONFIG_HWMON=m
-CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM90=y
 CONFIG_FB=y
 CONFIG_FB_FSL_DIU=y
 # CONFIG_VGA_CONSOLE is not set
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index c45ad2e01b0c..b6c7111ea913 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -143,7 +143,7 @@ CONFIG_SPI=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
 CONFIG_GPIO_MPC8XXX=y
-# CONFIG_HWMON is not set
+CONFIG_SENSORS_LM90=y
 CONFIG_FB=y
 CONFIG_FB_FSL_DIU=y
 # CONFIG_VGA_CONSOLE is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 3315c9f0828a..aad501ae3834 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -36,7 +36,6 @@ CONFIG_PS3_ROM=m
 CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
 CONFIG_PPC_CELL_QPACE=y
 CONFIG_RTAS_FLASH=m
 CONFIG_IBMEBUS=y
@@ -89,7 +88,6 @@ CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_GENERIC=y
 CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_CELLEB=y
 CONFIG_BLK_DEV_IDE_PMAC=y
 CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
 CONFIG_BLK_DEV_SD=y
@@ -196,7 +194,6 @@ CONFIG_SERIAL_TXX9_CONSOLE=y
 CONFIG_SERIAL_JSM=m
 CONFIG_HVC_CONSOLE=y
 CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
 CONFIG_HVCS=m
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 382b28e364dc..4b87205c230c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,6 +1,8 @@
-
 generic-y += clkdev.h
+generic-y += div64.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 34a05a1a990b..0dc42c5082b7 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -76,9 +76,6 @@ extern void _set_L3CR(unsigned long);
 #define _set_L3CR(val)	do { } while(0)
 #endif
 
-extern void cacheable_memzero(void *p, unsigned int nb);
-extern void *cacheable_memcpy(void *, const void *, unsigned int);
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 5cf5a6d10685..6367b8347dad 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -100,7 +100,7 @@ struct cpu_spec {
 	/*
 	 * Processor specific routine to flush tlbs.
 	 */
-	void (*flush_tlb)(unsigned long inval_selector);
+	void (*flush_tlb)(unsigned int action);
 
 };
 
@@ -114,6 +114,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 
 extern const char *powerpc_base_platform;
 
+/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
+enum {
+	TLB_INVAL_SCOPE_GLOBAL = 0,	/* invalidate all TLBs */
+	TLB_INVAL_SCOPE_LPID = 1,	/* invalidate TLBs for current LPID */
+};
+
 #endif /* __ASSEMBLY__ */
 
 /* CPU kernel features */
diff --git a/arch/powerpc/include/asm/dbdma.h b/arch/powerpc/include/asm/dbdma.h
index e23f07e73cb3..6c69836b4ec2 100644
--- a/arch/powerpc/include/asm/dbdma.h
+++ b/arch/powerpc/include/asm/dbdma.h
@@ -42,12 +42,12 @@ struct dbdma_regs {
  * DBDMA command structure.  These fields are all little-endian!
  */
 struct dbdma_cmd {
-	unsigned short req_count;	/* requested byte transfer count */
-	unsigned short command;		/* command word (has bit-fields) */
-	unsigned int   phy_addr;	/* physical data address */
-	unsigned int   cmd_dep;		/* command-dependent field */
-	unsigned short res_count;	/* residual count after completion */
-	unsigned short xfer_status;	/* transfer status */
+	__le16 req_count;	/* requested byte transfer count */
+	__le16 command;		/* command word (has bit-fields) */
+	__le32 phy_addr;	/* physical data address */
+	__le32 cmd_dep;		/* command-dependent field */
+	__le16 res_count;	/* residual count after completion */
+	__le16 xfer_status;	/* transfer status */
 };
 
 /* DBDMA command values in command field */
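Retyping the descriptor fields as __le16/__le32 documents that DBDMA descriptors are little-endian in memory and lets sparse (make C=1) flag any access that skips the conversion helpers. A hedged sketch of a descriptor-fill routine in the converted style; the function is hypothetical, not a driver excerpt.

#include <linux/types.h>
#include <asm/byteorder.h>
#include <asm/dbdma.h>

static void dbdma_fill_example(struct dbdma_cmd *cp, u32 phys, u16 count,
			       u16 cmd)
{
	cp->req_count = cpu_to_le16(count);	/* host -> descriptor order */
	cp->command = cpu_to_le16(cmd);
	cp->phy_addr = cpu_to_le32(phys);
	cp->cmd_dep = 0;
	cp->res_count = 0;
	cp->xfer_status = 0;
}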
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 7d2e6235726d..4efc11dacb98 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -31,7 +31,7 @@ typedef struct {
 
 static inline bool dcr_map_ok_native(dcr_host_native_t host)
 {
-	return 1;
+	return true;
 }
 
 #define dcr_map_native(dev, dcr_n, dcr_c) \
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 38faeded7d59..9f1371bab5fc 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -8,6 +8,9 @@
 
 struct dma_map_ops;
 struct device_node;
+#ifdef CONFIG_PPC64
+struct pci_dn;
+#endif
 
 /*
  * Arch extensions to struct device.
@@ -34,6 +37,9 @@ struct dev_archdata {
 #ifdef CONFIG_SWIOTLB
 	dma_addr_t		max_direct_dma_addr;
 #endif
+#ifdef CONFIG_PPC64
+	struct pci_dn		*pci_data;
+#endif
 #ifdef CONFIG_EEH
 	struct eeh_dev		*edev;
 #endif
diff --git a/arch/powerpc/include/asm/div64.h b/arch/powerpc/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/powerpc/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 894d538f3567..9103687b0436 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -191,11 +191,11 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	struct dev_archdata *sd = &dev->archdata;
 
 	if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
-		return 0;
+		return false;
 #endif
 
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 
 	return addr + size - 1 <= *dev->dma_mask;
 }
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 55abfd09e47f..a52db28ecc1e 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -29,7 +29,7 @@
 
 struct pci_dev;
 struct pci_bus;
-struct device_node;
+struct pci_dn;
 
 #ifdef CONFIG_EEH
 
@@ -136,14 +136,14 @@ struct eeh_dev {
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
-	struct device_node *dn;		/* Associated device node	*/
+	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
 	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
 
-static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
+static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
 {
-	return edev ? edev->dn : NULL;
+	return edev ? edev->pdn : NULL;
 }
 
 static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
@@ -200,8 +200,7 @@ struct eeh_ops {
 	char *name;
 	int (*init)(void);
 	int (*post_init)(void);
-	void* (*of_probe)(struct device_node *dn, void *flag);
-	int (*dev_probe)(struct pci_dev *dev, void *flag);
+	void* (*probe)(struct pci_dn *pdn, void *data);
 	int (*set_option)(struct eeh_pe *pe, int option);
 	int (*get_pe_addr)(struct eeh_pe *pe);
 	int (*get_state)(struct eeh_pe *pe, int *state);
@@ -211,10 +210,10 @@ struct eeh_ops {
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*err_inject)(struct eeh_pe *pe, int type, int func,
 			  unsigned long addr, unsigned long mask);
-	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
-	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
+	int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val);
+	int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
-	int (*restore_config)(struct device_node *dn);
+	int (*restore_config)(struct pci_dn *pdn);
 };
 
 extern int eeh_subsystem_flags;
@@ -272,7 +271,7 @@ void eeh_pe_restore_bars(struct eeh_pe *pe);
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
-void *eeh_dev_init(struct device_node *dn, void *data);
+void *eeh_dev_init(struct pci_dn *pdn, void *data);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
@@ -280,8 +279,8 @@ int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
-void eeh_add_device_early(struct device_node *);
-void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_early(struct pci_dn *);
+void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
@@ -323,7 +322,7 @@ static inline int eeh_init(void)
 	return 0;
 }
 
-static inline void *eeh_dev_init(struct device_node *dn, void *data)
+static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
 	return NULL;
 }
@@ -339,9 +338,9 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
 
 static inline void eeh_addr_cache_build(void) { }
 
-static inline void eeh_add_device_early(struct device_node *dn) { }
+static inline void eeh_add_device_early(struct pci_dn *pdn) { }
 
-static inline void eeh_add_device_tree_early(struct device_node *dn) { }
+static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
 
 static inline void eeh_add_device_late(struct pci_dev *dev) { }
 
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 681bc0314b6b..e05808a328db 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -42,7 +42,7 @@
 #define FW_FEATURE_SPLPAR	ASM_CONST(0x0000000000100000)
 #define FW_FEATURE_LPAR		ASM_CONST(0x0000000000400000)
 #define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
-#define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
+/* Free				ASM_CONST(0x0000000001000000) */
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
 #define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
@@ -75,8 +75,6 @@ enum {
 	FW_FEATURE_POWERNV_ALWAYS = 0,
 	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, - FW_FEATURE_CELLEB_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_BEAT, - FW_FEATURE_CELLEB_ALWAYS = 0, FW_FEATURE_NATIVE_POSSIBLE = 0, FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = @@ -89,9 +87,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | #endif -#ifdef CONFIG_PPC_CELLEB - FW_FEATURE_CELLEB_POSSIBLE | -#endif #ifdef CONFIG_PPC_NATIVE FW_FEATURE_NATIVE_ALWAYS | #endif @@ -106,9 +101,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & #endif -#ifdef CONFIG_PPC_CELLEB - FW_FEATURE_CELLEB_ALWAYS & -#endif #ifdef CONFIG_PPC_NATIVE FW_FEATURE_NATIVE_ALWAYS & #endif diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index f1ea5972f6ec..1e27d6338565 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -29,6 +29,7 @@ #include <linux/bitops.h> #include <asm/machdep.h> #include <asm/types.h> +#include <asm/pci-bridge.h> #define IOMMU_PAGE_SHIFT_4K 12 #define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) @@ -78,6 +79,9 @@ struct iommu_table { struct iommu_group *it_group; #endif void (*set_bypass)(struct iommu_table *tbl, bool enable); +#ifdef CONFIG_PPC_POWERNV + void *data; +#endif }; /* Pure 2^n version of get_order */ @@ -169,7 +173,7 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, struct dma_attrs *attrs); extern void iommu_init_early_pSeries(void); -extern void iommu_init_early_dart(void); +extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); extern void alloc_dart_table(void); diff --git a/arch/powerpc/include/asm/irq_regs.h b/arch/powerpc/include/asm/irq_regs.h deleted file mode 100644 index ba94b51a0a70..000000000000 --- a/arch/powerpc/include/asm/irq_regs.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <asm-generic/irq_regs.h> - diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2d81e202bdcc..14619a59ec09 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -290,11 +290,11 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, pte_t old_pte, new_pte = __pte(0); while (1) { - old_pte = pte_val(*ptep); + old_pte = *ptep; /* * wait until _PAGE_BUSY is clear then set it atomically */ - if (unlikely(old_pte & _PAGE_BUSY)) { + if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) { cpu_relax(); continue; } @@ -305,16 +305,18 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, return __pte(0); #endif /* If pte is not present return None */ - if (unlikely(!(old_pte & _PAGE_PRESENT))) + if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT))) return __pte(0); new_pte = pte_mkyoung(old_pte); if (writing && pte_write(old_pte)) new_pte = pte_mkdirty(new_pte); - if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte, - new_pte)) + if (pte_val(old_pte) == __cmpxchg_u64((unsigned long *)ptep, + pte_val(old_pte), + pte_val(new_pte))) { break; + } } return new_pte; } @@ -335,7 +337,7 @@ static inline bool hpte_read_permission(unsigned long pp, unsigned long key) { if (key) return PP_RWRX <= pp && pp <= PP_RXRX; - return 1; + return true; } static inline bool hpte_write_permission(unsigned long pp, unsigned long key) @@ -373,7 +375,7 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, unsigned long mask = (pagesize >> PAGE_SHIFT) - 1; if (pagesize <= PAGE_SIZE) - return 1; + return true; return 
!(memslot->base_gfn & mask) && !(memslot->npages & mask); } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8ef05121d3cd..c610961720c7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -585,7 +585,7 @@ struct kvm_vcpu_arch { pgd_t *pgdir; u8 io_gpr; /* GPR used as IO source/target */ - u8 mmio_is_bigendian; + u8 mmio_host_swabbed; u8 mmio_sign_extend; u8 osi_needed; u8 osi_enabled; diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/powerpc/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index c8175a3fe560..ef8899432ae7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -103,9 +103,6 @@ struct machdep_calls { #endif #endif /* CONFIG_PPC64 */ - void (*pci_dma_dev_setup)(struct pci_dev *dev); - void (*pci_dma_bus_setup)(struct pci_bus *bus); - /* Platform set_dma_mask and dma_get_required_mask overrides */ int (*dma_set_mask)(struct device *dev, u64 dma_mask); u64 (*dma_get_required_mask)(struct device *dev); @@ -125,9 +122,8 @@ struct machdep_calls { unsigned int (*get_irq)(void); /* PCI stuff */ - /* Called after scanning the bus, before allocating resources */ + /* Called after allocating resources */ void (*pcibios_fixup)(void); - int (*pci_probe_mode)(struct pci_bus *); void (*pci_irq_fixup)(struct pci_dev *dev); int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); @@ -237,18 +233,13 @@ struct machdep_calls { /* Called for each PCI bus in the system when it's probed */ void (*pcibios_fixup_bus)(struct pci_bus *); - /* Called when pci_enable_device() is called. Returns 0 to - * allow assignment/enabling of the device. */ - int (*pcibios_enable_device_hook)(struct pci_dev *); - /* Called after scan and before resource survey */ void (*pcibios_fixup_phb)(struct pci_controller *hose); - /* Called during PCI resource reassignment */ - resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type); - - /* Reset the secondary bus of bridge */ - void (*pcibios_reset_secondary_bus)(struct pci_dev *dev); +#ifdef CONFIG_PCI_IOV + void (*pcibios_fixup_sriov)(struct pci_dev *pdev); + resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno); +#endif /* CONFIG_PCI_IOV */ /* Called to shutdown machine specific hardware not already controlled * by other drivers. 
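The PCI hooks dropped from machdep_calls above are not lost: this series moves them into the per-controller pci_controller_ops structure added to asm/pci-bridge.h further down (dma_dev_setup, probe_mode, enable_device_hook, window_alignment, reset_secondary_bus). A minimal sketch of the new shape, assuming hypothetical names (example_probe_mode, example_phb_ops, example_phb_init); PCI_PROBE_NORMAL and both structures are as declared later in this series:

#include <linux/pci.h>		/* pulls in asm/pci.h for PCI_PROBE_NORMAL */
#include <asm/pci-bridge.h>	/* struct pci_controller, pci_controller_ops */

/* Hypothetical hook: probe this bus with normal config-space scanning. */
static int example_probe_mode(struct pci_bus *bus)
{
	return PCI_PROBE_NORMAL;
}

static const struct pci_controller_ops example_phb_ops = {
	.probe_mode = example_probe_mode,
};

/* Each PHB now carries its own ops instead of dispatching through ppc_md. */
static void example_phb_init(struct pci_controller *hose)
{
	hose->controller_ops = example_phb_ops;
}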
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 4f13c3ed7acf..1da6a81ce541 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -112,6 +112,7 @@ #define TLBIEL_INVAL_SET_SHIFT 12 #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ +#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h index 3bef74a9914b..213f3a81593d 100644 --- a/arch/powerpc/include/asm/mpc85xx.h +++ b/arch/powerpc/include/asm/mpc85xx.h @@ -61,6 +61,7 @@ #define SVR_T4240 0x824000 #define SVR_T4120 0x824001 #define SVR_T4160 0x824100 +#define SVR_T4080 0x824102 #define SVR_C291 0x850000 #define SVR_C292 0x850020 #define SVR_C293 0x850030 diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 754f93d208fa..98697611e7b3 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -34,10 +34,6 @@ #define MPIC_GREG_GCONF_BASE_MASK 0x000fffff #define MPIC_GREG_GCONF_MCK 0x08000000 #define MPIC_GREG_GLOBAL_CONF_1 0x00030 -#define MPIC_GREG_GLOBAL_CONF_1_SIE 0x08000000 -#define MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK 0x70000000 -#define MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO(r) \ - (((r) << 28) & MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK) #define MPIC_GREG_VENDOR_0 0x00040 #define MPIC_GREG_VENDOR_1 0x00050 #define MPIC_GREG_VENDOR_2 0x00060 @@ -396,14 +392,7 @@ extern struct bus_type mpic_subsys; #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ /* Get the version of primary MPIC */ -#ifdef CONFIG_MPIC extern u32 fsl_mpic_primary_get_version(void); -#else -static inline u32 fsl_mpic_primary_get_version(void) -{ - return 0; -} -#endif /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is @@ -496,11 +485,5 @@ extern unsigned int mpic_get_coreint_irq(void); /* Fetch Machine Check interrupt from primary mpic */ extern unsigned int mpic_get_mcirq(void); -/* Set the EPIC clock ratio */ -void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); - -/* Enable/Disable EPIC serial interrupt mode */ -void mpic_set_serial_int(struct mpic *mpic, int enable); - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MPIC_H */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h new file mode 100644 index 000000000000..ff1ccb375e60 --- /dev/null +++ b/arch/powerpc/include/asm/nmi.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NMI_H +#define _ASM_NMI_H + +#endif /* _ASM_NMI_H */ diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h index b0fe0fe4e626..09a518bb7c03 100644 --- a/arch/powerpc/include/asm/nvram.h +++ b/arch/powerpc/include/asm/nvram.h @@ -9,12 +9,43 @@ #ifndef _ASM_POWERPC_NVRAM_H #define _ASM_POWERPC_NVRAM_H - +#include <linux/types.h> #include <linux/errno.h> #include <linux/list.h> #include <uapi/asm/nvram.h> +/* + * Set oops header version to distinguish between old and new format header. + * lnx,oops-log partition max size is 4000, header version > 4000 will + * help in identifying new header. 
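+ * (The first 16 bits of an old-format header held a record length,
+ * which can never exceed the 4000-byte partition, so a leading value
+ * above 4000 unambiguously marks the new layout.)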
+ */ +#define OOPS_HDR_VERSION 5000 + +struct err_log_info { + __be32 error_type; + __be32 seq_num; +}; + +struct nvram_os_partition { + const char *name; + int req_size; /* desired size, in bytes */ + int min_size; /* minimum acceptable size (0 means req_size) */ + long size; /* size of data portion (excluding err_log_info) */ + long index; /* offset of data portion of partition */ + bool os_partition; /* partition initialized by OS, not FW */ +}; + +struct oops_log_info { + __be16 version; + __be16 report_length; + __be64 timestamp; +} __attribute__((packed)); + +extern struct nvram_os_partition oops_log_partition; + #ifdef CONFIG_PPC_PSERIES +extern struct nvram_os_partition rtas_log_partition; + extern int nvram_write_error_log(char * buff, int length, unsigned int err_type, unsigned int err_seq); extern int nvram_read_error_log(char * buff, int length, @@ -50,6 +81,23 @@ extern void pmac_xpram_write(int xpaddr, u8 data); /* Synchronize NVRAM */ extern void nvram_sync(void); +/* Initialize NVRAM OS partition */ +extern int __init nvram_init_os_partition(struct nvram_os_partition *part); + +/* Initialize NVRAM oops partition */ +extern void __init nvram_init_oops_partition(int rtas_partition_exists); + +/* Read a NVRAM partition */ +extern int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt); + +/* Write to NVRAM OS partition */ +extern int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt); + /* Determine NVRAM size */ extern ssize_t nvram_get_size(void); diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h new file mode 100644 index 000000000000..0321a909e663 --- /dev/null +++ b/arch/powerpc/include/asm/opal-api.h @@ -0,0 +1,735 @@ +/* + * OPAL API definitions. + * + * Copyright 2011-2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __OPAL_API_H +#define __OPAL_API_H + +/****** OPAL APIs ******/ + +/* Return codes */ +#define OPAL_SUCCESS 0 +#define OPAL_PARAMETER -1 +#define OPAL_BUSY -2 +#define OPAL_PARTIAL -3 +#define OPAL_CONSTRAINED -4 +#define OPAL_CLOSED -5 +#define OPAL_HARDWARE -6 +#define OPAL_UNSUPPORTED -7 +#define OPAL_PERMISSION -8 +#define OPAL_NO_MEM -9 +#define OPAL_RESOURCE -10 +#define OPAL_INTERNAL_ERROR -11 +#define OPAL_BUSY_EVENT -12 +#define OPAL_HARDWARE_FROZEN -13 +#define OPAL_WRONG_STATE -14 +#define OPAL_ASYNC_COMPLETION -15 +#define OPAL_EMPTY -16 +#define OPAL_I2C_TIMEOUT -17 +#define OPAL_I2C_INVALID_CMD -18 +#define OPAL_I2C_LBUS_PARITY -19 +#define OPAL_I2C_BKEND_OVERRUN -20 +#define OPAL_I2C_BKEND_ACCESS -21 +#define OPAL_I2C_ARBT_LOST -22 +#define OPAL_I2C_NACK_RCVD -23 +#define OPAL_I2C_STOP_ERR -24 + +/* API Tokens (in r0) */ +#define OPAL_INVALID_CALL -1 +#define OPAL_TEST 0 +#define OPAL_CONSOLE_WRITE 1 +#define OPAL_CONSOLE_READ 2 +#define OPAL_RTC_READ 3 +#define OPAL_RTC_WRITE 4 +#define OPAL_CEC_POWER_DOWN 5 +#define OPAL_CEC_REBOOT 6 +#define OPAL_READ_NVRAM 7 +#define OPAL_WRITE_NVRAM 8 +#define OPAL_HANDLE_INTERRUPT 9 +#define OPAL_POLL_EVENTS 10 +#define OPAL_PCI_SET_HUB_TCE_MEMORY 11 +#define OPAL_PCI_SET_PHB_TCE_MEMORY 12 +#define OPAL_PCI_CONFIG_READ_BYTE 13 +#define OPAL_PCI_CONFIG_READ_HALF_WORD 14 +#define OPAL_PCI_CONFIG_READ_WORD 15 +#define OPAL_PCI_CONFIG_WRITE_BYTE 16 +#define OPAL_PCI_CONFIG_WRITE_HALF_WORD 17 +#define OPAL_PCI_CONFIG_WRITE_WORD 18 +#define OPAL_SET_XIVE 19 +#define OPAL_GET_XIVE 20 +#define OPAL_GET_COMPLETION_TOKEN_STATUS 21 /* obsolete */ +#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER 22 +#define OPAL_PCI_EEH_FREEZE_STATUS 23 +#define OPAL_PCI_SHPC 24 +#define OPAL_CONSOLE_WRITE_BUFFER_SPACE 25 +#define OPAL_PCI_EEH_FREEZE_CLEAR 26 +#define OPAL_PCI_PHB_MMIO_ENABLE 27 +#define OPAL_PCI_SET_PHB_MEM_WINDOW 28 +#define OPAL_PCI_MAP_PE_MMIO_WINDOW 29 +#define OPAL_PCI_SET_PHB_TABLE_MEMORY 30 +#define OPAL_PCI_SET_PE 31 +#define OPAL_PCI_SET_PELTV 32 +#define OPAL_PCI_SET_MVE 33 +#define OPAL_PCI_SET_MVE_ENABLE 34 +#define OPAL_PCI_GET_XIVE_REISSUE 35 +#define OPAL_PCI_SET_XIVE_REISSUE 36 +#define OPAL_PCI_SET_XIVE_PE 37 +#define OPAL_GET_XIVE_SOURCE 38 +#define OPAL_GET_MSI_32 39 +#define OPAL_GET_MSI_64 40 +#define OPAL_START_CPU 41 +#define OPAL_QUERY_CPU_STATUS 42 +#define OPAL_WRITE_OPPANEL 43 /* unimplemented */ +#define OPAL_PCI_MAP_PE_DMA_WINDOW 44 +#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL 45 +#define OPAL_PCI_RESET 49 +#define OPAL_PCI_GET_HUB_DIAG_DATA 50 +#define OPAL_PCI_GET_PHB_DIAG_DATA 51 +#define OPAL_PCI_FENCE_PHB 52 +#define OPAL_PCI_REINIT 53 +#define OPAL_PCI_MASK_PE_ERROR 54 +#define OPAL_SET_SLOT_LED_STATUS 55 +#define OPAL_GET_EPOW_STATUS 56 +#define OPAL_SET_SYSTEM_ATTENTION_LED 57 +#define OPAL_RESERVED1 58 +#define OPAL_RESERVED2 59 +#define OPAL_PCI_NEXT_ERROR 60 +#define OPAL_PCI_EEH_FREEZE_STATUS2 61 +#define OPAL_PCI_POLL 62 +#define OPAL_PCI_MSI_EOI 63 +#define OPAL_PCI_GET_PHB_DIAG_DATA2 64 +#define OPAL_XSCOM_READ 65 +#define OPAL_XSCOM_WRITE 66 +#define OPAL_LPC_READ 67 +#define OPAL_LPC_WRITE 68 +#define OPAL_RETURN_CPU 69 +#define OPAL_REINIT_CPUS 70 +#define OPAL_ELOG_READ 71 +#define OPAL_ELOG_WRITE 72 +#define OPAL_ELOG_ACK 73 +#define OPAL_ELOG_RESEND 74 +#define OPAL_ELOG_SIZE 75 +#define OPAL_FLASH_VALIDATE 76 +#define OPAL_FLASH_MANAGE 77 +#define OPAL_FLASH_UPDATE 78 +#define OPAL_RESYNC_TIMEBASE 79 +#define OPAL_CHECK_TOKEN 80 +#define OPAL_DUMP_INIT 81 +#define OPAL_DUMP_INFO 82 
+#define OPAL_DUMP_READ 83 +#define OPAL_DUMP_ACK 84 +#define OPAL_GET_MSG 85 +#define OPAL_CHECK_ASYNC_COMPLETION 86 +#define OPAL_SYNC_HOST_REBOOT 87 +#define OPAL_SENSOR_READ 88 +#define OPAL_GET_PARAM 89 +#define OPAL_SET_PARAM 90 +#define OPAL_DUMP_RESEND 91 +#define OPAL_ELOG_SEND 92 /* Deprecated */ +#define OPAL_PCI_SET_PHB_CAPI_MODE 93 +#define OPAL_DUMP_INFO2 94 +#define OPAL_WRITE_OPPANEL_ASYNC 95 +#define OPAL_PCI_ERR_INJECT 96 +#define OPAL_PCI_EEH_FREEZE_SET 97 +#define OPAL_HANDLE_HMI 98 +#define OPAL_CONFIG_CPU_IDLE_STATE 99 +#define OPAL_SLW_SET_REG 100 +#define OPAL_REGISTER_DUMP_REGION 101 +#define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_WRITE_TPO 103 +#define OPAL_READ_TPO 104 +#define OPAL_GET_DPO_STATUS 105 +#define OPAL_OLD_I2C_REQUEST 106 /* Deprecated */ +#define OPAL_IPMI_SEND 107 +#define OPAL_IPMI_RECV 108 +#define OPAL_I2C_REQUEST 109 +#define OPAL_FLASH_READ 110 +#define OPAL_FLASH_WRITE 111 +#define OPAL_FLASH_ERASE 112 +#define OPAL_LAST 112 + +/* Device tree flags */ + +/* Flags set in power-mgmt nodes in device tree if + * respective idle states are supported in the platform. + */ +#define OPAL_PM_NAP_ENABLED 0x00010000 +#define OPAL_PM_SLEEP_ENABLED 0x00020000 +#define OPAL_PM_WINKLE_ENABLED 0x00040000 +#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */ + +#ifndef __ASSEMBLY__ + +/* Other enums */ +enum OpalFreezeState { + OPAL_EEH_STOPPED_NOT_FROZEN = 0, + OPAL_EEH_STOPPED_MMIO_FREEZE = 1, + OPAL_EEH_STOPPED_DMA_FREEZE = 2, + OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3, + OPAL_EEH_STOPPED_RESET = 4, + OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, + OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 +}; + +enum OpalEehFreezeActionToken { + OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, + OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3, + + OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1, + OPAL_EEH_ACTION_SET_FREEZE_DMA = 2, + OPAL_EEH_ACTION_SET_FREEZE_ALL = 3 +}; + +enum OpalPciStatusToken { + OPAL_EEH_NO_ERROR = 0, + OPAL_EEH_IOC_ERROR = 1, + OPAL_EEH_PHB_ERROR = 2, + OPAL_EEH_PE_ERROR = 3, + OPAL_EEH_PE_MMIO_ERROR = 4, + OPAL_EEH_PE_DMA_ERROR = 5 +}; + +enum OpalPciErrorSeverity { + OPAL_EEH_SEV_NO_ERROR = 0, + OPAL_EEH_SEV_IOC_DEAD = 1, + OPAL_EEH_SEV_PHB_DEAD = 2, + OPAL_EEH_SEV_PHB_FENCED = 3, + OPAL_EEH_SEV_PE_ER = 4, + OPAL_EEH_SEV_INF = 5 +}; + +enum OpalErrinjectType { + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, +}; + +enum OpalErrinjectFunc { + /* IOA bus specific errors */ + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, +}; + +enum OpalMmioWindowType { + OPAL_M32_WINDOW_TYPE = 1, + OPAL_M64_WINDOW_TYPE = 2, + OPAL_IO_WINDOW_TYPE = 3 +}; + +enum OpalExceptionHandler { + 
OPAL_MACHINE_CHECK_HANDLER = 1, + OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, + OPAL_SOFTPATCH_HANDLER = 3 +}; + +enum OpalPendingState { + OPAL_EVENT_OPAL_INTERNAL = 0x1, + OPAL_EVENT_NVRAM = 0x2, + OPAL_EVENT_RTC = 0x4, + OPAL_EVENT_CONSOLE_OUTPUT = 0x8, + OPAL_EVENT_CONSOLE_INPUT = 0x10, + OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, + OPAL_EVENT_ERROR_LOG = 0x40, + OPAL_EVENT_EPOW = 0x80, + OPAL_EVENT_LED_STATUS = 0x100, + OPAL_EVENT_PCI_ERROR = 0x200, + OPAL_EVENT_DUMP_AVAIL = 0x400, + OPAL_EVENT_MSG_PENDING = 0x800, +}; + +enum OpalThreadStatus { + OPAL_THREAD_INACTIVE = 0x0, + OPAL_THREAD_STARTED = 0x1, + OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */ +}; + +enum OpalPciBusCompare { + OpalPciBusAny = 0, /* Any bus number match */ + OpalPciBus3Bits = 2, /* Match top 3 bits of bus number */ + OpalPciBus4Bits = 3, /* Match top 4 bits of bus number */ + OpalPciBus5Bits = 4, /* Match top 5 bits of bus number */ + OpalPciBus6Bits = 5, /* Match top 6 bits of bus number */ + OpalPciBus7Bits = 6, /* Match top 7 bits of bus number */ + OpalPciBusAll = 7, /* Match bus number exactly */ +}; + +enum OpalDeviceCompare { + OPAL_IGNORE_RID_DEVICE_NUMBER = 0, + OPAL_COMPARE_RID_DEVICE_NUMBER = 1 +}; + +enum OpalFuncCompare { + OPAL_IGNORE_RID_FUNCTION_NUMBER = 0, + OPAL_COMPARE_RID_FUNCTION_NUMBER = 1 +}; + +enum OpalPeAction { + OPAL_UNMAP_PE = 0, + OPAL_MAP_PE = 1 +}; + +enum OpalPeltvAction { + OPAL_REMOVE_PE_FROM_DOMAIN = 0, + OPAL_ADD_PE_TO_DOMAIN = 1 +}; + +enum OpalMveEnableAction { + OPAL_DISABLE_MVE = 0, + OPAL_ENABLE_MVE = 1 +}; + +enum OpalM64Action { + OPAL_DISABLE_M64 = 0, + OPAL_ENABLE_M64_SPLIT = 1, + OPAL_ENABLE_M64_NON_SPLIT = 2 +}; + +enum OpalPciResetScope { + OPAL_RESET_PHB_COMPLETE = 1, + OPAL_RESET_PCI_LINK = 2, + OPAL_RESET_PHB_ERROR = 3, + OPAL_RESET_PCI_HOT = 4, + OPAL_RESET_PCI_FUNDAMENTAL = 5, + OPAL_RESET_PCI_IODA_TABLE = 6 +}; + +enum OpalPciReinitScope { + /* + * Note: we chose values that do not overlap + * OpalPciResetScope as OPAL v2 used the same + * enum for both + */ + OPAL_REINIT_PCI_DEV = 1000 +}; + +enum OpalPciResetState { + OPAL_DEASSERT_RESET = 0, + OPAL_ASSERT_RESET = 1 +}; + +/* + * Address cycle types for LPC accesses. 
These also correspond + * to the content of the first cell of the "reg" property for + * device nodes on the LPC bus + */ +enum OpalLPCAddressType { + OPAL_LPC_MEM = 0, + OPAL_LPC_IO = 1, + OPAL_LPC_FW = 2, +}; + +enum opal_msg_type { + OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc, + * additional params function-specific + */ + OPAL_MSG_MEM_ERR, + OPAL_MSG_EPOW, + OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ + OPAL_MSG_HMI_EVT, + OPAL_MSG_DPO, + OPAL_MSG_TYPE_MAX, +}; + +struct opal_msg { + __be32 msg_type; + __be32 reserved; + __be64 params[8]; +}; + +/* System parameter permission */ +enum OpalSysparamPerm { + OPAL_SYSPARAM_READ = 0x1, + OPAL_SYSPARAM_WRITE = 0x2, + OPAL_SYSPARAM_RW = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE), +}; + +enum { + OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1, +}; + +struct opal_ipmi_msg { + uint8_t version; + uint8_t netfn; + uint8_t cmd; + uint8_t data[]; +}; + +/* FSP memory errors handling */ +enum OpalMemErr_Version { + OpalMemErr_V1 = 1, +}; + +enum OpalMemErrType { + OPAL_MEM_ERR_TYPE_RESILIENCE = 0, + OPAL_MEM_ERR_TYPE_DYN_DALLOC, +}; + +/* Memory Reilience error type */ +enum OpalMemErr_ResilErrType { + OPAL_MEM_RESILIENCE_CE = 0, + OPAL_MEM_RESILIENCE_UE, + OPAL_MEM_RESILIENCE_UE_SCRUB, +}; + +/* Dynamic Memory Deallocation type */ +enum OpalMemErr_DynErrType { + OPAL_MEM_DYNAMIC_DEALLOC = 0, +}; + +struct OpalMemoryErrorData { + enum OpalMemErr_Version version:8; /* 0x00 */ + enum OpalMemErrType type:8; /* 0x01 */ + __be16 flags; /* 0x02 */ + uint8_t reserved_1[4]; /* 0x04 */ + + union { + /* Memory Resilience corrected/uncorrected error info */ + struct { + enum OpalMemErr_ResilErrType resil_err_type:8; + uint8_t reserved_1[7]; + __be64 physical_address_start; + __be64 physical_address_end; + } resilience; + /* Dynamic memory deallocation error info */ + struct { + enum OpalMemErr_DynErrType dyn_err_type:8; + uint8_t reserved_1[7]; + __be64 physical_address_start; + __be64 physical_address_end; + } dyn_dealloc; + } u; +}; + +/* HMI interrupt event */ +enum OpalHMI_Version { + OpalHMIEvt_V1 = 1, +}; + +enum OpalHMI_Severity { + OpalHMI_SEV_NO_ERROR = 0, + OpalHMI_SEV_WARNING = 1, + OpalHMI_SEV_ERROR_SYNC = 2, + OpalHMI_SEV_FATAL = 3, +}; + +enum OpalHMI_Disposition { + OpalHMI_DISPOSITION_RECOVERED = 0, + OpalHMI_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum OpalHMI_ErrType { + OpalHMI_ERROR_MALFUNC_ALERT = 0, + OpalHMI_ERROR_PROC_RECOV_DONE, + OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN, + OpalHMI_ERROR_PROC_RECOV_MASKED, + OpalHMI_ERROR_TFAC, + OpalHMI_ERROR_TFMR_PARITY, + OpalHMI_ERROR_HA_OVERFLOW_WARN, + OpalHMI_ERROR_XSCOM_FAIL, + OpalHMI_ERROR_XSCOM_DONE, + OpalHMI_ERROR_SCOM_FIR, + OpalHMI_ERROR_DEBUG_TRIG_FIR, + OpalHMI_ERROR_HYP_RESOURCE, + OpalHMI_ERROR_CAPP_RECOVERY, +}; + +struct OpalHMIEvent { + uint8_t version; /* 0x00 */ + uint8_t severity; /* 0x01 */ + uint8_t type; /* 0x02 */ + uint8_t disposition; /* 0x03 */ + uint8_t reserved_1[4]; /* 0x04 */ + + __be64 hmer; + /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. 
*/ + __be64 tfmr; +}; + +enum { + OPAL_P7IOC_DIAG_TYPE_NONE = 0, + OPAL_P7IOC_DIAG_TYPE_RGC = 1, + OPAL_P7IOC_DIAG_TYPE_BI = 2, + OPAL_P7IOC_DIAG_TYPE_CI = 3, + OPAL_P7IOC_DIAG_TYPE_MISC = 4, + OPAL_P7IOC_DIAG_TYPE_I2C = 5, + OPAL_P7IOC_DIAG_TYPE_LAST = 6 +}; + +struct OpalIoP7IOCErrorData { + __be16 type; + + /* GEM */ + __be64 gemXfir; + __be64 gemRfir; + __be64 gemRirqfir; + __be64 gemMask; + __be64 gemRwof; + + /* LEM */ + __be64 lemFir; + __be64 lemErrMask; + __be64 lemAction0; + __be64 lemAction1; + __be64 lemWof; + + union { + struct OpalIoP7IOCRgcErrorData { + __be64 rgcStatus; /* 3E1C10 */ + __be64 rgcLdcp; /* 3E1C18 */ + }rgc; + struct OpalIoP7IOCBiErrorData { + __be64 biLdcp0; /* 3C0100, 3C0118 */ + __be64 biLdcp1; /* 3C0108, 3C0120 */ + __be64 biLdcp2; /* 3C0110, 3C0128 */ + __be64 biFenceStatus; /* 3C0130, 3C0130 */ + + uint8_t biDownbound; /* BI Downbound or Upbound */ + }bi; + struct OpalIoP7IOCCiErrorData { + __be64 ciPortStatus; /* 3Dn008 */ + __be64 ciPortLdcp; /* 3Dn010 */ + + uint8_t ciPort; /* Index of CI port: 0/1 */ + }ci; + }; +}; + +/** + * This structure defines the overlay which will be used to store PHB error + * data upon request. + */ +enum { + OPAL_PHB_ERROR_DATA_VERSION_1 = 1, +}; + +enum { + OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1, + OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2 +}; + +enum { + OPAL_P7IOC_NUM_PEST_REGS = 128, + OPAL_PHB3_NUM_PEST_REGS = 256 +}; + +struct OpalIoPhbErrorCommon { + __be32 version; + __be32 ioType; + __be32 len; +}; + +struct OpalIoP7IOCPhbErrorData { + struct OpalIoPhbErrorCommon common; + + __be32 brdgCtl; + + // P7IOC utl regs + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; + + // P7IOC cfg regs + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; + + // cfg AER regs + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; + + __be32 rsv3; + + // Record data about the call to allocate a buffer. 
+ __be64 errorClass; + __be64 correlator; + + //P7IOC MMIO Error Regs + __be64 p7iocPlssr; // n120 + __be64 p7iocCsr; // n110 + __be64 lemFir; // nC00 + __be64 lemErrorMask; // nC18 + __be64 lemWOF; // nC40 + __be64 phbErrorStatus; // nC80 + __be64 phbFirstErrorStatus; // nC88 + __be64 phbErrorLog0; // nCC0 + __be64 phbErrorLog1; // nCC8 + __be64 mmioErrorStatus; // nD00 + __be64 mmioFirstErrorStatus; // nD08 + __be64 mmioErrorLog0; // nD40 + __be64 mmioErrorLog1; // nD48 + __be64 dma0ErrorStatus; // nD80 + __be64 dma0FirstErrorStatus; // nD88 + __be64 dma0ErrorLog0; // nDC0 + __be64 dma0ErrorLog1; // nDC8 + __be64 dma1ErrorStatus; // nE00 + __be64 dma1FirstErrorStatus; // nE08 + __be64 dma1ErrorLog0; // nE40 + __be64 dma1ErrorLog1; // nE48 + __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS]; + __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS]; +}; + +struct OpalIoPhb3ErrorData { + struct OpalIoPhbErrorCommon common; + + __be32 brdgCtl; + + /* PHB3 UTL regs */ + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; + + /* PHB3 cfg regs */ + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; + + /* cfg AER regs */ + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; + + __be32 rsv3; + + /* Record data about the call to allocate a buffer */ + __be64 errorClass; + __be64 correlator; + + /* PHB3 MMIO Error Regs */ + __be64 nFir; /* 000 */ + __be64 nFirMask; /* 003 */ + __be64 nFirWOF; /* 008 */ + __be64 phbPlssr; /* 120 */ + __be64 phbCsr; /* 110 */ + __be64 lemFir; /* C00 */ + __be64 lemErrorMask; /* C18 */ + __be64 lemWOF; /* C40 */ + __be64 phbErrorStatus; /* C80 */ + __be64 phbFirstErrorStatus; /* C88 */ + __be64 phbErrorLog0; /* CC0 */ + __be64 phbErrorLog1; /* CC8 */ + __be64 mmioErrorStatus; /* D00 */ + __be64 mmioFirstErrorStatus; /* D08 */ + __be64 mmioErrorLog0; /* D40 */ + __be64 mmioErrorLog1; /* D48 */ + __be64 dma0ErrorStatus; /* D80 */ + __be64 dma0FirstErrorStatus; /* D88 */ + __be64 dma0ErrorLog0; /* DC0 */ + __be64 dma0ErrorLog1; /* DC8 */ + __be64 dma1ErrorStatus; /* E00 */ + __be64 dma1FirstErrorStatus; /* E08 */ + __be64 dma1ErrorLog0; /* E40 */ + __be64 dma1ErrorLog1; /* E48 */ + __be64 pestA[OPAL_PHB3_NUM_PEST_REGS]; + __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; +}; + +enum { + OPAL_REINIT_CPUS_HILE_BE = (1 << 0), + OPAL_REINIT_CPUS_HILE_LE = (1 << 1), +}; + +typedef struct oppanel_line { + __be64 line; + __be64 line_len; +} oppanel_line_t; + +/* + * SG entries + * + * WARNING: The current implementation requires each entry + * to represent a block that is 4k aligned *and* each block + * size except the last one in the list to be as well. + */ +struct opal_sg_entry { + __be64 data; + __be64 length; +}; + +/* + * Candiate image SG list. 
+ * + * length = VER | length + */ +struct opal_sg_list { + __be64 length; + __be64 next; + struct opal_sg_entry entry[]; +}; + +/* + * Dump region ID range usable by the OS + */ +#define OPAL_DUMP_REGION_HOST_START 0x80 +#define OPAL_DUMP_REGION_LOG_BUF 0x80 +#define OPAL_DUMP_REGION_HOST_END 0xFF + +/* CAPI modes for PHB */ +enum { + OPAL_PHB_CAPI_MODE_PCIE = 0, + OPAL_PHB_CAPI_MODE_CAPI = 1, + OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, + OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, +}; + +/* OPAL I2C request */ +struct opal_i2c_request { + uint8_t type; +#define OPAL_I2C_RAW_READ 0 +#define OPAL_I2C_RAW_WRITE 1 +#define OPAL_I2C_SM_READ 2 +#define OPAL_I2C_SM_WRITE 3 + uint8_t flags; +#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */ + uint8_t subaddr_sz; /* Max 4 */ + uint8_t reserved; + __be16 addr; /* 7 or 10 bit address */ + __be16 reserved2; + __be32 subaddr; /* Sub-address if any */ + __be32 size; /* Data size */ + __be64 buffer_ra; /* Buffer real address */ +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9ee0a30a02ce..042af1abfc4d 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -9,755 +9,17 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef __OPAL_H -#define __OPAL_H +#ifndef _ASM_POWERPC_OPAL_H +#define _ASM_POWERPC_OPAL_H -#ifndef __ASSEMBLY__ -/* - * SG entry - * - * WARNING: The current implementation requires each entry - * to represent a block that is 4k aligned *and* each block - * size except the last one in the list to be as well. - */ -struct opal_sg_entry { - __be64 data; - __be64 length; -}; - -/* SG list */ -struct opal_sg_list { - __be64 length; - __be64 next; - struct opal_sg_entry entry[]; -}; - -/* We calculate number of sg entries based on PAGE_SIZE */ -#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) - -#endif /* __ASSEMBLY__ */ - -/****** OPAL APIs ******/ - -/* Return codes */ -#define OPAL_SUCCESS 0 -#define OPAL_PARAMETER -1 -#define OPAL_BUSY -2 -#define OPAL_PARTIAL -3 -#define OPAL_CONSTRAINED -4 -#define OPAL_CLOSED -5 -#define OPAL_HARDWARE -6 -#define OPAL_UNSUPPORTED -7 -#define OPAL_PERMISSION -8 -#define OPAL_NO_MEM -9 -#define OPAL_RESOURCE -10 -#define OPAL_INTERNAL_ERROR -11 -#define OPAL_BUSY_EVENT -12 -#define OPAL_HARDWARE_FROZEN -13 -#define OPAL_WRONG_STATE -14 -#define OPAL_ASYNC_COMPLETION -15 -#define OPAL_I2C_TIMEOUT -17 -#define OPAL_I2C_INVALID_CMD -18 -#define OPAL_I2C_LBUS_PARITY -19 -#define OPAL_I2C_BKEND_OVERRUN -20 -#define OPAL_I2C_BKEND_ACCESS -21 -#define OPAL_I2C_ARBT_LOST -22 -#define OPAL_I2C_NACK_RCVD -23 -#define OPAL_I2C_STOP_ERR -24 - -/* API Tokens (in r0) */ -#define OPAL_INVALID_CALL -1 -#define OPAL_CONSOLE_WRITE 1 -#define OPAL_CONSOLE_READ 2 -#define OPAL_RTC_READ 3 -#define OPAL_RTC_WRITE 4 -#define OPAL_CEC_POWER_DOWN 5 -#define OPAL_CEC_REBOOT 6 -#define OPAL_READ_NVRAM 7 -#define OPAL_WRITE_NVRAM 8 -#define OPAL_HANDLE_INTERRUPT 9 -#define OPAL_POLL_EVENTS 10 -#define OPAL_PCI_SET_HUB_TCE_MEMORY 11 -#define OPAL_PCI_SET_PHB_TCE_MEMORY 12 -#define OPAL_PCI_CONFIG_READ_BYTE 13 -#define OPAL_PCI_CONFIG_READ_HALF_WORD 14 -#define OPAL_PCI_CONFIG_READ_WORD 15 -#define OPAL_PCI_CONFIG_WRITE_BYTE 16 -#define OPAL_PCI_CONFIG_WRITE_HALF_WORD 17 -#define OPAL_PCI_CONFIG_WRITE_WORD 18 -#define OPAL_SET_XIVE 19 -#define OPAL_GET_XIVE 20 -#define OPAL_GET_COMPLETION_TOKEN_STATUS 21 /* obsolete */ -#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER 22 -#define 
OPAL_PCI_EEH_FREEZE_STATUS 23 -#define OPAL_PCI_SHPC 24 -#define OPAL_CONSOLE_WRITE_BUFFER_SPACE 25 -#define OPAL_PCI_EEH_FREEZE_CLEAR 26 -#define OPAL_PCI_PHB_MMIO_ENABLE 27 -#define OPAL_PCI_SET_PHB_MEM_WINDOW 28 -#define OPAL_PCI_MAP_PE_MMIO_WINDOW 29 -#define OPAL_PCI_SET_PHB_TABLE_MEMORY 30 -#define OPAL_PCI_SET_PE 31 -#define OPAL_PCI_SET_PELTV 32 -#define OPAL_PCI_SET_MVE 33 -#define OPAL_PCI_SET_MVE_ENABLE 34 -#define OPAL_PCI_GET_XIVE_REISSUE 35 -#define OPAL_PCI_SET_XIVE_REISSUE 36 -#define OPAL_PCI_SET_XIVE_PE 37 -#define OPAL_GET_XIVE_SOURCE 38 -#define OPAL_GET_MSI_32 39 -#define OPAL_GET_MSI_64 40 -#define OPAL_START_CPU 41 -#define OPAL_QUERY_CPU_STATUS 42 -#define OPAL_WRITE_OPPANEL 43 -#define OPAL_PCI_MAP_PE_DMA_WINDOW 44 -#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL 45 -#define OPAL_PCI_RESET 49 -#define OPAL_PCI_GET_HUB_DIAG_DATA 50 -#define OPAL_PCI_GET_PHB_DIAG_DATA 51 -#define OPAL_PCI_FENCE_PHB 52 -#define OPAL_PCI_REINIT 53 -#define OPAL_PCI_MASK_PE_ERROR 54 -#define OPAL_SET_SLOT_LED_STATUS 55 -#define OPAL_GET_EPOW_STATUS 56 -#define OPAL_SET_SYSTEM_ATTENTION_LED 57 -#define OPAL_RESERVED1 58 -#define OPAL_RESERVED2 59 -#define OPAL_PCI_NEXT_ERROR 60 -#define OPAL_PCI_EEH_FREEZE_STATUS2 61 -#define OPAL_PCI_POLL 62 -#define OPAL_PCI_MSI_EOI 63 -#define OPAL_PCI_GET_PHB_DIAG_DATA2 64 -#define OPAL_XSCOM_READ 65 -#define OPAL_XSCOM_WRITE 66 -#define OPAL_LPC_READ 67 -#define OPAL_LPC_WRITE 68 -#define OPAL_RETURN_CPU 69 -#define OPAL_REINIT_CPUS 70 -#define OPAL_ELOG_READ 71 -#define OPAL_ELOG_WRITE 72 -#define OPAL_ELOG_ACK 73 -#define OPAL_ELOG_RESEND 74 -#define OPAL_ELOG_SIZE 75 -#define OPAL_FLASH_VALIDATE 76 -#define OPAL_FLASH_MANAGE 77 -#define OPAL_FLASH_UPDATE 78 -#define OPAL_RESYNC_TIMEBASE 79 -#define OPAL_CHECK_TOKEN 80 -#define OPAL_DUMP_INIT 81 -#define OPAL_DUMP_INFO 82 -#define OPAL_DUMP_READ 83 -#define OPAL_DUMP_ACK 84 -#define OPAL_GET_MSG 85 -#define OPAL_CHECK_ASYNC_COMPLETION 86 -#define OPAL_SYNC_HOST_REBOOT 87 -#define OPAL_SENSOR_READ 88 -#define OPAL_GET_PARAM 89 -#define OPAL_SET_PARAM 90 -#define OPAL_DUMP_RESEND 91 -#define OPAL_PCI_SET_PHB_CXL_MODE 93 -#define OPAL_DUMP_INFO2 94 -#define OPAL_PCI_ERR_INJECT 96 -#define OPAL_PCI_EEH_FREEZE_SET 97 -#define OPAL_HANDLE_HMI 98 -#define OPAL_CONFIG_CPU_IDLE_STATE 99 -#define OPAL_SLW_SET_REG 100 -#define OPAL_REGISTER_DUMP_REGION 101 -#define OPAL_UNREGISTER_DUMP_REGION 102 -#define OPAL_WRITE_TPO 103 -#define OPAL_READ_TPO 104 -#define OPAL_IPMI_SEND 107 -#define OPAL_IPMI_RECV 108 -#define OPAL_I2C_REQUEST 109 - -/* Device tree flags */ - -/* Flags set in power-mgmt nodes in device tree if - * respective idle states are supported in the platform. 
- */ -#define OPAL_PM_NAP_ENABLED 0x00010000 -#define OPAL_PM_SLEEP_ENABLED 0x00020000 -#define OPAL_PM_WINKLE_ENABLED 0x00040000 -#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 +#include <asm/opal-api.h> #ifndef __ASSEMBLY__ #include <linux/notifier.h> -/* Other enums */ -enum OpalVendorApiTokens { - OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 -}; - -enum OpalFreezeState { - OPAL_EEH_STOPPED_NOT_FROZEN = 0, - OPAL_EEH_STOPPED_MMIO_FREEZE = 1, - OPAL_EEH_STOPPED_DMA_FREEZE = 2, - OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3, - OPAL_EEH_STOPPED_RESET = 4, - OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, - OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 -}; - -enum OpalEehFreezeActionToken { - OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, - OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3, - - OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1, - OPAL_EEH_ACTION_SET_FREEZE_DMA = 2, - OPAL_EEH_ACTION_SET_FREEZE_ALL = 3 -}; - -enum OpalPciStatusToken { - OPAL_EEH_NO_ERROR = 0, - OPAL_EEH_IOC_ERROR = 1, - OPAL_EEH_PHB_ERROR = 2, - OPAL_EEH_PE_ERROR = 3, - OPAL_EEH_PE_MMIO_ERROR = 4, - OPAL_EEH_PE_DMA_ERROR = 5 -}; - -enum OpalPciErrorSeverity { - OPAL_EEH_SEV_NO_ERROR = 0, - OPAL_EEH_SEV_IOC_DEAD = 1, - OPAL_EEH_SEV_PHB_DEAD = 2, - OPAL_EEH_SEV_PHB_FENCED = 3, - OPAL_EEH_SEV_PE_ER = 4, - OPAL_EEH_SEV_INF = 5 -}; - -enum OpalErrinjectType { - OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, - OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, -}; - -enum OpalErrinjectFunc { - /* IOA bus specific errors */ - OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, - OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, - OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, - OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, - OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, - OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, - OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, - OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, - OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, - OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, - OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, - OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, - OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, - OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, - OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, - OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, - OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, - OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, - OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, - OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, -}; - -enum OpalShpcAction { - OPAL_SHPC_GET_LINK_STATE = 0, - OPAL_SHPC_GET_SLOT_STATE = 1 -}; - -enum OpalShpcLinkState { - OPAL_SHPC_LINK_DOWN = 0, - OPAL_SHPC_LINK_UP = 1 -}; - -enum OpalMmioWindowType { - OPAL_M32_WINDOW_TYPE = 1, - OPAL_M64_WINDOW_TYPE = 2, - OPAL_IO_WINDOW_TYPE = 3 -}; - -enum OpalShpcSlotState { - OPAL_SHPC_DEV_NOT_PRESENT = 0, - OPAL_SHPC_DEV_PRESENT = 1 -}; - -enum OpalExceptionHandler { - OPAL_MACHINE_CHECK_HANDLER = 1, - OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, - OPAL_SOFTPATCH_HANDLER = 3 -}; - -enum OpalPendingState { - OPAL_EVENT_OPAL_INTERNAL = 0x1, - OPAL_EVENT_NVRAM = 0x2, - OPAL_EVENT_RTC = 0x4, - OPAL_EVENT_CONSOLE_OUTPUT = 0x8, - OPAL_EVENT_CONSOLE_INPUT = 0x10, - OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, - OPAL_EVENT_ERROR_LOG = 0x40, - OPAL_EVENT_EPOW = 0x80, - OPAL_EVENT_LED_STATUS = 0x100, - OPAL_EVENT_PCI_ERROR = 0x200, - OPAL_EVENT_DUMP_AVAIL = 0x400, - OPAL_EVENT_MSG_PENDING = 0x800, -}; - -enum OpalMessageType { - OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc, - * additional params function-specific - */ - OPAL_MSG_MEM_ERR, - OPAL_MSG_EPOW, - OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 
shutdown */ - OPAL_MSG_HMI_EVT, - OPAL_MSG_TYPE_MAX, -}; - -enum OpalThreadStatus { - OPAL_THREAD_INACTIVE = 0x0, - OPAL_THREAD_STARTED = 0x1, - OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */ -}; - -enum OpalPciBusCompare { - OpalPciBusAny = 0, /* Any bus number match */ - OpalPciBus3Bits = 2, /* Match top 3 bits of bus number */ - OpalPciBus4Bits = 3, /* Match top 4 bits of bus number */ - OpalPciBus5Bits = 4, /* Match top 5 bits of bus number */ - OpalPciBus6Bits = 5, /* Match top 6 bits of bus number */ - OpalPciBus7Bits = 6, /* Match top 7 bits of bus number */ - OpalPciBusAll = 7, /* Match bus number exactly */ -}; - -enum OpalDeviceCompare { - OPAL_IGNORE_RID_DEVICE_NUMBER = 0, - OPAL_COMPARE_RID_DEVICE_NUMBER = 1 -}; - -enum OpalFuncCompare { - OPAL_IGNORE_RID_FUNCTION_NUMBER = 0, - OPAL_COMPARE_RID_FUNCTION_NUMBER = 1 -}; - -enum OpalPeAction { - OPAL_UNMAP_PE = 0, - OPAL_MAP_PE = 1 -}; - -enum OpalPeltvAction { - OPAL_REMOVE_PE_FROM_DOMAIN = 0, - OPAL_ADD_PE_TO_DOMAIN = 1 -}; - -enum OpalMveEnableAction { - OPAL_DISABLE_MVE = 0, - OPAL_ENABLE_MVE = 1 -}; - -enum OpalM64EnableAction { - OPAL_DISABLE_M64 = 0, - OPAL_ENABLE_M64_SPLIT = 1, - OPAL_ENABLE_M64_NON_SPLIT = 2 -}; - -enum OpalPciResetScope { - OPAL_RESET_PHB_COMPLETE = 1, - OPAL_RESET_PCI_LINK = 2, - OPAL_RESET_PHB_ERROR = 3, - OPAL_RESET_PCI_HOT = 4, - OPAL_RESET_PCI_FUNDAMENTAL = 5, - OPAL_RESET_PCI_IODA_TABLE = 6 -}; - -enum OpalPciReinitScope { - OPAL_REINIT_PCI_DEV = 1000 -}; - -enum OpalPciResetState { - OPAL_DEASSERT_RESET = 0, - OPAL_ASSERT_RESET = 1 -}; - -enum OpalPciMaskAction { - OPAL_UNMASK_ERROR_TYPE = 0, - OPAL_MASK_ERROR_TYPE = 1 -}; - -enum OpalSlotLedType { - OPAL_SLOT_LED_ID_TYPE = 0, - OPAL_SLOT_LED_FAULT_TYPE = 1 -}; - -enum OpalLedAction { - OPAL_TURN_OFF_LED = 0, - OPAL_TURN_ON_LED = 1, - OPAL_QUERY_LED_STATE_AFTER_BUSY = 2 -}; - -enum OpalEpowStatus { - OPAL_EPOW_NONE = 0, - OPAL_EPOW_UPS = 1, - OPAL_EPOW_OVER_AMBIENT_TEMP = 2, - OPAL_EPOW_OVER_INTERNAL_TEMP = 3 -}; - -/* - * Address cycle types for LPC accesses. 
These also correspond - * to the content of the first cell of the "reg" property for - * device nodes on the LPC bus - */ -enum OpalLPCAddressType { - OPAL_LPC_MEM = 0, - OPAL_LPC_IO = 1, - OPAL_LPC_FW = 2, -}; - -/* System parameter permission */ -enum OpalSysparamPerm { - OPAL_SYSPARAM_READ = 0x1, - OPAL_SYSPARAM_WRITE = 0x2, - OPAL_SYSPARAM_RW = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE), -}; - -struct opal_msg { - __be32 msg_type; - __be32 reserved; - __be64 params[8]; -}; - -enum { - OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1, -}; - -struct opal_ipmi_msg { - uint8_t version; - uint8_t netfn; - uint8_t cmd; - uint8_t data[]; -}; - -/* FSP memory errors handling */ -enum OpalMemErr_Version { - OpalMemErr_V1 = 1, -}; - -enum OpalMemErrType { - OPAL_MEM_ERR_TYPE_RESILIENCE = 0, - OPAL_MEM_ERR_TYPE_DYN_DALLOC, - OPAL_MEM_ERR_TYPE_SCRUB, -}; - -/* Memory Reilience error type */ -enum OpalMemErr_ResilErrType { - OPAL_MEM_RESILIENCE_CE = 0, - OPAL_MEM_RESILIENCE_UE, - OPAL_MEM_RESILIENCE_UE_SCRUB, -}; - -/* Dynamic Memory Deallocation type */ -enum OpalMemErr_DynErrType { - OPAL_MEM_DYNAMIC_DEALLOC = 0, -}; - -/* OpalMemoryErrorData->flags */ -#define OPAL_MEM_CORRECTED_ERROR 0x0001 -#define OPAL_MEM_THRESHOLD_EXCEEDED 0x0002 -#define OPAL_MEM_ACK_REQUIRED 0x8000 - -struct OpalMemoryErrorData { - enum OpalMemErr_Version version:8; /* 0x00 */ - enum OpalMemErrType type:8; /* 0x01 */ - __be16 flags; /* 0x02 */ - uint8_t reserved_1[4]; /* 0x04 */ - - union { - /* Memory Resilience corrected/uncorrected error info */ - struct { - enum OpalMemErr_ResilErrType resil_err_type:8; - uint8_t reserved_1[7]; - __be64 physical_address_start; - __be64 physical_address_end; - } resilience; - /* Dynamic memory deallocation error info */ - struct { - enum OpalMemErr_DynErrType dyn_err_type:8; - uint8_t reserved_1[7]; - __be64 physical_address_start; - __be64 physical_address_end; - } dyn_dealloc; - } u; -}; - -/* HMI interrupt event */ -enum OpalHMI_Version { - OpalHMIEvt_V1 = 1, -}; - -enum OpalHMI_Severity { - OpalHMI_SEV_NO_ERROR = 0, - OpalHMI_SEV_WARNING = 1, - OpalHMI_SEV_ERROR_SYNC = 2, - OpalHMI_SEV_FATAL = 3, -}; - -enum OpalHMI_Disposition { - OpalHMI_DISPOSITION_RECOVERED = 0, - OpalHMI_DISPOSITION_NOT_RECOVERED = 1, -}; - -enum OpalHMI_ErrType { - OpalHMI_ERROR_MALFUNC_ALERT = 0, - OpalHMI_ERROR_PROC_RECOV_DONE, - OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN, - OpalHMI_ERROR_PROC_RECOV_MASKED, - OpalHMI_ERROR_TFAC, - OpalHMI_ERROR_TFMR_PARITY, - OpalHMI_ERROR_HA_OVERFLOW_WARN, - OpalHMI_ERROR_XSCOM_FAIL, - OpalHMI_ERROR_XSCOM_DONE, - OpalHMI_ERROR_SCOM_FIR, - OpalHMI_ERROR_DEBUG_TRIG_FIR, - OpalHMI_ERROR_HYP_RESOURCE, -}; - -struct OpalHMIEvent { - uint8_t version; /* 0x00 */ - uint8_t severity; /* 0x01 */ - uint8_t type; /* 0x02 */ - uint8_t disposition; /* 0x03 */ - uint8_t reserved_1[4]; /* 0x04 */ - - __be64 hmer; - /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. 
*/ - __be64 tfmr; -}; - -enum { - OPAL_P7IOC_DIAG_TYPE_NONE = 0, - OPAL_P7IOC_DIAG_TYPE_RGC = 1, - OPAL_P7IOC_DIAG_TYPE_BI = 2, - OPAL_P7IOC_DIAG_TYPE_CI = 3, - OPAL_P7IOC_DIAG_TYPE_MISC = 4, - OPAL_P7IOC_DIAG_TYPE_I2C = 5, - OPAL_P7IOC_DIAG_TYPE_LAST = 6 -}; - -struct OpalIoP7IOCErrorData { - __be16 type; - - /* GEM */ - __be64 gemXfir; - __be64 gemRfir; - __be64 gemRirqfir; - __be64 gemMask; - __be64 gemRwof; - - /* LEM */ - __be64 lemFir; - __be64 lemErrMask; - __be64 lemAction0; - __be64 lemAction1; - __be64 lemWof; - - union { - struct OpalIoP7IOCRgcErrorData { - __be64 rgcStatus; /* 3E1C10 */ - __be64 rgcLdcp; /* 3E1C18 */ - }rgc; - struct OpalIoP7IOCBiErrorData { - __be64 biLdcp0; /* 3C0100, 3C0118 */ - __be64 biLdcp1; /* 3C0108, 3C0120 */ - __be64 biLdcp2; /* 3C0110, 3C0128 */ - __be64 biFenceStatus; /* 3C0130, 3C0130 */ - - u8 biDownbound; /* BI Downbound or Upbound */ - }bi; - struct OpalIoP7IOCCiErrorData { - __be64 ciPortStatus; /* 3Dn008 */ - __be64 ciPortLdcp; /* 3Dn010 */ - - u8 ciPort; /* Index of CI port: 0/1 */ - }ci; - }; -}; - -/** - * This structure defines the overlay which will be used to store PHB error - * data upon request. - */ -enum { - OPAL_PHB_ERROR_DATA_VERSION_1 = 1, -}; - -enum { - OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1, - OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2 -}; - -enum { - OPAL_P7IOC_NUM_PEST_REGS = 128, - OPAL_PHB3_NUM_PEST_REGS = 256 -}; - -/* CAPI modes for PHB */ -enum { - OPAL_PHB_CAPI_MODE_PCIE = 0, - OPAL_PHB_CAPI_MODE_CAPI = 1, - OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, - OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, -}; - -struct OpalIoPhbErrorCommon { - __be32 version; - __be32 ioType; - __be32 len; -}; - -struct OpalIoP7IOCPhbErrorData { - struct OpalIoPhbErrorCommon common; - - __be32 brdgCtl; - - // P7IOC utl regs - __be32 portStatusReg; - __be32 rootCmplxStatus; - __be32 busAgentStatus; - - // P7IOC cfg regs - __be32 deviceStatus; - __be32 slotStatus; - __be32 linkStatus; - __be32 devCmdStatus; - __be32 devSecStatus; - - // cfg AER regs - __be32 rootErrorStatus; - __be32 uncorrErrorStatus; - __be32 corrErrorStatus; - __be32 tlpHdr1; - __be32 tlpHdr2; - __be32 tlpHdr3; - __be32 tlpHdr4; - __be32 sourceId; - - __be32 rsv3; - - // Record data about the call to allocate a buffer. 
- __be64 errorClass; - __be64 correlator; - - //P7IOC MMIO Error Regs - __be64 p7iocPlssr; // n120 - __be64 p7iocCsr; // n110 - __be64 lemFir; // nC00 - __be64 lemErrorMask; // nC18 - __be64 lemWOF; // nC40 - __be64 phbErrorStatus; // nC80 - __be64 phbFirstErrorStatus; // nC88 - __be64 phbErrorLog0; // nCC0 - __be64 phbErrorLog1; // nCC8 - __be64 mmioErrorStatus; // nD00 - __be64 mmioFirstErrorStatus; // nD08 - __be64 mmioErrorLog0; // nD40 - __be64 mmioErrorLog1; // nD48 - __be64 dma0ErrorStatus; // nD80 - __be64 dma0FirstErrorStatus; // nD88 - __be64 dma0ErrorLog0; // nDC0 - __be64 dma0ErrorLog1; // nDC8 - __be64 dma1ErrorStatus; // nE00 - __be64 dma1FirstErrorStatus; // nE08 - __be64 dma1ErrorLog0; // nE40 - __be64 dma1ErrorLog1; // nE48 - __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS]; - __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS]; -}; - -struct OpalIoPhb3ErrorData { - struct OpalIoPhbErrorCommon common; - - __be32 brdgCtl; - - /* PHB3 UTL regs */ - __be32 portStatusReg; - __be32 rootCmplxStatus; - __be32 busAgentStatus; - - /* PHB3 cfg regs */ - __be32 deviceStatus; - __be32 slotStatus; - __be32 linkStatus; - __be32 devCmdStatus; - __be32 devSecStatus; - - /* cfg AER regs */ - __be32 rootErrorStatus; - __be32 uncorrErrorStatus; - __be32 corrErrorStatus; - __be32 tlpHdr1; - __be32 tlpHdr2; - __be32 tlpHdr3; - __be32 tlpHdr4; - __be32 sourceId; - - __be32 rsv3; - - /* Record data about the call to allocate a buffer */ - __be64 errorClass; - __be64 correlator; - - __be64 nFir; /* 000 */ - __be64 nFirMask; /* 003 */ - __be64 nFirWOF; /* 008 */ - - /* PHB3 MMIO Error Regs */ - __be64 phbPlssr; /* 120 */ - __be64 phbCsr; /* 110 */ - __be64 lemFir; /* C00 */ - __be64 lemErrorMask; /* C18 */ - __be64 lemWOF; /* C40 */ - __be64 phbErrorStatus; /* C80 */ - __be64 phbFirstErrorStatus; /* C88 */ - __be64 phbErrorLog0; /* CC0 */ - __be64 phbErrorLog1; /* CC8 */ - __be64 mmioErrorStatus; /* D00 */ - __be64 mmioFirstErrorStatus; /* D08 */ - __be64 mmioErrorLog0; /* D40 */ - __be64 mmioErrorLog1; /* D48 */ - __be64 dma0ErrorStatus; /* D80 */ - __be64 dma0FirstErrorStatus; /* D88 */ - __be64 dma0ErrorLog0; /* DC0 */ - __be64 dma0ErrorLog1; /* DC8 */ - __be64 dma1ErrorStatus; /* E00 */ - __be64 dma1FirstErrorStatus; /* E08 */ - __be64 dma1ErrorLog0; /* E40 */ - __be64 dma1ErrorLog1; /* E48 */ - __be64 pestA[OPAL_PHB3_NUM_PEST_REGS]; - __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; -}; - -enum { - OPAL_REINIT_CPUS_HILE_BE = (1 << 0), - OPAL_REINIT_CPUS_HILE_LE = (1 << 1), -}; - -typedef struct oppanel_line { - const char * line; - uint64_t line_len; -} oppanel_line_t; - -/* OPAL I2C request */ -struct opal_i2c_request { - uint8_t type; -#define OPAL_I2C_RAW_READ 0 -#define OPAL_I2C_RAW_WRITE 1 -#define OPAL_I2C_SM_READ 2 -#define OPAL_I2C_SM_WRITE 3 - uint8_t flags; -#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */ - uint8_t subaddr_sz; /* Max 4 */ - uint8_t reserved; - __be16 addr; /* 7 or 10 bit address */ - __be16 reserved2; - __be32 subaddr; /* Sub-address if any */ - __be32 size; /* Data size */ - __be64 buffer_ra; /* Buffer real address */ -}; +/* We calculate number of sg entries based on PAGE_SIZE */ +#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) /* /sys/firmware/opal */ extern struct kobject *opal_kobj; @@ -932,6 +194,13 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, struct opal_i2c_request *oreq); +int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, + uint64_t size, 
uint64_t token); +int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf, + uint64_t size, uint64_t token); +int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size, + uint64_t token); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -946,8 +215,10 @@ extern void hvc_opal_init_early(void); extern int opal_notifier_register(struct notifier_block *nb); extern int opal_notifier_unregister(struct notifier_block *nb); -extern int opal_message_notifier_register(enum OpalMessageType msg_type, +extern int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb); +extern int opal_message_notifier_unregister(enum opal_msg_type msg_type, + struct notifier_block *nb); extern void opal_notifier_enable(void); extern void opal_notifier_disable(void); extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); @@ -962,7 +233,7 @@ extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); struct rtc_time; extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); -extern void opal_flash_init(void); +extern void opal_flash_update_init(void); extern void opal_flash_term_callback(void); extern int opal_elog_init(void); extern void opal_platform_dump_init(void); @@ -983,13 +254,8 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, unsigned long vmalloc_size); void opal_free_sg_list(struct opal_sg_list *sg); -/* - * Dump region ID range usable by the OS - */ -#define OPAL_DUMP_REGION_HOST_START 0x80 -#define OPAL_DUMP_REGION_LOG_BUF 0x80 -#define OPAL_DUMP_REGION_HOST_END 0xFF +extern int opal_error_code(int rc); #endif /* __ASSEMBLY__ */ -#endif /* __OPAL_H */ +#endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e5f22c6c4bf9..70bd4381f8e6 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -106,9 +106,9 @@ struct paca_struct { #endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC_BOOK3E - u64 exgen[8] __attribute__((aligned(0x80))); + u64 exgen[8] __aligned(0x40); /* Keep pgd in the same cacheline as the start of extlb */ - pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */ + pgd_t *pgd __aligned(0x40); /* Current PGD */ pgd_t *kernel_pgd; /* Kernel PGD */ /* Shared by all threads of a core -- points to tcd of first thread */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 546d036fe925..1811c44bf34b 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -15,6 +15,24 @@ struct device_node; /* + * PCI controller operations + */ +struct pci_controller_ops { + void (*dma_dev_setup)(struct pci_dev *dev); + void (*dma_bus_setup)(struct pci_bus *bus); + + int (*probe_mode)(struct pci_bus *); + + /* Called when pci_enable_device() is called. Returns true to + * allow assignment/enabling of the device. 
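+ * A false return is expected to make pci_enable_device() fail
+ * (typically with -EINVAL), matching the non-zero return of the
+ * old ppc_md.pcibios_enable_device_hook.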
*/ + bool (*enable_device_hook)(struct pci_dev *); + + /* Called during PCI resource reassignment */ + resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); + void (*reset_secondary_bus)(struct pci_dev *dev); +}; + +/* * Structure of a PCI controller (host bridge) */ struct pci_controller { @@ -46,6 +64,7 @@ struct pci_controller { resource_size_t isa_mem_phys; resource_size_t isa_mem_size; + struct pci_controller_ops controller_ops; struct pci_ops *ops; unsigned int __iomem *cfg_addr; void __iomem *cfg_data; @@ -89,6 +108,7 @@ struct pci_controller { #ifdef CONFIG_PPC64 unsigned long buid; + struct pci_dn *pci_data; #endif /* CONFIG_PPC64 */ void *private_data; @@ -154,31 +174,51 @@ static inline int isa_vaddr_is_ioport(void __iomem *address) struct iommu_table; struct pci_dn { + int flags; +#define PCI_DN_FLAG_IOV_VF 0x01 + int busno; /* pci bus number */ int devfn; /* pci device and function number */ + int vendor_id; /* Vendor ID */ + int device_id; /* Device ID */ + int class_code; /* Device class code */ + struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ struct iommu_table *iommu_table; /* for phb's or bridges */ struct device_node *node; /* back-pointer to the device_node */ int pci_ext_config_space; /* for pci devices */ - struct pci_dev *pcidev; /* back-pointer to the pci device */ #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ #endif #define IODA_INVALID_PE (-1) #ifdef CONFIG_PPC_POWERNV int pe_number; +#ifdef CONFIG_PCI_IOV + u16 vfs_expanded; /* number of VFs IOV BAR expanded */ + u16 num_vfs; /* number of VFs enabled*/ + int offset; /* PE# for the first VF PE */ +#define M64_PER_IOV 4 + int m64_per_iov; +#define IODA_INVALID_M64 (-1) + int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV]; +#endif /* CONFIG_PCI_IOV */ #endif + struct list_head child_list; + struct list_head list; }; /* Get the pointer to a device_node's pci_dn */ #define PCI_DN(dn) ((struct pci_dn *) (dn)->data) +extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, + int devfn); extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); - -extern void * update_dn_pci_info(struct device_node *dn, void *data); +extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); +extern void remove_dev_pci_data(struct pci_dev *pdev); +extern void *update_dn_pci_info(struct device_node *dn, void *data); static inline int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) @@ -191,20 +231,12 @@ static inline int pci_device_from_OF_node(struct device_node *np, } #if defined(CONFIG_EEH) -static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) +static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn) { - /* - * For those OF nodes whose parent isn't PCI bridge, they - * don't have PCI_DN actually. So we have to skip them for - * any EEH operations. - */ - if (!dn || !PCI_DN(dn)) - return NULL; - - return PCI_DN(dn)->edev; + return pdn ? 
pdn->edev : NULL; } #else -#define of_node_to_eeh_dev(x) (NULL) +#define pdn_to_eeh_dev(x) (NULL) #endif /** Find the bus corresponding to the indicated device node */ diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 1b0739bc14b5..4aef8d660999 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -22,7 +22,7 @@ #include <asm-generic/pci-dma-compat.h> -/* Return values for ppc_md.pci_probe_mode function */ +/* Return values for pci_controller_ops.probe_mode function */ #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ #define PCI_PROBE_DEVTREE 1 /* Instantiate from device tree */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index db1e2b8eff3c..4122a86d6858 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -23,8 +23,6 @@ extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary); extern struct list_head hose_list; -extern void find_and_init_phbs(void); - extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ @@ -33,9 +31,14 @@ extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /* PCI device_node operations */ struct device_node; +struct pci_dn; + typedef void *(*traverse_func)(struct device_node *me, void *data); void *traverse_pci_devices(struct device_node *start, traverse_func pre, void *data); +void *traverse_pci_dn(struct pci_dn *root, + void *(*fn)(struct pci_dn *, void *), + void *data); extern void pci_devs_phb_init(void); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); @@ -76,7 +79,6 @@ static inline const char *eeh_driver_name(struct pci_dev *pdev) #endif /* CONFIG_EEH */ #else /* CONFIG_PCI */ -static inline void find_and_init_phbs(void) { } static inline void init_pci_config_tokens(void) { } #endif /* !CONFIG_PCI */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 7e4612528546..dd0fc18d8103 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -637,105 +637,105 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945) /* AltiVec Registers (VPRs) */ -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define vr24 24 -#define vr25 25 -#define vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 /* VSX Registers (VSRs) */ -#define vsr0 0 -#define vsr1 1 -#define vsr2 2 -#define vsr3 3 -#define vsr4 4 -#define vsr5 5 -#define vsr6 6 -#define vsr7 7 -#define vsr8 8 -#define vsr9 9 -#define 
vsr10 10 -#define vsr11 11 -#define vsr12 12 -#define vsr13 13 -#define vsr14 14 -#define vsr15 15 -#define vsr16 16 -#define vsr17 17 -#define vsr18 18 -#define vsr19 19 -#define vsr20 20 -#define vsr21 21 -#define vsr22 22 -#define vsr23 23 -#define vsr24 24 -#define vsr25 25 -#define vsr26 26 -#define vsr27 27 -#define vsr28 28 -#define vsr29 29 -#define vsr30 30 -#define vsr31 31 -#define vsr32 32 -#define vsr33 33 -#define vsr34 34 -#define vsr35 35 -#define vsr36 36 -#define vsr37 37 -#define vsr38 38 -#define vsr39 39 -#define vsr40 40 -#define vsr41 41 -#define vsr42 42 -#define vsr43 43 -#define vsr44 44 -#define vsr45 45 -#define vsr46 46 -#define vsr47 47 -#define vsr48 48 -#define vsr49 49 -#define vsr50 50 -#define vsr51 51 -#define vsr52 52 -#define vsr53 53 -#define vsr54 54 -#define vsr55 55 -#define vsr56 56 -#define vsr57 57 -#define vsr58 58 -#define vsr59 59 -#define vsr60 60 -#define vsr61 61 -#define vsr62 62 -#define vsr63 63 +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 +#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 50 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 /* SPE Registers (EVPRs) */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 2e23e92a4372..7a4ede16b283 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -4,6 +4,7 @@ #include <linux/spinlock.h> #include <asm/page.h> +#include <linux/time.h> /* * Definitions for talking to the RTAS on CHRP machines. 
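 * (RTAS, the Run-Time Abstraction Services, is the firmware call
 * interface specified by CHRP and carried forward by PAPR.)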
@@ -273,6 +274,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I') #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') #define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') +#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { @@ -296,6 +298,31 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) return be16_to_cpu(sect->length); } +/* RTAS pseries hotplug errorlog section */ +struct pseries_hp_errorlog { + u8 resource; + u8 action; + u8 id_type; + u8 reserved; + union { + __be32 drc_index; + __be32 drc_count; + char drc_name[1]; + } _drc_u; +}; + +#define PSERIES_HP_ELOG_RESOURCE_CPU 1 +#define PSERIES_HP_ELOG_RESOURCE_MEM 2 +#define PSERIES_HP_ELOG_RESOURCE_SLOT 3 +#define PSERIES_HP_ELOG_RESOURCE_PHB 4 + +#define PSERIES_HP_ELOG_ACTION_ADD 1 +#define PSERIES_HP_ELOG_ACTION_REMOVE 2 + +#define PSERIES_HP_ELOG_ID_DRC_NAME 1 +#define PSERIES_HP_ELOG_ID_DRC_INDEX 2 +#define PSERIES_HP_ELOG_ID_DRC_COUNT 3 + struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id); @@ -327,7 +354,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); extern int rtas_online_cpus_mask(cpumask_var_t cpus); extern int rtas_offline_cpus_mask(cpumask_var_t cpus); -extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return); +extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; extern unsigned long rtas_get_boot_time(void); @@ -343,8 +370,12 @@ extern int early_init_dt_scan_rtas(unsigned long node, extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES +extern time64_t last_rtas_event; +extern int clobbering_unread_rtas_event(void); extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); +#else +static inline int clobbering_unread_rtas_event(void) { return 0; } #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index fbdf18cf954c..e9d384cbd021 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -7,7 +7,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; -extern int mem_init_done; /* set on boot once kmalloc can be called */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index d607df5081a7..825663c30945 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -42,7 +42,7 @@ struct smp_ops_t { #ifdef CONFIG_PPC_SMP_MUXED_IPI void (*cause_ipi)(int cpu, unsigned long data); #endif - int (*probe)(void); + void (*probe)(void); int (*kick_cpu)(int nr); void (*setup_cpu)(int nr); void (*bringup_done)(void); @@ -125,7 +125,6 @@ extern irqreturn_t smp_ipi_demux(void); void smp_init_pSeries(void); void smp_init_cell(void); -void smp_init_celleb(void); void smp_setup_cpu_maps(void); extern int __cpu_disable(void); @@ -175,7 +174,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) extern int smt_enabled_at_boot; -extern int smp_mpic_probe(void); +extern void smp_mpic_probe(void); extern void smp_mpic_setup_cpu(int cpu); extern int 
smp_generic_kick_cpu(int nr); extern int smp_generic_cpu_bootable(unsigned int nr); diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h index 96f59de61855..487e09077a3e 100644 --- a/arch/powerpc/include/asm/swab.h +++ b/arch/powerpc/include/asm/swab.h @@ -9,30 +9,4 @@ #include <uapi/asm/swab.h> -static __inline__ __u16 ld_le16(const volatile __u16 *addr) -{ - __u16 val; - - __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); - return val; -} - -static __inline__ void st_le16(volatile __u16 *addr, const __u16 val) -{ - __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); -} - -static __inline__ __u32 ld_le32(const volatile __u32 *addr) -{ - __u32 val; - - __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); - return val; -} - -static __inline__ void st_le32(volatile __u32 *addr, const __u32 val) -{ - __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); -} - #endif /* _ASM_POWERPC_SWAB_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 91062eef582f..f1863a138b4a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -367,3 +367,4 @@ SYSCALL_SPU(getrandom) SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) COMPAT_SYS(execveat) +PPC64ONLY(switch_endian) diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h index c44131e68e11..233ef5fe5fde 100644 --- a/arch/powerpc/include/asm/ucc_slow.h +++ b/arch/powerpc/include/asm/ucc_slow.h @@ -251,19 +251,6 @@ void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode); */ void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode); -/* ucc_slow_poll_transmitter_now - * Immediately forces a poll of the transmitter for data to be sent. - * Typically, the hardware performs a periodic poll for data that the - * transmit routine has set up to be transmitted. In cases where - * this polling cycle is not soon enough, this optional routine can - * be invoked to force a poll right away, instead. Proper use for - * each transmission for which this functionality is desired is to - * call the transmit routine and then this routine right after. - * - * uccs - (In) pointer to the slow UCC structure. - */ -void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs); - /* ucc_slow_graceful_stop_tx * Smoothly stops transmission on a specified slow UCC. 
* diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 36b79c31eedd..f4f8b667d75b 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 363 +#define __NR_syscalls 364 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index e5f8dd366212..ab3acd2f2786 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -25,12 +25,12 @@ static inline void scr_writew(u16 val, volatile u16 *addr) { - st_le16(addr, val); + *addr = cpu_to_le16(val); } static inline u16 scr_readw(volatile const u16 *addr) { - return ld_le16(addr); + return le16_to_cpu(*addr); } #define VT_BUF_HAVE_MEMCPYW diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 6997f4a271df..0e25bdb190bb 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -146,7 +146,7 @@ extern void xics_update_irq_servers(void); extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join); extern void xics_mask_unknown_vec(unsigned int vec); extern irqreturn_t xics_ipi_dispatch(int cpu); -extern int xics_smp_probe(void); +extern void xics_smp_probe(void); extern void xics_register_ics(struct ics *ics); extern void xics_teardown_cpu(void); extern void xics_kexec_teardown_cpu(int secondary); diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h index 77d2ed35b111..8036b385417d 100644 --- a/arch/powerpc/include/uapi/asm/ptrace.h +++ b/arch/powerpc/include/uapi/asm/ptrace.h @@ -136,7 +136,7 @@ struct pt_regs { #endif /* __powerpc64__ */ /* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * Get/set all the altivec registers v0..v31, vscr, vrsave, in one go. * The transfer totals 34 quadwords. Quadwords 0-31 contain the * corresponding vector registers. Quadword 32 contains the vscr as the * last word (offset 12) within that quadword. 
Quadword 33 contains the diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h index 5d836b7c1176..5047659815a5 100644 --- a/arch/powerpc/include/uapi/asm/tm.h +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -11,7 +11,7 @@ #define TM_CAUSE_RESCHED 0xde #define TM_CAUSE_TLBI 0xdc #define TM_CAUSE_FAC_UNAV 0xda -#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_SYSCALL 0xd8 #define TM_CAUSE_MISC 0xd6 /* future use */ #define TM_CAUSE_SIGNAL 0xd4 #define TM_CAUSE_ALIGNMENT 0xd2 diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index ef5b5b1f3123..e4aa173dae62 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -385,5 +385,6 @@ #define __NR_memfd_create 360 #define __NR_bpf 361 #define __NR_execveat 362 +#define __NR_switch_endian 363 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 502cf69b6c89..c1ebbdaac28f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -33,7 +33,8 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o vdso32/ + misc_$(CONFIG_WORD_SIZE).o vdso32/ \ + of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o @@ -47,7 +48,6 @@ obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o -obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index ae77b7e59889..c641983bbdd6 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -61,12 +61,22 @@ struct cache_type_info { }; /* These are used to index the cache_type_info array. */ -#define CACHE_TYPE_UNIFIED 0 -#define CACHE_TYPE_INSTRUCTION 1 -#define CACHE_TYPE_DATA 2 +#define CACHE_TYPE_UNIFIED 0 /* cache-size, cache-block-size, etc. */ +#define CACHE_TYPE_UNIFIED_D 1 /* d-cache-size, d-cache-block-size, etc */ +#define CACHE_TYPE_INSTRUCTION 2 +#define CACHE_TYPE_DATA 3 static const struct cache_type_info cache_type_info[] = { { + /* Embedded systems that use cache-size, cache-block-size, + * etc. for the Unified (typically L2) cache. */ + .name = "Unified", + .size_prop = "cache-size", + .line_size_props = { "cache-line-size", + "cache-block-size", }, + .nr_sets_prop = "cache-sets", + }, + { /* PowerPC Processor binding says the [di]-cache-* * must be equal on unified caches, so just use * d-cache properties. */ @@ -293,7 +303,8 @@ static struct cache *cache_find_first_sibling(struct cache *cache) { struct cache *iter; - if (cache->type == CACHE_TYPE_UNIFIED) + if (cache->type == CACHE_TYPE_UNIFIED || + cache->type == CACHE_TYPE_UNIFIED_D) return cache; list_for_each_entry(iter, &cache_list, list) @@ -324,16 +335,29 @@ static bool cache_node_is_unified(const struct device_node *np) return of_get_property(np, "cache-unified", NULL); } -static struct cache *cache_do_one_devnode_unified(struct device_node *node, - int level) +/* + * Unified caches can have two different sets of tags. Most embedded + * use cache-size, etc. for the unified cache size, but open firmware systems + * use d-cache-size, etc. 
Check on initialization for which type we have, and + * return the appropriate structure type. Assume it's embedded if it isn't + * open firmware. If it's yet a 3rd type, then there will be missing entries + * in /sys/devices/system/cpu/cpu0/cache/index2/, and this code will need + * to be extended further. + */ +static int cache_is_unified_d(const struct device_node *np) { - struct cache *cache; + return of_get_property(np, + cache_type_info[CACHE_TYPE_UNIFIED_D].size_prop, NULL) ? + CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED; +} +/* + */ +static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) +{ pr_debug("creating L%d ucache for %s\n", level, node->full_name); - cache = new_cache(CACHE_TYPE_UNIFIED, level, node); - - return cache; + return new_cache(cache_is_unified_d(node), level, node); } static struct cache *cache_do_one_devnode_split(struct device_node *node, diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 46733535cc0b..9c9b7411b28b 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -137,15 +137,11 @@ __init_HFSCR: /* * Clear the TLB using the specified IS form of tlbiel instruction * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. - * - * r3 = IS field */ __init_tlb_power7: - li r3,0xc00 /* IS field = 0b11 */ -_GLOBAL(__flush_tlb_power7) li r6,128 mtctr r6 - mr r7,r3 /* IS field */ + li r7,0xc00 /* IS field = 0b11 */ ptesync 2: tlbiel r7 addi r7,r7,0x1000 @@ -154,11 +150,9 @@ _GLOBAL(__flush_tlb_power7) 1: blr __init_tlb_power8: - li r3,0xc00 /* IS field = 0b11 */ -_GLOBAL(__flush_tlb_power8) li r6,512 mtctr r6 - mr r7,r3 /* IS field */ + li r7,0xc00 /* IS field = 0b11 */ ptesync 2: tlbiel r7 addi r7,r7,0x1000 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f83046878336..60262fdf35ba 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -71,8 +71,8 @@ extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); -extern void __flush_tlb_power7(unsigned long inval_selector); -extern void __flush_tlb_power8(unsigned long inval_selector); +extern void __flush_tlb_power7(unsigned int action); +extern void __flush_tlb_power8(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 735979764cd4..6e8d764ce47b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -116,16 +116,13 @@ void __init swiotlb_detect_4g(void) } } -static int __init swiotlb_late_init(void) +static int __init check_swiotlb_enabled(void) { - if (ppc_swiotlb_enable) { + if (ppc_swiotlb_enable) swiotlb_print_info(); - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } else { + else swiotlb_free(); - } return 0; } -subsys_initcall(swiotlb_late_init); +subsys_initcall(check_swiotlb_enabled); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 3b2252e7731b..a4c62eb0ee48 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -164,30 +164,34 @@ __setup("eeh=", eeh_setup); */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct 
device_node *dn = eeh_dev_to_of_node(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - pr_warn("EEH: of node=%s\n", dn->full_name); + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); - eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -195,11 +199,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(dn, cap, 4, &cfg); + eeh_ops->read_config(pdn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(dn, cap+4, 4, &cfg); + eeh_ops->read_config(pdn, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -211,7 +215,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -238,7 +242,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -414,11 +418,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) int ret; int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; - struct device_node *dn; + struct pci_dn *pdn; struct pci_dev *dev; struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; - const char *location; + const char *location = NULL; eeh_stats.total_mmio_ffs++; @@ -429,15 +433,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) eeh_stats.no_dn++; return 0; } - dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. 
*/ if (!pe) { eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check for %s %s\n", - eeh_pci_name(dev), dn->full_name); + pr_debug("EEH: Ignored check for %s\n", + eeh_pci_name(dev)); return 0; } @@ -473,10 +476,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev) if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; if (pe->check_count % EEH_MAX_FAILS == 0) { - location = of_get_property(dn, "ibm,loc-code", NULL); + pdn = eeh_dev_to_pdn(edev); + if (pdn->node) + location = of_get_property(pdn->node, "ibm,loc-code", NULL); printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", - pe->check_count, location, + pe->check_count, + location ? location : "unknown", eeh_driver_name(dev), eeh_pci_name(dev)); printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); @@ -667,6 +673,55 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) return rc; } +static void *eeh_disable_and_save_dev_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); + struct pci_dev *dev = userdata; + + /* + * The caller should have disabled and saved the + * state for the specified device + */ + if (!pdev || pdev == dev) + return NULL; + + /* Ensure we have D0 power state */ + pci_set_power_state(pdev, PCI_D0); + + /* Save device state */ + pci_save_state(pdev); + + /* + * Disable device to avoid any DMA traffic and + * interrupt from the device + */ + pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + return NULL; +} + +static void *eeh_restore_dev_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); + struct pci_dev *dev = userdata; + + if (!pdev) + return NULL; + + /* Apply customization from firmware */ + if (pdn && eeh_ops->restore_config) + eeh_ops->restore_config(pdn); + + /* The caller should restore state for the specified device */ + if (pdev != dev) + pci_restore_state(pdev); + + return NULL; +} + /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct @@ -689,13 +744,19 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_unfreeze_pe(pe, false); eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); break; case pcie_hot_reset: + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; @@ -815,15 +876,15 @@ out: */ void eeh_save_bars(struct eeh_dev *edev) { + struct pci_dn *pdn; int i; - struct device_node *dn; - if (!edev) + pdn = eeh_dev_to_pdn(edev); + if (!pdn) return; - dn = eeh_dev_to_of_node(edev); for (i = 0; i < 16; i++) - eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need to enable bus @@ -914,7 +975,7 @@ static struct notifier_block eeh_reboot_nb = { int eeh_init(void) { struct pci_controller *hose, *tmp; - struct device_node *phb; + struct pci_dn *pdn; static int cnt = 
0; int ret = 0; @@ -949,20 +1010,9 @@ int eeh_init(void) return ret; /* Enable EEH for all adapters */ - if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb = hose->dn; - traverse_pci_devices(phb, eeh_ops->of_probe, NULL); - } - } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) - pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); - } else { - pr_warn("%s: Invalid probe mode %x", - __func__, eeh_subsystem_flags); - return -EINVAL; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + pdn = hose->pci_data; + traverse_pci_dn(pdn, eeh_ops->probe, NULL); } /* @@ -987,8 +1037,8 @@ int eeh_init(void) core_initcall_sync(eeh_init); /** - * eeh_add_device_early - Enable EEH for the indicated device_node - * @dn: device node for which to set up EEH + * eeh_add_device_early - Enable EEH for the indicated device node + * @pdn: PCI device node for which to set up EEH * * This routine must be used to perform EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). @@ -998,44 +1048,41 @@ core_initcall_sync(eeh_init); * on the CEC architecture, type of the device, on earlier boot * command-line arguments & etc. */ -void eeh_add_device_early(struct device_node *dn) +void eeh_add_device_early(struct pci_dn *pdn) { struct pci_controller *phb; + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - /* - * If we're doing EEH probe based on PCI device, we - * would delay the probe until late stage because - * the PCI device isn't available this moment. - */ - if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) + if (!edev || !eeh_enabled()) return; - if (!of_node_to_eeh_dev(dn)) - return; - phb = of_node_to_eeh_dev(dn)->phb; - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || 0 == phb->buid) + phb = edev->phb; + if (NULL == phb || + (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) return; - eeh_ops->of_probe(dn, NULL); + eeh_ops->probe(pdn, NULL); } /** * eeh_add_device_tree_early - Enable EEH for the indicated device - * @dn: device node + * @pdn: PCI device node * * This routine must be used to perform EEH initialization for the * indicated PCI device that was added after system boot (e.g. * hotplug, dlpar). */ -void eeh_add_device_tree_early(struct device_node *dn) +void eeh_add_device_tree_early(struct pci_dn *pdn) { - struct device_node *sib; + struct pci_dn *n; - for_each_child_of_node(dn, sib) - eeh_add_device_tree_early(sib); - eeh_add_device_early(dn); + if (!pdn) + return; + + list_for_each_entry(n, &pdn->child_list, list) + eeh_add_device_tree_early(n); + eeh_add_device_early(pdn); } EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); @@ -1048,7 +1095,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); */ void eeh_add_device_late(struct pci_dev *dev) { - struct device_node *dn; + struct pci_dn *pdn; struct eeh_dev *edev; if (!dev || !eeh_enabled()) @@ -1056,8 +1103,8 @@ void eeh_add_device_late(struct pci_dev *dev) pr_debug("EEH: Adding device %s\n", pci_name(dev)); - dn = pci_device_to_OF_node(dev); - edev = of_node_to_eeh_dev(dn); + pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + edev = pdn_to_eeh_dev(pdn); if (edev->pdev == dev) { pr_debug("EEH: Already referenced !\n"); return; @@ -1089,13 +1136,6 @@ void eeh_add_device_late(struct pci_dev *dev) edev->pdev = dev; dev->dev.archdata.edev = edev; - /* - * We have to do the EEH probe here because the PCI device - * hasn't been created yet in the early stage. 
- */ - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->dev_probe(dev, NULL); - eeh_addr_cache_insert_dev(dev); } diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 07d8a2423a61..eeabeabea49c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -171,30 +171,27 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) { - struct device_node *dn; + struct pci_dn *pdn; struct eeh_dev *edev; int i; - dn = pci_device_to_OF_node(dev); - if (!dn) { + pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + if (!pdn) { pr_warn("PCI: no pci dn found for dev=%s\n", pci_name(dev)); return; } - edev = of_node_to_eeh_dev(dn); + edev = pdn_to_eeh_dev(pdn); if (!edev) { - pr_warn("PCI: no EEH dev found for dn=%s\n", - dn->full_name); + pr_warn("PCI: no EEH dev found for %s\n", + pci_name(dev)); return; } /* Skip any devices for which EEH is not enabled. */ if (!edev->pe) { -#ifdef DEBUG - pr_info("PCI: skip building address cache for=%s - %s\n", - pci_name(dev), dn->full_name); -#endif + dev_dbg(&dev->dev, "EEH: Skip building address cache\n"); return; } @@ -282,18 +279,18 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev) */ void eeh_addr_cache_build(void) { - struct device_node *dn; + struct pci_dn *pdn; struct eeh_dev *edev; struct pci_dev *dev = NULL; spin_lock_init(&pci_io_addr_cache_root.piar_lock); for_each_pci_dev(dev) { - dn = pci_device_to_OF_node(dev); - if (!dn) + pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + if (!pdn) continue; - edev = of_node_to_eeh_dev(dn); + edev = pdn_to_eeh_dev(pdn); if (!edev) continue; diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index e5274ee9a75f..aabba94ff9cb 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -43,13 +43,13 @@ /** * eeh_dev_init - Create EEH device according to OF node - * @dn: device node + * @pdn: PCI device node * @data: PHB * * It will create an EEH device according to the given OF node. The function * might be called by PCI emulation, DR, PHB hotplug. 
*/ -void *eeh_dev_init(struct device_node *dn, void *data) +void *eeh_dev_init(struct pci_dn *pdn, void *data) { struct pci_controller *phb = data; struct eeh_dev *edev; @@ -63,8 +63,8 @@ void *eeh_dev_init(struct device_node *dn, void *data) } /* Associate EEH device with OF node */ - PCI_DN(dn)->edev = edev; - edev->dn = dn; + pdn->edev = edev; + edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); @@ -80,16 +80,16 @@ void *eeh_dev_init(struct device_node *dn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct device_node *dn = phb->dn; + struct pci_dn *root = phb->pci_data; /* EEH PE for PHB */ eeh_phb_pe_create(phb); /* EEH device for PHB */ - eeh_dev_init(dn, phb); + eeh_dev_init(root, phb); /* EEH devices for children OF nodes */ - traverse_pci_devices(dn, eeh_dev_init, phb); + traverse_pci_dn(root, eeh_dev_init, phb); } /** diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d099540c0f56..24768ff3cb73 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -83,28 +83,6 @@ static inline void eeh_pcid_put(struct pci_dev *pdev) module_put(pdev->driver->driver.owner); } -#if 0 -static void print_device_node_tree(struct pci_dn *pdn, int dent) -{ - int i; - struct device_node *pc; - - if (!pdn) - return; - for (i = 0; i < dent; i++) - printk(" "); - printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n", - pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr, - pdn->eeh_pe_config_addr, pdn->node->full_name); - dent += 3; - pc = pdn->node->child; - while (pc) { - print_device_node_tree(PCI_DN(pc), dent); - pc = pc->sibling; - } -} -#endif - /** * eeh_disable_irq - Disable interrupt for the recovering device * @dev: PCI device diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 1e4946c36f9e..35f0b62259bb 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -291,27 +291,25 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) */ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) { - struct device_node *dn; struct eeh_dev *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); /* * It might have the case for the indirect parent * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - dn = edev->dn->parent; - while (dn) { + pdn = pdn ? pdn->parent : NULL; + while (pdn) { /* We're poking out of PCI territory */ - if (!PCI_DN(dn)) return NULL; - - parent = of_node_to_eeh_dev(dn); - /* We're poking out of PCI territory */ - if (!parent) return NULL; + parent = pdn_to_eeh_dev(pdn); + if (!parent) + return NULL; if (parent->pe) return parent->pe; - dn = dn->parent; + pdn = pdn->parent; } return NULL; @@ -330,6 +328,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; + /* Check if the PE number is valid */ + if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { + pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n", + __func__, edev->config_addr, edev->phb->global_number); + return -EINVAL; + } + /* * Search the PE has been existing or not according * to the PE address. 
If that has been existing, the @@ -338,21 +343,18 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) */ pe = eeh_pe_get(edev); if (pe && !(pe->type & EEH_PE_INVALID)) { - if (!edev->pe_config_addr) { - pr_err("%s: PE with addr 0x%x already exists\n", - __func__, edev->config_addr); - return -EEXIST; - } - /* Mark the PE as type of PCI bus */ pe->type = EEH_PE_BUS; edev->pe = pe; /* Put the edev to PE */ list_add_tail(&edev->list, &pe->edevs); - pr_debug("EEH: Add %s to Bus PE#%x\n", - edev->dn->full_name, pe->addr); - + pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", + edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF), + pe->addr); return 0; } else if (pe && (pe->type & EEH_PE_INVALID)) { list_add_tail(&edev->list, &pe->edevs); @@ -368,9 +370,14 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); parent = parent->parent; } - pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", - edev->dn->full_name, pe->addr, pe->parent->addr); + pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device " + "PE#%x, Parent PE#%x\n", + edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF), + pe->addr, pe->parent->addr); return 0; } @@ -409,8 +416,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->list, &pe->edevs); edev->pe = pe; - pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", - edev->dn->full_name, pe->addr, pe->parent->addr); + pr_debug("EEH: Add %04x:%02x:%02x.%01x to " + "Device PE#%x, Parent PE#%x\n", + edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF), + pe->addr, pe->parent->addr); return 0; } @@ -430,8 +442,11 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) int cnt; if (!edev->pe) { - pr_debug("%s: No PE found for EEH device %s\n", - __func__, edev->dn->full_name); + pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", + __func__, edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF)); return -EEXIST; } @@ -653,9 +668,9 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) * blocked on normal path during the stage. So we need utilize * eeh operations, which is always permitted. 
*/ -static void eeh_bridge_check_link(struct eeh_dev *edev, - struct device_node *dn) +static void eeh_bridge_check_link(struct eeh_dev *edev) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); int cap; uint32_t val; int timeout = 0; @@ -675,32 +690,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev, /* Check slot status */ cap = edev->pcie_cap; - eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { pr_debug(" No card in the slot (0x%04x) !\n", val); return; } /* Check power status if we have the capability */ - eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val); if (val & PCI_EXP_SLTCAP_PCP) { - eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val); if (val & PCI_EXP_SLTCTL_PCC) { pr_debug(" In power-off state, power it on ...\n"); val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); val |= (0x0100 & PCI_EXP_SLTCTL_PIC); - eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val); + eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val); msleep(2 * 1000); } } /* Enable link */ - eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val); val &= ~PCI_EXP_LNKCTL_LD; - eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val); + eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val); if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { pr_debug(" No link reporting capability (0x%08x) \n", val); msleep(1000); @@ -713,7 +728,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev, msleep(20); timeout += 20; - eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val); + eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val); if (val & PCI_EXP_LNKSTA_DLLLA) break; } @@ -728,9 +743,9 @@ static void eeh_bridge_check_link(struct eeh_dev *edev, #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) -static void eeh_restore_bridge_bars(struct eeh_dev *edev, - struct device_node *dn) +static void eeh_restore_bridge_bars(struct eeh_dev *edev) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; /* @@ -738,49 +753,49 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev, * Bus numbers and windows: 0x18 - 0x30 */ for (i = 4; i < 13; i++) - eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); /* Rom: 0x38 */ - eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]); + eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]); /* Cache line & Latency timer: 0xC 0xD */ - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* Max latency, min grant, interrupt ping and line: 0x3C */ - eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); + eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]); /* Check the PCIe link is ready */ - eeh_bridge_check_link(edev, dn); + 
eeh_bridge_check_link(edev); } -static void eeh_restore_device_bars(struct eeh_dev *edev, - struct device_node *dn) +static void eeh_restore_device_bars(struct eeh_dev *edev) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; u32 cmd; for (i = 4; i < 10; i++) - eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ - eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]); - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); /* * Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else @@ -789,7 +804,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); + eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd); } /** @@ -804,16 +819,16 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, static void *eeh_restore_one_device_bars(void *data, void *flag) { struct eeh_dev *edev = (struct eeh_dev *)data; - struct device_node *dn = eeh_dev_to_of_node(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); /* Do special restore for bridges */ if (edev->mode & EEH_DEV_BRIDGE) - eeh_restore_bridge_bars(edev, dn); + eeh_restore_bridge_bars(edev); else - eeh_restore_device_bars(edev, dn); + eeh_restore_device_bars(edev); - if (eeh_ops->restore_config) - eeh_ops->restore_config(dn); + if (eeh_ops->restore_config && pdn) + eeh_ops->restore_config(pdn); return NULL; } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d180caf2d6de..8ca9434c40e6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -34,6 +34,7 @@ #include <asm/ftrace.h> #include <asm/hw_irq.h> #include <asm/context_tracking.h> +#include <asm/tm.h> /* * System calls. @@ -145,6 +146,24 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) andi. r11,r10,_TIF_SYSCALL_DOTRACE bne syscall_dotrace .Lsyscall_dotrace_cont: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + b 1f +END_FTR_SECTION_IFCLR(CPU_FTR_TM) + extrdi. r11, r12, 1, (63-MSR_TS_T_LG) /* transaction active? 
*/ + beq+ 1f + + /* Doom the transaction and don't perform the syscall: */ + mfmsr r11 + li r12, 1 + rldimi r11, r12, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r11, 0 + li r11, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R11) + + b .Lsyscall_exit +1: +#endif cmpldi 0,r0,NR_syscalls bge- syscall_enosys @@ -356,6 +375,11 @@ _GLOBAL(ppc64_swapcontext) bl sys_swapcontext b .Lsyscall_exit +_GLOBAL(ppc_switch_endian) + bl save_nvgprs + bl sys_switch_endian + b .Lsyscall_exit + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 05adc8bbdef8..eeaa0d5f69d5 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -94,6 +94,7 @@ _GLOBAL(power7_powersave_common) beq 1f addi r1,r1,INT_FRAME_SIZE ld r0,16(r1) + li r3,0 /* Return 0 (no nap) */ mtlr r0 blr diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b6f123ab90ed..2c647b1e62e4 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -28,6 +28,55 @@ #include <asm/mce.h> #include <asm/machdep.h> +static void flush_tlb_206(unsigned int num_sets, unsigned int action) +{ + unsigned long rb; + unsigned int i; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + rb = TLBIEL_INVAL_SET; + break; + case TLB_INVAL_SCOPE_LPID: + rb = TLBIEL_INVAL_SET_LPID; + break; + default: + BUG(); + break; + } + + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < num_sets; i++) { + asm volatile("tlbiel %0" : : "r" (rb)); + rb += 1 << TLBIEL_INVAL_SET_SHIFT; + } + asm volatile("ptesync" : : : "memory"); +} + +/* + * Generic routine to flush TLB on power7. This routine is used as + * flush_tlb hook in cpu_spec for Power7 processor. + * + * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. + * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. + */ +void __flush_tlb_power7(unsigned int action) +{ + flush_tlb_206(POWER7_TLB_SETS, action); +} + +/* + * Generic routine to flush TLB on power8. This routine is used as + * flush_tlb hook in cpu_spec for power8 processor. + * + * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. + * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. 
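+ * + * For example, __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL) lands in flush_tlb_206() above: a ptesync, then POWER8_TLB_SETS (512) tlbiel instructions whose RB operand starts at TLBIEL_INVAL_SET and steps by 1 << TLBIEL_INVAL_SET_SHIFT per congruence class, followed by a closing ptesync.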
+ */ +void __flush_tlb_power8(unsigned int action) +{ + flush_tlb_206(POWER8_TLB_SETS, action); +} + /* flush SLBs and reload */ static void flush_and_reload_slb(void) { @@ -79,7 +128,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) } if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); /* reset error bits */ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; } @@ -110,7 +159,7 @@ static long mce_handle_common_ierror(uint64_t srr1) break; case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); handled = 1; } break; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 34f7c9b7cd96..1e703f8ebad4 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -26,6 +26,9 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/kmsg_dump.h> +#include <linux/pstore.h> +#include <linux/zlib.h> #include <asm/uaccess.h> #include <asm/nvram.h> #include <asm/rtas.h> @@ -54,6 +57,680 @@ struct nvram_partition { static LIST_HEAD(nvram_partitions); +#ifdef CONFIG_PPC_PSERIES +struct nvram_os_partition rtas_log_partition = { + .name = "ibm,rtas-log", + .req_size = 2079, + .min_size = 1055, + .index = -1, + .os_partition = true +}; +#endif + +struct nvram_os_partition oops_log_partition = { + .name = "lnx,oops-log", + .req_size = 4000, + .min_size = 2000, + .index = -1, + .os_partition = true +}; + +static const char *nvram_os_partitions[] = { +#ifdef CONFIG_PPC_PSERIES + "ibm,rtas-log", +#endif + "lnx,oops-log", + NULL +}; + +static void oops_to_nvram(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason); + +static struct kmsg_dumper nvram_kmsg_dumper = { + .dump = oops_to_nvram +}; + +/* + * For capturing and compressing an oops or panic report... + + * big_oops_buf[] holds the uncompressed text we're capturing. + * + * oops_buf[] holds the compressed text, preceded by a oops header. + * oops header has u16 holding the version of oops header (to differentiate + * between old and new format header) followed by u16 holding the length of + * the compressed* text (*Or uncompressed, if compression fails.) and u64 + * holding the timestamp. oops_buf[] gets written to NVRAM. + * + * oops_log_info points to the header. oops_data points to the compressed text. + * + * +- oops_buf + * | +- oops_data + * v v + * +-----------+-----------+-----------+------------------------+ + * | version | length | timestamp | text | + * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | + * +-----------+-----------+-----------+------------------------+ + * ^ + * +- oops_log_info + * + * We preallocate these buffers during init to avoid kmalloc during oops/panic. 
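+ * + * Sizing example (see nvram_init_oops_partition() below): oops_data_sz is the partition size minus the 12-byte oops header, and big_oops_buf_sz is oops_data_sz * 100 / 45 (roughly 2.2x the compressed space), on the assumption that zlib shrinks a report to at most 45% of its original size.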
+ */ +static size_t big_oops_buf_sz; +static char *big_oops_buf, *oops_buf; +static char *oops_data; +static size_t oops_data_sz; + +/* Compression parameters */ +#define COMPR_LEVEL 6 +#define WINDOW_BITS 12 +#define MEM_LEVEL 4 +static struct z_stream_s stream; + +#ifdef CONFIG_PSTORE +#ifdef CONFIG_PPC_POWERNV +static struct nvram_os_partition skiboot_partition = { + .name = "ibm,skiboot", + .index = -1, + .os_partition = false +}; +#endif + +#ifdef CONFIG_PPC_PSERIES +static struct nvram_os_partition of_config_partition = { + .name = "of-config", + .index = -1, + .os_partition = false +}; +#endif + +static struct nvram_os_partition common_partition = { + .name = "common", + .index = -1, + .os_partition = false +}; + +static enum pstore_type_id nvram_type_ids[] = { + PSTORE_TYPE_DMESG, + PSTORE_TYPE_PPC_COMMON, + -1, + -1, + -1 +}; +static int read_type; +#endif + +/* nvram_write_os_partition + * + * We need to buffer the error logs into nvram to ensure that we have + * the failure information to decode. If we have a severe error, there + * is no way to guarantee that the OS or the machine is in a state to + * get back to user land and write the error to disk. For example if + * the SCSI device driver causes a Machine Check by writing to a bad + * IO address, there is no way of guaranteeing that the device driver + * is in any state that it would also be able to write the error data + * captured to disk, thus we buffer it in NVRAM for analysis on the + * next boot. + * + * In NVRAM the partition containing the error log buffer will look like: + * Header (in bytes): + * +-----------+----------+--------+------------+------------------+ + * | signature | checksum | length | name | data | + * |0 |1 |2 3|4 15|16 length-1| + * +-----------+----------+--------+------------+------------------+ + * + * The 'data' section would look like (in bytes): + * +--------------+------------+-----------------------------------+ + * | event_logged | sequence # | error log | + * |0 3|4 7|8 error_log_size-1| + * +--------------+------------+-----------------------------------+ + * + * event_logged: 0 if event has not been logged to syslog, 1 if it has + * sequence #: The unique sequence # for each event. 
(until it wraps) + * error log: The error log from event_scan + */ +int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (part->index == -1) + return -ESPIPE; + + if (length > part->size) + length = part->size; + + info.error_type = cpu_to_be32(err_type); + info.seq_num = cpu_to_be32(error_log_cnt); + + tmp_index = part->index; + + rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); + return rc; + } + + rc = ppc_md.nvram_write(buff, length, &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); + return rc; + } + + return 0; +} + +/* nvram_read_partition + * + * Reads nvram partition for at most 'length' bytes + */ +int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) +{ + int rc; + loff_t tmp_index; + struct err_log_info info; + + if (part->index == -1) + return -1; + + if (length > part->size) + length = part->size; + + tmp_index = part->index; + + if (part->os_partition) { + rc = ppc_md.nvram_read((char *)&info, + sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); + return rc; + } + } + + rc = ppc_md.nvram_read(buff, length, &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); + return rc; + } + + if (part->os_partition) { + *error_log_cnt = be32_to_cpu(info.seq_num); + *err_type = be32_to_cpu(info.error_type); + } + + return 0; +} + +/* nvram_init_os_partition + * + * This sets up a partition with an "OS" signature. + * + * The general strategy is the following: + * 1.) If a partition with the indicated name already exists... + * - If it's large enough, use it. + * - Otherwise, recycle it and keep going. + * 2.) Search for a free partition that is large enough. + * 3.) If there's not a free partition large enough, recycle any obsolete + * OS partitions and try again. + * 4.) Will first try getting a chunk that will satisfy the requested size. + * 5.) If a chunk of the requested size cannot be allocated, then try finding + * a chunk that will satisfy the minimum needed. + * + * Returns 0 on success, else -1. 
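+ * + * For example, rtas_log_partition above requests req_size 2079 bytes but will reuse any existing "ibm,rtas-log" partition of at least min_size 1055 bytes; a smaller one is removed and recreated.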
+ */ +int __init nvram_init_os_partition(struct nvram_os_partition *part) +{ + loff_t p; + int size; + + /* Look for ours */ + p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); + + /* Found one but too small, remove it */ + if (p && size < part->min_size) { + pr_info("nvram: Found too small %s partition," + " removing it...\n", part->name); + nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL); + p = 0; + } + + /* Create one if we didn't find */ + if (!p) { + p = nvram_create_partition(part->name, NVRAM_SIG_OS, + part->req_size, part->min_size); + if (p == -ENOSPC) { + pr_info("nvram: No room to create %s partition, " + "deleting any obsolete OS partitions...\n", + part->name); + nvram_remove_partition(NULL, NVRAM_SIG_OS, + nvram_os_partitions); + p = nvram_create_partition(part->name, NVRAM_SIG_OS, + part->req_size, part->min_size); + } + } + + if (p <= 0) { + pr_err("nvram: Failed to find or create %s" + " partition, err %d\n", part->name, (int)p); + return -1; + } + + part->index = p; + part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info); + + return 0; +} + +/* Derived from logfs_compress() */ +static int nvram_compress(const void *in, void *out, size_t inlen, + size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, + MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_deflateEnd(&stream); + if (err != Z_OK) + goto error; + + if (stream.total_out >= stream.total_in) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +/* Compress the text from big_oops_buf into oops_buf. */ +static int zip_oops(size_t text_len) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, + oops_data_sz); + if (zipped_len < 0) { + pr_err("nvram: compression failed; returned %d\n", zipped_len); + pr_err("nvram: logging uncompressed oops/panic report\n"); + return -1; + } + oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); + oops_hdr->report_length = cpu_to_be16(zipped_len); + oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds()); + return 0; +} + +#ifdef CONFIG_PSTORE +static int nvram_pstore_open(struct pstore_info *psi) +{ + /* Reset the iterator to start reading partitions again */ + read_type = -1; + return 0; +} + +/** + * nvram_pstore_write - pstore write callback for nvram + * @type: Type of message logged + * @reason: reason behind dump (oops/panic) + * @id: identifier to indicate the write performed + * @part: pstore writes data to registered buffer in parts, + * part number will indicate the same. + * @count: Indicates oops count + * @compressed: Flag to indicate the log is compressed + * @size: number of bytes written to the registered buffer + * @psi: registered pstore_info structure + * + * Called by pstore_dump() when an oops or panic report is logged in the + * printk buffer. + * Returns 0 on successful write. 
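+ * + * Note that only part 1 of a PSTORE_TYPE_DMESG record is accepted: the single lnx,oops-log partition holds just the most recent messages, so later parts and all other record types are rejected with -1.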
+ */ +static int nvram_pstore_write(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, int count, + bool compressed, size_t size, + struct pstore_info *psi) +{ + int rc; + unsigned int err_type = ERR_TYPE_KERNEL_PANIC; + struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; + + /* part 1 has the recent messages from printk buffer */ + if (part > 1 || (type != PSTORE_TYPE_DMESG)) + return -1; + + if (clobbering_unread_rtas_event()) + return -1; + + oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); + oops_hdr->report_length = cpu_to_be16(size); + oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds()); + + if (compressed) + err_type = ERR_TYPE_KERNEL_PANIC_GZ; + + rc = nvram_write_os_partition(&oops_log_partition, oops_buf, + (int) (sizeof(*oops_hdr) + size), err_type, count); + + if (rc != 0) + return rc; + + *id = part; + return 0; +} + +/* + * Reads the oops/panic report, rtas, of-config and common partition. + * Returns the length of the data we read from each partition. + * Returns 0 if we've been called before. + */ +static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, + int *count, struct timespec *time, char **buf, + bool *compressed, struct pstore_info *psi) +{ + struct oops_log_info *oops_hdr; + unsigned int err_type, id_no, size = 0; + struct nvram_os_partition *part = NULL; + char *buff = NULL; + int sig = 0; + loff_t p; + + read_type++; + + switch (nvram_type_ids[read_type]) { + case PSTORE_TYPE_DMESG: + part = &oops_log_partition; + *type = PSTORE_TYPE_DMESG; + break; + case PSTORE_TYPE_PPC_COMMON: + sig = NVRAM_SIG_SYS; + part = &common_partition; + *type = PSTORE_TYPE_PPC_COMMON; + *id = PSTORE_TYPE_PPC_COMMON; + time->tv_sec = 0; + time->tv_nsec = 0; + break; +#ifdef CONFIG_PPC_PSERIES + case PSTORE_TYPE_PPC_RTAS: + part = &rtas_log_partition; + *type = PSTORE_TYPE_PPC_RTAS; + time->tv_sec = last_rtas_event; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_OF: + sig = NVRAM_SIG_OF; + part = &of_config_partition; + *type = PSTORE_TYPE_PPC_OF; + *id = PSTORE_TYPE_PPC_OF; + time->tv_sec = 0; + time->tv_nsec = 0; + break; +#endif +#ifdef CONFIG_PPC_POWERNV + case PSTORE_TYPE_PPC_OPAL: + sig = NVRAM_SIG_FW; + part = &skiboot_partition; + *type = PSTORE_TYPE_PPC_OPAL; + *id = PSTORE_TYPE_PPC_OPAL; + time->tv_sec = 0; + time->tv_nsec = 0; + break; +#endif + default: + return 0; + } + + if (!part->os_partition) { + p = nvram_find_partition(part->name, sig, &size); + if (p <= 0) { + pr_err("nvram: Failed to find partition %s, " + "err %d\n", part->name, (int)p); + return 0; + } + part->index = p; + part->size = size; + } + + buff = kmalloc(part->size, GFP_KERNEL); + + if (!buff) + return -ENOMEM; + + if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { + kfree(buff); + return 0; + } + + *count = 0; + + if (part->os_partition) + *id = id_no; + + if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { + size_t length, hdr_size; + + oops_hdr = (struct oops_log_info *)buff; + if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) { + /* Old format oops header had 2-byte record size */ + hdr_size = sizeof(u16); + length = be16_to_cpu(oops_hdr->version); + time->tv_sec = 0; + time->tv_nsec = 0; + } else { + hdr_size = sizeof(*oops_hdr); + length = be16_to_cpu(oops_hdr->report_length); + time->tv_sec = be64_to_cpu(oops_hdr->timestamp); + time->tv_nsec = 0; + } + *buf = kmalloc(length, GFP_KERNEL); + if (*buf == NULL) + return -ENOMEM; + memcpy(*buf, buff + hdr_size, length); + kfree(buff); + + if 
(err_type == ERR_TYPE_KERNEL_PANIC_GZ) + *compressed = true; + else + *compressed = false; + return length; + } + + *buf = buff; + return part->size; +} + +static struct pstore_info nvram_pstore_info = { + .owner = THIS_MODULE, + .name = "nvram", + .open = nvram_pstore_open, + .read = nvram_pstore_read, + .write = nvram_pstore_write, +}; + +static int nvram_pstore_init(void) +{ + int rc = 0; + + if (machine_is(pseries)) { + nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS; + nvram_type_ids[3] = PSTORE_TYPE_PPC_OF; + } else + nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL; + + nvram_pstore_info.buf = oops_data; + nvram_pstore_info.bufsize = oops_data_sz; + + spin_lock_init(&nvram_pstore_info.buf_lock); + + rc = pstore_register(&nvram_pstore_info); + if (rc != 0) + pr_err("nvram: pstore_register() failed, defaults to " + "kmsg_dump; returned %d\n", rc); + + return rc; +} +#else +static int nvram_pstore_init(void) +{ + return -1; +} +#endif + +void __init nvram_init_oops_partition(int rtas_partition_exists) +{ + int rc; + + rc = nvram_init_os_partition(&oops_log_partition); + if (rc != 0) { +#ifdef CONFIG_PPC_PSERIES + if (!rtas_partition_exists) { + pr_err("nvram: Failed to initialize oops partition!"); + return; + } + pr_notice("nvram: Using %s partition to log both" + " RTAS errors and oops/panic reports\n", + rtas_log_partition.name); + memcpy(&oops_log_partition, &rtas_log_partition, + sizeof(rtas_log_partition)); +#else + pr_err("nvram: Failed to initialize oops partition!"); + return; +#endif + } + oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL); + if (!oops_buf) { + pr_err("nvram: No memory for %s partition\n", + oops_log_partition.name); + return; + } + oops_data = oops_buf + sizeof(struct oops_log_info); + oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); + + rc = nvram_pstore_init(); + + if (!rc) + return; + + /* + * Figure compression (preceded by elimination of each line's <n> + * severity prefix) will reduce the oops/panic report to at most + * 45% of its original size. + */ + big_oops_buf_sz = (oops_data_sz * 100) / 45; + big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); + if (big_oops_buf) { + stream.workspace = kmalloc(zlib_deflate_workspacesize( + WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); + if (!stream.workspace) { + pr_err("nvram: No memory for compression workspace; " + "skipping compression of %s partition data\n", + oops_log_partition.name); + kfree(big_oops_buf); + big_oops_buf = NULL; + } + } else { + pr_err("No memory for uncompressed %s data; " + "skipping compression\n", oops_log_partition.name); + stream.workspace = NULL; + } + + rc = kmsg_dump_register(&nvram_kmsg_dumper); + if (rc != 0) { + pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); + kfree(oops_buf); + kfree(big_oops_buf); + kfree(stream.workspace); + } +} + +/* + * This is our kmsg_dump callback, called after an oops or panic report + * has been written to the printk buffer. We want to capture as much + * of the printk buffer as possible. First, capture as much as we can + * that we think will compress sufficiently to fit in the lnx,oops-log + * partition. If that's too much, go back and capture uncompressed text. 
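+ * + * Concretely: when big_oops_buf is available, up to big_oops_buf_sz bytes are captured and compressed via zip_oops(); if that fails (or the buffer was never allocated), the dump is rewound and the last oops_data_sz bytes are stored uncompressed, tagged ERR_TYPE_KERNEL_PANIC instead of ERR_TYPE_KERNEL_PANIC_GZ.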
+ */ +static void oops_to_nvram(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + static unsigned int oops_count = 0; + static bool panicking = false; + static DEFINE_SPINLOCK(lock); + unsigned long flags; + size_t text_len; + unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; + int rc = -1; + + switch (reason) { + case KMSG_DUMP_RESTART: + case KMSG_DUMP_HALT: + case KMSG_DUMP_POWEROFF: + /* These are almost always orderly shutdowns. */ + return; + case KMSG_DUMP_OOPS: + break; + case KMSG_DUMP_PANIC: + panicking = true; + break; + case KMSG_DUMP_EMERG: + if (panicking) + /* Panic report already captured. */ + return; + break; + default: + pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", + __func__, (int) reason); + return; + } + + if (clobbering_unread_rtas_event()) + return; + + if (!spin_trylock_irqsave(&lock, flags)) + return; + + if (big_oops_buf) { + kmsg_dump_get_buffer(dumper, false, + big_oops_buf, big_oops_buf_sz, &text_len); + rc = zip_oops(text_len); + } + if (rc != 0) { + kmsg_dump_rewind(dumper); + kmsg_dump_get_buffer(dumper, false, + oops_data, oops_data_sz, &text_len); + err_type = ERR_TYPE_KERNEL_PANIC; + oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); + oops_hdr->report_length = cpu_to_be16(text_len); + oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds()); + } + + (void) nvram_write_os_partition(&oops_log_partition, oops_buf, + (int) (sizeof(*oops_hdr) + text_len), err_type, + ++oops_count); + + spin_unlock_irqrestore(&lock, flags); +} + static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { int size; diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 2f35a72642c6..b60a67d92ebd 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -72,7 +72,7 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Register devices with EEH */ if (dev->dev.of_node->child) - eeh_add_device_tree_early(dev->dev.of_node); + eeh_add_device_tree_early(PCI_DN(dev->dev.of_node)); /* Scan the bus */ pcibios_scan_phb(phb); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2a525c938158..0d054068a21d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -76,7 +76,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); phb->dn = dev; - phb->is_dynamic = mem_init_done; + phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 if (dev) { int nid = of_node_to_nid(dev); @@ -109,8 +109,10 @@ void pcibios_free_controller(struct pci_controller *phb) resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type) { - if (ppc_md.pcibios_window_alignment) - return ppc_md.pcibios_window_alignment(bus, type); + struct pci_controller *phb = pci_bus_to_host(bus); + + if (phb->controller_ops.window_alignment) + return phb->controller_ops.window_alignment(bus, type); /* * PCI core will figure out the default @@ -122,14 +124,26 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, void pcibios_reset_secondary_bus(struct pci_dev *dev) { - if (ppc_md.pcibios_reset_secondary_bus) { - ppc_md.pcibios_reset_secondary_bus(dev); + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.reset_secondary_bus) { + phb->controller_ops.reset_secondary_bus(dev); return; } pci_reset_secondary_bus(dev); } 
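The two hunks above show the pattern this series applies across the PowerPC PCI code: callbacks move out of the global ppc_md machine descriptor into a per-PHB struct pci_controller_ops, and the generic implementation runs whenever a controller leaves its hook NULL. A self-contained sketch of the dispatch idiom (the types and helpers here are deliberately simplified stand-ins, not the kernel's real definitions):

    /* Sketch only: pared-down stand-ins for the kernel structures. */
    #include <stdio.h>

    struct pci_dev;

    struct pci_controller_ops {
            /* NULL means "no platform override": use the generic code. */
            void (*reset_secondary_bus)(struct pci_dev *pdev);
    };

    struct pci_controller {
            struct pci_controller_ops controller_ops;
    };

    struct pci_dev {
            struct pci_controller *phb;  /* stand-in for pci_bus_to_host() */
    };

    static void generic_reset_secondary_bus(struct pci_dev *pdev)
    {
            (void)pdev;
            printf("generic secondary bus reset\n");
    }

    /* Hypothetical platform hook, for demonstration only. */
    static void platform_reset_secondary_bus(struct pci_dev *pdev)
    {
            (void)pdev;
            printf("platform-specific secondary bus reset\n");
    }

    /* Same shape as pcibios_reset_secondary_bus() above. */
    static void reset_secondary_bus(struct pci_dev *pdev)
    {
            struct pci_controller *phb = pdev->phb;

            if (phb->controller_ops.reset_secondary_bus) {
                    phb->controller_ops.reset_secondary_bus(pdev);
                    return;
            }
            generic_reset_secondary_bus(pdev);
    }

    int main(void)
    {
            struct pci_controller plain = { .controller_ops = { NULL } };
            struct pci_controller custom =
                    { .controller_ops = { platform_reset_secondary_bus } };
            struct pci_dev a = { &plain }, b = { &custom };

            reset_secondary_bus(&a);     /* falls back to the generic path */
            reset_secondary_bus(&b);     /* takes the per-PHB hook */
            return 0;
    }

Because the ops table hangs off each pci_controller rather than the single kernel-wide ppc_md, one kernel image can drive PHBs with different behaviour side by side.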
+#ifdef CONFIG_PCI_IOV +resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno) +{ + if (ppc_md.pcibios_iov_resource_alignment) + return ppc_md.pcibios_iov_resource_alignment(pdev, resno); + + return pci_iov_resource_size(pdev, resno); +} +#endif /* CONFIG_PCI_IOV */ + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 @@ -788,6 +802,10 @@ static void pcibios_fixup_resources(struct pci_dev *dev) pci_name(dev)); return; } + + if (dev->is_virtfn) + return; + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { struct resource *res = dev->resource + i; struct pci_bus_region reg; @@ -942,6 +960,8 @@ static void pcibios_fixup_bridge(struct pci_bus *bus) void pcibios_setup_bus_self(struct pci_bus *bus) { + struct pci_controller *phb; + /* Fix up the bus resources for P2P bridges */ if (bus->self != NULL) pcibios_fixup_bridge(bus); @@ -953,12 +973,14 @@ void pcibios_setup_bus_self(struct pci_bus *bus) ppc_md.pcibios_fixup_bus(bus); /* Setup bus DMA mappings */ - if (ppc_md.pci_dma_bus_setup) - ppc_md.pci_dma_bus_setup(bus); + phb = pci_bus_to_host(bus); + if (phb->controller_ops.dma_bus_setup) + phb->controller_ops.dma_bus_setup(bus); } static void pcibios_setup_device(struct pci_dev *dev) { + struct pci_controller *phb; /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init */ @@ -969,8 +991,9 @@ static void pcibios_setup_device(struct pci_dev *dev) set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ - if (ppc_md.pci_dma_dev_setup) - ppc_md.pci_dma_dev_setup(dev); + phb = pci_bus_to_host(dev->bus); + if (phb->controller_ops.dma_dev_setup) + phb->controller_ops.dma_dev_setup(dev); /* Read default IRQs and fixup if necessary */ pci_read_irq_line(dev); @@ -986,6 +1009,12 @@ int pcibios_add_device(struct pci_dev *dev) */ if (dev->bus->is_added) pcibios_setup_device(dev); + +#ifdef CONFIG_PCI_IOV + if (ppc_md.pcibios_fixup_sriov) + ppc_md.pcibios_fixup_sriov(dev); +#endif /* CONFIG_PCI_IOV */ + return 0; } @@ -1450,8 +1479,10 @@ EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); int pcibios_enable_device(struct pci_dev *dev, int mask) { - if (ppc_md.pcibios_enable_device_hook) - if (ppc_md.pcibios_enable_device_hook(dev)) + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.enable_device_hook) + if (!phb->controller_ops.enable_device_hook(dev)) return -EINVAL; return pci_enable_resources(dev, mask); @@ -1624,8 +1655,8 @@ void pcibios_scan_phb(struct pci_controller *hose) /* Get probe mode and perform scan */ mode = PCI_PROBE_NORMAL; - if (node && ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); + if (node && hose->controller_ops.probe_mode) + mode = hose->controller_ops.probe_mode(bus); pr_debug(" probe mode: %d\n", mode); if (mode == PCI_PROBE_DEVTREE) of_scan_bus(node, bus); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 5b789177aa29..7ed85a69a9c2 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -73,13 +73,16 @@ void pcibios_add_pci_devices(struct pci_bus * bus) { int slotno, mode, pass, max; struct pci_dev *dev; + struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); - eeh_add_device_tree_early(dn); + eeh_add_device_tree_early(PCI_DN(dn)); + + phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; - if (ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); + if (phb->controller_ops.probe_mode) + mode = 
phb->controller_ops.probe_mode(bus); if (mode == PCI_PROBE_DEVTREE) { /* use ofdt-based probe */ diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 83df3075d3df..b3b4df91b792 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -32,12 +32,237 @@ #include <asm/ppc-pci.h> #include <asm/firmware.h> +/* + * This function finds the firmware data of one specific + * PCI device, which is attached to the indicated PCI bus. + * For VFs, the firmware data is linked to that of the PF's + * bridge. For other devices, it is linked to that of their + * own bridge. + */ +static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) +{ + struct pci_bus *pbus; + struct device_node *dn; + struct pci_dn *pdn; + + /* + * We may have a virtual bus, which doesn't + * have an associated bridge. + */ + pbus = bus; + while (pbus) { + if (pci_is_root_bus(pbus) || pbus->self) + break; + + pbus = pbus->parent; + } + + /* + * Except for virtual buses, all PCI buses should + * have device nodes. + */ + dn = pci_bus_to_OF_node(pbus); + pdn = dn ? PCI_DN(dn) : NULL; + + return pdn; +} + +struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, + int devfn) +{ + struct device_node *dn = NULL; + struct pci_dn *parent, *pdn; + struct pci_dev *pdev = NULL; + + /* Fast path: fetch from PCI device */ + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (pdev->devfn == devfn) { + if (pdev->dev.archdata.pci_data) + return pdev->dev.archdata.pci_data; + + dn = pci_device_to_OF_node(pdev); + break; + } + } + + /* Fast path: fetch from device node */ + pdn = dn ? PCI_DN(dn) : NULL; + if (pdn) + return pdn; + + /* Slow path: fetch from firmware data hierarchy */ + parent = pci_bus_to_pdn(bus); + if (!parent) + return NULL; + + list_for_each_entry(pdn, &parent->child_list, list) { + if (pdn->busno == bus->number && + pdn->devfn == devfn) + return pdn; + } + + return NULL; +} + struct pci_dn *pci_get_pdn(struct pci_dev *pdev) { - struct device_node *dn = pci_device_to_OF_node(pdev); - if (!dn) + struct device_node *dn; + struct pci_dn *parent, *pdn; + + /* Search device directly */ + if (pdev->dev.archdata.pci_data) + return pdev->dev.archdata.pci_data; + + /* Check device node */ + dn = pci_device_to_OF_node(pdev); + pdn = dn ? PCI_DN(dn) : NULL; + if (pdn) + return pdn; + + /* + * VFs don't have device nodes. We hook their + * firmware data to the PF's bridge. + */ + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) + return NULL; + + list_for_each_entry(pdn, &parent->child_list, list) { + if (pdn->busno == pdev->bus->number && + pdn->devfn == pdev->devfn) + return pdn; + } + + return NULL; +}
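Both lookups above end in the same slow path: a function with no device-tree node (a VF, for instance) is found by scanning the parent bridge's child_list for a matching (busno, devfn) pair. A minimal sketch of that scan, with a plain singly linked list standing in for the kernel's list_head machinery (the field names follow the diff; everything else is an illustrative assumption):

    /* Sketch only: a pared-down pci_dn with just the fields the
     * slow-path lookup touches. */
    #include <stddef.h>
    #include <stdio.h>

    struct pci_dn {
            int busno;
            int devfn;
            struct pci_dn *next;      /* stand-in for the child_list link */
    };

    /* Walk the parent bridge's children for a (busno, devfn) match. */
    static struct pci_dn *pdn_lookup(struct pci_dn *child_list,
                                     int busno, int devfn)
    {
            struct pci_dn *pdn;

            for (pdn = child_list; pdn; pdn = pdn->next)
                    if (pdn->busno == busno && pdn->devfn == devfn)
                            return pdn;

            return NULL;              /* no firmware data for this function */
    }

    int main(void)
    {
            struct pci_dn vf1 = { 0x21, 0x01, NULL };
            struct pci_dn vf0 = { 0x21, 0x00, &vf1 };
            struct pci_dn *hit = pdn_lookup(&vf0, 0x21, 0x01);

            if (hit)
                    printf("found pdn for %02x:%02x\n", hit->busno, hit->devfn);
            return 0;
    }

add_one_dev_pci_data() below is the producer side of the same structure: it allocates one pci_dn per VF and chains it onto the parent's child_list so this scan can find it.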
+ +#ifdef CONFIG_PCI_IOV +static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, + struct pci_dev *pdev, + int busno, int devfn) +{ + struct pci_dn *pdn; + + /* Except for the PHB, we always have a parent */ + if (!parent) + return NULL; + + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (!pdn) { + dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__); return NULL; - return PCI_DN(dn); + } + + pdn->phb = parent->phb; + pdn->parent = parent; + pdn->busno = busno; + pdn->devfn = devfn; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif + INIT_LIST_HEAD(&pdn->child_list); + INIT_LIST_HEAD(&pdn->list); + list_add_tail(&pdn->list, &parent->child_list); + + /* + * If we already have a PCI device instance, let's + * bind them. + */ + if (pdev) + pdev->dev.archdata.pci_data = pdn; + + return pdn; +} +#endif + +struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) +{ +#ifdef CONFIG_PCI_IOV + struct pci_dn *parent, *pdn; + int i; + + /* Only support IOV for now */ + if (!pdev->is_physfn) + return pci_get_pdn(pdev); + + /* Check if VFs have been populated */ + pdn = pci_get_pdn(pdev); + if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF)) + return NULL; + + pdn->flags |= PCI_DN_FLAG_IOV_VF; + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) + return NULL; + + for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + pdn = add_one_dev_pci_data(parent, NULL, + pci_iov_virtfn_bus(pdev, i), + pci_iov_virtfn_devfn(pdev, i)); + if (!pdn) { + dev_warn(&pdev->dev, "%s: Cannot create firmware data for VF#%d\n", + __func__, i); + return NULL; + } + } +#endif /* CONFIG_PCI_IOV */ + + return pci_get_pdn(pdev); +} + +void remove_dev_pci_data(struct pci_dev *pdev) +{ +#ifdef CONFIG_PCI_IOV + struct pci_dn *parent; + struct pci_dn *pdn, *tmp; + int i; + + /* + * VF and VF PE are created/released dynamically, so we need to + * bind/unbind them. Otherwise the VF and VF PE would be mismatched + * when re-enabling SR-IOV. + */ + if (pdev->is_virtfn) { + pdn = pci_get_pdn(pdev); +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif + return; + } + + /* Only support IOV PF for now */ + if (!pdev->is_physfn) + return; + + /* Check if VFs have been populated */ + pdn = pci_get_pdn(pdev); + if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF)) + return; + + pdn->flags &= ~PCI_DN_FLAG_IOV_VF; + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) + return; + + /* + * We might introduce a flag to pci_dn in the future + * so that we can release VFs' firmware data in + * batch mode. + */ + for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + list_for_each_entry_safe(pdn, tmp, + &parent->child_list, list) { + if (pdn->busno != pci_iov_virtfn_bus(pdev, i) || + pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) + continue; + + if (!list_empty(&pdn->list)) + list_del(&pdn->list); + + kfree(pdn); + } + } +#endif /* CONFIG_PCI_IOV */ } /* @@ -49,6 +274,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data) struct pci_controller *phb = data; const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL); const __be32 *regs; + struct device_node *parent; struct pci_dn *pdn; pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); @@ -69,7 +295,25 @@ void *update_dn_pci_info(struct device_node *dn, void *data) pdn->devfn = (addr >> 8) & 0xff; } + /* vendor/device IDs and class code */ + regs = of_get_property(dn, "vendor-id", NULL); + pdn->vendor_id = regs ? of_read_number(regs, 1) : 0; + regs = of_get_property(dn, "device-id", NULL); + pdn->device_id = regs ? of_read_number(regs, 1) : 0; + regs = of_get_property(dn, "class-code", NULL); + pdn->class_code = regs ? of_read_number(regs, 1) : 0; + + /* Extended config space */ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); + + /* Attach to parent node */ + INIT_LIST_HEAD(&pdn->child_list); + INIT_LIST_HEAD(&pdn->list); + parent = of_get_parent(dn); + pdn->parent = parent ?
PCI_DN(parent) : NULL; + if (pdn->parent) + list_add_tail(&pdn->list, &pdn->parent->child_list); + return NULL; } @@ -131,6 +375,46 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, return NULL; } +static struct pci_dn *pci_dn_next_one(struct pci_dn *root, + struct pci_dn *pdn) +{ + struct list_head *next = pdn->child_list.next; + + if (next != &pdn->child_list) + return list_entry(next, struct pci_dn, list); + + while (1) { + if (pdn == root) + return NULL; + + next = pdn->list.next; + if (next != &pdn->parent->child_list) + break; + + pdn = pdn->parent; + } + + return list_entry(next, struct pci_dn, list); +} + +void *traverse_pci_dn(struct pci_dn *root, + void *(*fn)(struct pci_dn *, void *), + void *data) +{ + struct pci_dn *pdn = root; + void *ret; + + /* Only scan the child nodes */ + for (pdn = pci_dn_next_one(root, pdn); pdn; + pdn = pci_dn_next_one(root, pdn)) { + ret = fn(pdn, data); + if (ret) + return ret; + } + + return NULL; +} + /** * pci_devs_phb_init_dynamic - setup pci devices under this PHB * phb: pci-to-host bridge (top-level bridge connecting to cpu) @@ -147,8 +431,12 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) /* PHB nodes themselves must not match */ update_dn_pci_info(dn, phb); pdn = dn->data; - if (pdn) + if (pdn) { pdn->devfn = pdn->busno = -1; + pdn->vendor_id = pdn->device_id = pdn->class_code = 0; + pdn->phb = phb; + phb->pci_data = pdn; + } /* Update dn->phb ptrs for new phb and children devices */ traverse_pci_devices(dn, update_dn_pci_info, phb); @@ -171,3 +459,16 @@ void __init pci_devs_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) pci_devs_phb_init_dynamic(phb); } + +static void pci_dev_pdn_setup(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + if (pdev->dev.archdata.pci_data) + return; + + /* Setup the fast path */ + pdn = pci_get_pdn(pdev); + pdev->dev.archdata.pci_data = pdn; +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index e6245e9c7d8d..42e02a2d570b 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -207,6 +207,7 @@ void of_scan_pci_bridge(struct pci_dev *dev) { struct device_node *node = dev->dev.of_node; struct pci_bus *bus; + struct pci_controller *phb; const __be32 *busrange, *ranges; int len, i, mode; struct pci_bus_region region; @@ -286,9 +287,11 @@ void of_scan_pci_bridge(struct pci_dev *dev) bus->number); pr_debug(" bus name: %s\n", bus->name); + phb = pci_bus_to_host(bus); + mode = PCI_PROBE_NORMAL; - if (ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); + if (phb->controller_ops.probe_mode) + mode = phb->controller_ops.probe_mode(bus); pr_debug(" probe mode: %d\n", mode); if (mode == PCI_PROBE_DEVTREE) @@ -305,7 +308,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, const __be32 *reg; int reglen, devfn; #ifdef CONFIG_EEH - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn)); #endif pr_debug(" * %s\n", dn->full_name); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b4cc7bef6b16..febb50dd5328 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1114,8 +1114,11 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) */ extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ +/* + * Copy architecture-specific thread state + */ int copy_thread(unsigned 
long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) + unsigned long kthread_arg, struct task_struct *p) { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); @@ -1127,6 +1130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, sp -= sizeof(struct pt_regs); childregs = (struct pt_regs *) sp; if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ struct thread_info *ti = (void *)task_stack_page(p); memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); @@ -1137,11 +1141,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, clear_tsk_thread_flag(p, TIF_32BIT); childregs->softe = 1; #endif - childregs->gpr[15] = arg; + childregs->gpr[15] = kthread_arg; p->thread.regs = NULL; /* no user register state */ ti->flags |= _TIF_RESTOREALL; f = ret_from_kernel_thread; } else { + /* user thread */ struct pt_regs *regs = current_pt_regs(); CHECK_FULL_REGS(regs); *childregs = *regs; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1a85d8f96739..fd1fe4c37599 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2898,7 +2898,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * Call OF "quiesce" method to shut down pending DMA's from * devices etc... */ - prom_printf("Calling quiesce...\n"); + prom_printf("Quiescing Open Firmware ...\n"); call_prom("quiesce", 0, 0); /* @@ -2910,7 +2910,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { - prom_printf("returning from prom_init\n"); + prom_printf("Booting Linux via __start() ...\n"); prom_debug("->dt_header_start=0x%x\n", hdr); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 21c45a2d0706..7a488c108410 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -401,7 +401,7 @@ static char *__fetch_rtas_last_error(char *altbuf) buf = altbuf; } else { buf = rtas_err_buf; - if (mem_init_done) + if (slab_is_available()) buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); } if (buf) @@ -461,7 +461,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) 
if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); - if (mem_init_done) + if (slab_is_available()) kfree(buff_copy); } return ret; @@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus) } EXPORT_SYMBOL(rtas_offline_cpus_mask); -int rtas_ibm_suspend_me(u64 handle, int *vasi_return) +int rtas_ibm_suspend_me(u64 handle) { long state; long rc; @@ -919,13 +919,11 @@ int rtas_ibm_suspend_me(u64 handle, int *vasi_return) printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); return rc; } else if (state == H_VASI_ENABLED) { - *vasi_return = RTAS_NOT_SUSPENDABLE; - return 0; + return -EAGAIN; } else if (state != H_VASI_SUSPENDING) { printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", state); - *vasi_return = -1; - return 0; + return -EIO; } if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) @@ -972,7 +970,7 @@ out: return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle, int *vasi_return) +int rtas_ibm_suspend_me(u64 handle) { return -ENOSYS; } @@ -1022,7 +1020,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) unsigned long flags; char *buff_copy, *errbuf = NULL; int nargs, nret, token; - int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1054,15 +1051,18 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (token == ibm_suspend_me_token) { /* - * rtas_ibm_suspend_me assumes args are in cpu endian, or at least the - * hcall within it requires it. + * rtas_ibm_suspend_me assumes the streamid handle is in cpu + * endian, or at least the hcall within it requires it. */ - int vasi_rc = 0; + int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle, &vasi_rc); - args.rets[0] = cpu_to_be32(vasi_rc); - if (rc) + rc = rtas_ibm_suspend_me(handle); + if (rc == -EAGAIN) + args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); + else if (rc == -EIO) + args.rets[0] = cpu_to_be32(-1); + else if (rc) return rc; goto copy_return; } diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index ce230da2c015..73f1934582c2 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -113,7 +113,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, ret = rtas_read_config(pdn, where, size, val); if (*val == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + eeh_dev_check_failure(pdn_to_eeh_dev(pdn))) return PCIBIOS_DEVICE_NOT_FOUND; return ret; @@ -277,50 +277,3 @@ int rtas_setup_phb(struct pci_controller *phb) return 0; } - -void __init find_and_init_phbs(void) -{ - struct device_node *node; - struct pci_controller *phb; - struct device_node *root = of_find_node_by_path("/"); - - for_each_child_of_node(root, node) { - if (node->type == NULL || (strcmp(node->type, "pci") != 0 && - strcmp(node->type, "pciex") != 0)) - continue; - - phb = pcibios_alloc_controller(node); - if (!phb) - continue; - rtas_setup_phb(phb); - pci_process_bridge_OF_ranges(phb, node, 0); - isa_bridge_find_early(phb); - } - - of_node_put(root); - pci_devs_phb_init(); - - /* - * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties - * in chosen. 
- */ - if (of_chosen) { - const int *prop; - - prop = of_get_property(of_chosen, - "linux,pci-probe-only", NULL); - if (prop) { - if (*prop) - pci_add_flags(PCI_PROBE_ONLY); - else - pci_clear_flags(PCI_PROBE_ONLY); - } - -#ifdef CONFIG_PPC32 /* Will be made generic soon */ - prop = of_get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); - if (prop && *prop) - pci_add_flags(PCI_REASSIGN_ALL_BUS); -#endif /* CONFIG_PPC32 */ - } -} diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 49f553bbb360..c69671c03c3b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -37,6 +37,7 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/memory.h> +#include <linux/nmi.h> #include <asm/io.h> #include <asm/kdump.h> @@ -779,3 +780,22 @@ unsigned long memory_block_size_bytes(void) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif + +#ifdef CONFIG_HARDLOCKUP_DETECTOR +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + return ppc_proc_freq * watchdog_thresh; +} + +/* + * The hardlockup detector breaks PMU event based branches and is likely + * to get false positives in KVM guests, so disable it by default. + */ +static int __init disable_hardlockup_detector(void) +{ + hardlockup_detector_disable(); + + return 0; +} +early_initcall(disable_hardlockup_detector); +#endif diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index b2702e87db0d..5fa92706444b 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -121,3 +121,20 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, (u64)len_high << 32 | len_low, advice); } + +long sys_switch_endian(void) +{ + struct thread_info *ti; + + current->thread.regs->msr ^= MSR_LE; + + /* + * Set TIF_RESTOREALL so that r3 isn't clobbered on return to + * userspace. That also has the effect of restoring the non-volatile + * GPRs, so we saved them on the way in here. 
+ */ + ti = current_thread_info(); + ti->flags |= _TIF_RESTOREALL; + + return 0; +} diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 7ab5d434e2ee..4d6b1d3a747f 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -22,6 +22,7 @@ #define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func) #define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) #define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) +#define PPC64ONLY(func) .llong DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall) #define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264) #else #define SYSCALL(func) .long sys_##func @@ -29,6 +30,7 @@ #define PPC_SYS(func) .long ppc_##func #define OLDSYS(func) .long sys_##func #define SYS32ONLY(func) .long sys_##func +#define PPC64ONLY(func) .long sys_ni_syscall #define SYSX(f, f3264, f32) .long f32 #endif #define SYSCALL_SPU(func) SYSCALL(func) diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 238aa63ced8f..2384129f5893 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -21,9 +21,11 @@ #ifdef CONFIG_PPC64 #define OLDSYS(func) -1 #define SYS32ONLY(func) -1 +#define PPC64ONLY(func) __NR_##func #else #define OLDSYS(func) __NR_old##func #define SYS32ONLY(func) __NR_##func +#define PPC64ONLY(func) -1 #endif #define SYSX(f, f3264, f32) -1 diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2a324f4cb1b9..5754b226da7e 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -152,9 +152,9 @@ _GLOBAL(tm_reclaim) addi r7, r3, THREAD_TRANSACT_VRSTATE SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ - mfvscr vr0 + mfvscr v0 li r6, VRSTATE_VSCR - stvx vr0, r7, r6 + stvx v0, r7, r6 dont_backup_vec: mfspr r0, SPRN_VRSAVE std r0, THREAD_TRANSACT_VRSAVE(r3) @@ -359,8 +359,8 @@ _GLOBAL(__tm_recheckpoint) addi r8, r3, THREAD_VRSTATE li r5, VRSTATE_VSCR - lvx vr0, r8, r5 - mtvscr vr0 + lvx v0, r8, r5 + mtvscr v0 REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */ dont_restore_vec: ld r5, THREAD_VRSAVE(r3) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index b7aa07279a63..7cc38b5b58bc 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -46,8 +46,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) /* Maple real mode debug */ udbg_init_maple_realmode(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT) - udbg_init_debug_beat(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 74f8050518d6..f5c80d567d8d 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -24,8 +24,8 @@ _GLOBAL(do_load_up_transact_altivec) stw r4,THREAD_USED_VR(r3) li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR - lvx vr0,r10,r3 - mtvscr vr0 + lvx v0,r10,r3 + mtvscr v0 addi r10,r3,THREAD_TRANSACT_VRSTATE REST_32VRS(0,r4,r10) @@ -52,8 +52,8 @@ _GLOBAL(vec_enable) */ _GLOBAL(load_vr_state) li r4,VRSTATE_VSCR - lvx vr0,r4,r3 - mtvscr vr0 + lvx v0,r4,r3 + mtvscr v0 REST_32VRS(0,r4,r3) blr @@ -63,9 +63,9 @@ _GLOBAL(load_vr_state) */ _GLOBAL(store_vr_state) SAVE_32VRS(0, r4, r3) - mfvscr vr0 + mfvscr v0 li r4, VRSTATE_VSCR - stvx vr0, r4, r3 + stvx v0, r4, r3 blr /* @@ -104,9 +104,9 @@ _GLOBAL(load_up_altivec) addi r4,r4,THREAD addi r6,r4,THREAD_VRSTATE SAVE_32VRS(0,r5,r6) - mfvscr vr0 + mfvscr v0 li 
r10,VRSTATE_VSCR - stvx vr0,r10,r6 + stvx v0,r10,r6 /* Disable VMX for last_task_used_altivec */ PPC_LL r5,PT_REGS(r4) toreal(r5) @@ -142,8 +142,8 @@ _GLOBAL(load_up_altivec) li r4,1 li r10,VRSTATE_VSCR stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r6 - mtvscr vr0 + lvx v0,r10,r6 + mtvscr v0 REST_32VRS(0,r4,r6) #ifndef CONFIG_SMP /* Update last_task_used_altivec to 'current' */ @@ -186,9 +186,9 @@ _GLOBAL(giveup_altivec) addi r7,r3,THREAD_VRSTATE 2: PPC_LCMPI 0,r5,0 SAVE_32VRS(0,r4,r7) - mfvscr vr0 + mfvscr v0 li r4,VRSTATE_VSCR - stvx vr0,r4,r7 + stvx v0,r4,r7 beq 1f PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) #ifdef CONFIG_VSX diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 60081bd75847..93b5f5c9b445 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -84,7 +84,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) } if (dsisr & DSISR_MC_TLB_MULTI) { if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); dsisr &= ~DSISR_MC_TLB_MULTI; } /* Any other errors we don't understand? */ @@ -102,7 +102,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) break; case SRR1_MC_IFETCH_TLBMULTI: if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); break; default: handled = 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 24bfe401373e..91bbc845ac66 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -720,7 +720,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, return; } - if (vcpu->arch.mmio_is_bigendian) { + if (!vcpu->arch.mmio_host_swabbed) { switch (run->mmio.len) { case 8: gpr = *(u64 *)run->mmio.data; break; case 4: gpr = *(u32 *)run->mmio.data; break; @@ -728,10 +728,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, case 1: gpr = *(u8 *)run->mmio.data; break; } } else { - /* Convert BE data from userland back to LE. */ switch (run->mmio.len) { - case 4: gpr = ld_le32((u32 *)run->mmio.data); break; - case 2: gpr = ld_le16((u16 *)run->mmio.data); break; + case 8: gpr = swab64(*(u64 *)run->mmio.data); break; + case 4: gpr = swab32(*(u32 *)run->mmio.data); break; + case 2: gpr = swab16(*(u16 *)run->mmio.data); break; case 1: gpr = *(u8 *)run->mmio.data; break; } } @@ -780,14 +780,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, int is_default_endian) { int idx, ret; - int is_bigendian; + bool host_swabbed; + /* Pity C doesn't have a logical XOR operator */ if (kvmppc_need_byteswap(vcpu)) { - /* Default endianness is "little endian". */ - is_bigendian = !is_default_endian; + host_swabbed = is_default_endian; } else { - /* Default endianness is "big endian". 
*/ - is_bigendian = is_default_endian; + host_swabbed = !is_default_endian; } if (bytes > sizeof(run->mmio.data)) { @@ -800,7 +799,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, run->mmio.is_write = 0; vcpu->arch.io_gpr = rt; - vcpu->arch.mmio_is_bigendian = is_bigendian; + vcpu->arch.mmio_host_swabbed = host_swabbed; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 0; vcpu->arch.mmio_sign_extend = 0; @@ -840,14 +839,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, { void *data = run->mmio.data; int idx, ret; - int is_bigendian; + bool host_swabbed; + /* Pity C doesn't have a logical XOR operator */ if (kvmppc_need_byteswap(vcpu)) { - /* Default endianness is "little endian". */ - is_bigendian = !is_default_endian; + host_swabbed = is_default_endian; } else { - /* Default endianness is "big endian". */ - is_bigendian = is_default_endian; + host_swabbed = !is_default_endian; } if (bytes > sizeof(run->mmio.data)) { @@ -862,7 +860,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->mmio_is_write = 1; /* Store the value at the lowest bytes in 'data'. */ - if (is_bigendian) { + if (!host_swabbed) { switch (bytes) { case 8: *(u64 *)data = val; break; case 4: *(u32 *)data = val; break; @@ -870,11 +868,11 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, case 1: *(u8 *)data = val; break; } } else { - /* Store LE value into 'data'. */ switch (bytes) { - case 4: st_le32(data, val); break; - case 2: st_le16(data, val); break; - case 1: *(u8 *)data = val; break; + case 8: *(u64 *)data = swab64(val); break; + case 4: *(u32 *)data = swab32(val); break; + case 2: *(u16 *)data = swab16(val); break; + case 1: *(u8 *)data = val; break; } } diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 4a6c2cf890d9..60b0b3fc8fc1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -10,7 +10,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; - if (mem_init_done) + if (slab_is_available()) p = kzalloc(size, mask); else { p = memblock_virt_alloc(size, 0); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 55f19f9fd708..6813f80d1eec 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -/* - * Use dcbz on the complete cache lines in the destination - * to set them to zero. This requires that the destination - * area is cacheable. -- paulus - */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - clrlwi r7,r6,32-LG_CACHELINE_BYTES - add r8,r7,r5 - srwi r9,r8,LG_CACHELINE_BYTES - addic. r9,r9,-1 /* total number of complete cachelines */ - ble 2f - xori r0,r7,CACHELINE_MASK & ~3 - srwi. r0,r0,2 - beq 3f - mtctr r0 -4: stwu r4,4(r6) - bdnz 4b -3: mtctr r9 - li r7,4 -10: dcbz r7,r6 - addi r6,r6,CACHELINE_BYTES - bdnz 10b - clrlwi r5,r8,32-LG_CACHELINE_BYTES - addi r5,r5,4 -2: srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - _GLOBAL(memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 @@ -142,85 +94,6 @@ _GLOBAL(memset) bdnz 8b blr -/* - * This version uses dcbz on the complete cache lines in the - * destination area to reduce memory traffic. 
This requires that - * the destination area is cacheable. - * We only use this version if the source and dest don't overlap. - * -- paulus. - */ -_GLOBAL(cacheable_memcpy) - add r7,r3,r5 /* test if the src & dst overlap */ - add r8,r4,r5 - cmplw 0,r4,r7 - cmplw 1,r3,r8 - crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ - - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - subf r5,r0,r5 - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ - stwu r9,4(r6) - bdnz 72b - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - mtctr r0 - beq 63f -53: - dcbz r11,r6 - COPY_16_BYTES -#if L1_CACHE_BYTES >= 32 - COPY_16_BYTES -#if L1_CACHE_BYTES >= 64 - COPY_16_BYTES - COPY_16_BYTES -#if L1_CACHE_BYTES >= 128 - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES -#endif -#endif -#endif - bdnz 53b - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) - stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: blr - _GLOBAL(memmove) cmplw 0,r3,r4 bgt backwards_memcpy diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index d7dafb3777ac..a84d333ecb09 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -83,23 +83,23 @@ _GLOBAL(copypage_power7) li r12,112 .align 5 -1: lvx vr7,r0,r4 - lvx vr6,r4,r6 - lvx vr5,r4,r7 - lvx vr4,r4,r8 - lvx vr3,r4,r9 - lvx vr2,r4,r10 - lvx vr1,r4,r11 - lvx vr0,r4,r12 +1: lvx v7,r0,r4 + lvx v6,r4,r6 + lvx v5,r4,r7 + lvx v4,r4,r8 + lvx v3,r4,r9 + lvx v2,r4,r10 + lvx v1,r4,r11 + lvx v0,r4,r12 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r6 - stvx vr5,r3,r7 - stvx vr4,r3,r8 - stvx vr3,r3,r9 - stvx vr2,r3,r10 - stvx vr1,r3,r11 - stvx vr0,r3,r12 + stvx v7,r0,r3 + stvx v6,r3,r6 + stvx v5,r3,r7 + stvx v4,r3,r8 + stvx v3,r3,r9 + stvx v2,r3,r10 + stvx v1,r3,r11 + stvx v0,r3,r12 addi r3,r3,128 bdnz 1b diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 92ee840529bc..da0c568d18c4 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -388,29 +388,29 @@ err3; std r0,0(r3) li r11,48 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 +err3; lvx v1,r0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,r0,r3 addi r3,r3,16 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,r0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,r0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,r0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,r0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -433,23 +433,23 @@ err3; stvx vr0,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 -err4; lvx vr6,r4,r9 -err4; lvx vr5,r4,r10 -err4; lvx vr4,r4,r11 -err4; lvx vr3,r4,r12 -err4; lvx vr2,r4,r14 -err4; lvx 
vr1,r4,r15 -err4; lvx vr0,r4,r16 +err4; lvx v7,r0,r4 +err4; lvx v6,r4,r9 +err4; lvx v5,r4,r10 +err4; lvx v4,r4,r11 +err4; lvx v3,r4,r12 +err4; lvx v2,r4,r14 +err4; lvx v1,r4,r15 +err4; lvx v0,r4,r16 addi r4,r4,128 -err4; stvx vr7,r0,r3 -err4; stvx vr6,r3,r9 -err4; stvx vr5,r3,r10 -err4; stvx vr4,r3,r11 -err4; stvx vr3,r3,r12 -err4; stvx vr2,r3,r14 -err4; stvx vr1,r3,r15 -err4; stvx vr0,r3,r16 +err4; stvx v7,r0,r3 +err4; stvx v6,r3,r9 +err4; stvx v5,r3,r10 +err4; stvx v4,r3,r11 +err4; stvx v3,r3,r12 +err4; stvx v2,r3,r14 +err4; stvx v1,r3,r15 +err4; stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -463,29 +463,29 @@ err4; stvx vr0,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,r0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,r0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,r0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,r0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 +err3; lvx v1,r0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -560,42 +560,42 @@ err3; stw r7,4(r3) li r10,32 li r11,48 - LVS(vr16,0,r4) /* Setup permute control vector */ -err3; lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ +err3; lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,r0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) -err3; lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) -err3; lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) -err3; lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) -err3; lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) +err3; lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -618,31 +618,31 @@ err3; stvx vr11,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 - VPERM(vr8,vr0,vr7,vr16) -err4; lvx vr6,r4,r9 - VPERM(vr9,vr7,vr6,vr16) -err4; lvx vr5,r4,r10 - VPERM(vr10,vr6,vr5,vr16) -err4; lvx vr4,r4,r11 - VPERM(vr11,vr5,vr4,vr16) -err4; lvx vr3,r4,r12 - VPERM(vr12,vr4,vr3,vr16) -err4; lvx vr2,r4,r14 - VPERM(vr13,vr3,vr2,vr16) -err4; lvx vr1,r4,r15 - VPERM(vr14,vr2,vr1,vr16) -err4; lvx vr0,r4,r16 - VPERM(vr15,vr1,vr0,vr16) +err4; lvx v7,r0,r4 + VPERM(v8,v0,v7,v16) +err4; lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) +err4; lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) +err4; lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) +err4; lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) +err4; lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) +err4; lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) +err4; lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 
-err4; stvx vr8,r0,r3 -err4; stvx vr9,r3,r9 -err4; stvx vr10,r3,r10 -err4; stvx vr11,r3,r11 -err4; stvx vr12,r3,r12 -err4; stvx vr13,r3,r14 -err4; stvx vr14,r3,r15 -err4; stvx vr15,r3,r16 +err4; stvx v8,r0,r3 +err4; stvx v9,r3,r9 +err4; stvx v10,r3,r10 +err4; stvx v11,r3,r11 +err4; stvx v12,r3,r12 +err4; stvx v13,r3,r14 +err4; stvx v14,r3,r15 +err4; stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -656,36 +656,36 @@ err4; stvx vr15,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) -err3; lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) -err3; lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) -err3; lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) +err3; lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) -err3; lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,r0,r3 addi r3,r3,16 /* Up to 15B to go */ diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index a5b30c71a8d3..18af0b3d3eb2 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -236,78 +236,78 @@ _GLOBAL(_rest32gpr_31_x) _GLOBAL(_savevr_20) li r11,-192 - stvx vr20,r11,r0 + stvx v20,r11,r0 _GLOBAL(_savevr_21) li r11,-176 - stvx vr21,r11,r0 + stvx v21,r11,r0 _GLOBAL(_savevr_22) li r11,-160 - stvx vr22,r11,r0 + stvx v22,r11,r0 _GLOBAL(_savevr_23) li r11,-144 - stvx vr23,r11,r0 + stvx v23,r11,r0 _GLOBAL(_savevr_24) li r11,-128 - stvx vr24,r11,r0 + stvx v24,r11,r0 _GLOBAL(_savevr_25) li r11,-112 - stvx vr25,r11,r0 + stvx v25,r11,r0 _GLOBAL(_savevr_26) li r11,-96 - stvx vr26,r11,r0 + stvx v26,r11,r0 _GLOBAL(_savevr_27) li r11,-80 - stvx vr27,r11,r0 + stvx v27,r11,r0 _GLOBAL(_savevr_28) li r11,-64 - stvx vr28,r11,r0 + stvx v28,r11,r0 _GLOBAL(_savevr_29) li r11,-48 - stvx vr29,r11,r0 + stvx v29,r11,r0 _GLOBAL(_savevr_30) li r11,-32 - stvx vr30,r11,r0 + stvx v30,r11,r0 _GLOBAL(_savevr_31) li r11,-16 - stvx vr31,r11,r0 + stvx v31,r11,r0 blr _GLOBAL(_restvr_20) li r11,-192 - lvx vr20,r11,r0 + lvx v20,r11,r0 _GLOBAL(_restvr_21) li r11,-176 - lvx vr21,r11,r0 + lvx v21,r11,r0 _GLOBAL(_restvr_22) li r11,-160 - lvx vr22,r11,r0 + lvx v22,r11,r0 _GLOBAL(_restvr_23) li r11,-144 - lvx vr23,r11,r0 + lvx v23,r11,r0 _GLOBAL(_restvr_24) li r11,-128 - lvx vr24,r11,r0 + lvx v24,r11,r0 _GLOBAL(_restvr_25) li r11,-112 - lvx vr25,r11,r0 + lvx v25,r11,r0 _GLOBAL(_restvr_26) li r11,-96 - lvx vr26,r11,r0 + lvx v26,r11,r0 _GLOBAL(_restvr_27) li r11,-80 - lvx vr27,r11,r0 + lvx v27,r11,r0 _GLOBAL(_restvr_28) li r11,-64 - lvx vr28,r11,r0 + lvx v28,r11,r0 _GLOBAL(_restvr_29) li r11,-48 - lvx vr29,r11,r0 + lvx v29,r11,r0 _GLOBAL(_restvr_30) li r11,-32 - lvx vr30,r11,r0 + lvx v30,r11,r0 _GLOBAL(_restvr_31) li r11,-16 - lvx vr31,r11,r0 + lvx v31,r11,r0 blr #endif /* CONFIG_ALTIVEC */ @@ -443,101 +443,101 @@ _restgpr0_31: .globl _savevr_20 _savevr_20: li r12,-192 - stvx vr20,r12,r0 + stvx v20,r12,r0 
.globl _savevr_21 _savevr_21: li r12,-176 - stvx vr21,r12,r0 + stvx v21,r12,r0 .globl _savevr_22 _savevr_22: li r12,-160 - stvx vr22,r12,r0 + stvx v22,r12,r0 .globl _savevr_23 _savevr_23: li r12,-144 - stvx vr23,r12,r0 + stvx v23,r12,r0 .globl _savevr_24 _savevr_24: li r12,-128 - stvx vr24,r12,r0 + stvx v24,r12,r0 .globl _savevr_25 _savevr_25: li r12,-112 - stvx vr25,r12,r0 + stvx v25,r12,r0 .globl _savevr_26 _savevr_26: li r12,-96 - stvx vr26,r12,r0 + stvx v26,r12,r0 .globl _savevr_27 _savevr_27: li r12,-80 - stvx vr27,r12,r0 + stvx v27,r12,r0 .globl _savevr_28 _savevr_28: li r12,-64 - stvx vr28,r12,r0 + stvx v28,r12,r0 .globl _savevr_29 _savevr_29: li r12,-48 - stvx vr29,r12,r0 + stvx v29,r12,r0 .globl _savevr_30 _savevr_30: li r12,-32 - stvx vr30,r12,r0 + stvx v30,r12,r0 .globl _savevr_31 _savevr_31: li r12,-16 - stvx vr31,r12,r0 + stvx v31,r12,r0 blr .globl _restvr_20 _restvr_20: li r12,-192 - lvx vr20,r12,r0 + lvx v20,r12,r0 .globl _restvr_21 _restvr_21: li r12,-176 - lvx vr21,r12,r0 + lvx v21,r12,r0 .globl _restvr_22 _restvr_22: li r12,-160 - lvx vr22,r12,r0 + lvx v22,r12,r0 .globl _restvr_23 _restvr_23: li r12,-144 - lvx vr23,r12,r0 + lvx v23,r12,r0 .globl _restvr_24 _restvr_24: li r12,-128 - lvx vr24,r12,r0 + lvx v24,r12,r0 .globl _restvr_25 _restvr_25: li r12,-112 - lvx vr25,r12,r0 + lvx v25,r12,r0 .globl _restvr_26 _restvr_26: li r12,-96 - lvx vr26,r12,r0 + lvx v26,r12,r0 .globl _restvr_27 _restvr_27: li r12,-80 - lvx vr27,r12,r0 + lvx v27,r12,r0 .globl _restvr_28 _restvr_28: li r12,-64 - lvx vr28,r12,r0 + lvx v28,r12,r0 .globl _restvr_29 _restvr_29: li r12,-48 - lvx vr29,r12,r0 + lvx v29,r12,r0 .globl _restvr_30 _restvr_30: li r12,-32 - lvx vr30,r12,r0 + lvx v30,r12,r0 .globl _restvr_31 _restvr_31: li r12,-16 - lvx vr31,r12,r0 + lvx v31,r12,r0 blr #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 85aec08ab234..5d0cdbfbe3f2 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -184,16 +184,16 @@ _GLOBAL(do_stfd) extab 2b,3b #ifdef CONFIG_ALTIVEC -/* Get the contents of vrN into vr0; N is in r3. */ +/* Get the contents of vrN into v0; N is in r3. */ _GLOBAL(get_vr) mflr r0 rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 31 - vor vr0,reg,reg /* assembler doesn't know vmr? */ + vor v0,reg,reg /* assembler doesn't know vmr? */ blr reg = reg + 1 .endr @@ -203,16 +203,16 @@ reg = reg + 1 mtlr r0 bctr -/* Put the contents of vr0 into vrN; N is in r3. */ +/* Put the contents of v0 into vrN; N is in r3. */ _GLOBAL(put_vr) mflr r0 rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 31 - vor reg,vr0,vr0 + vor reg,v0,v0 blr reg = reg + 1 .endr @@ -234,13 +234,13 @@ _GLOBAL(do_lvx) MTMSRD(r7) isync beq cr7,1f - stvx vr0,r1,r8 + stvx v0,r1,r8 1: li r9,-EFAULT -2: lvx vr0,0,r4 +2: lvx v0,0,r4 li r9,0 3: beq cr7,4f bl put_vr - lvx vr0,r1,r8 + lvx v0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) @@ -262,13 +262,13 @@ _GLOBAL(do_stvx) MTMSRD(r7) isync beq cr7,1f - stvx vr0,r1,r8 + stvx v0,r1,r8 bl get_vr 1: li r9,-EFAULT -2: stvx vr0,0,r4 +2: stvx v0,0,r4 li r9,0 3: beq cr7,4f - lvx vr0,r1,r8 + lvx v0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) @@ -280,12 +280,12 @@ _GLOBAL(do_stvx) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -/* Get the contents of vsrN into vsr0; N is in r3. */ +/* Get the contents of vsN into vs0; N is in r3. 
*/ _GLOBAL(get_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vsr0 is already in vsr0 */ + blr /* vs0 is already in vs0 */ nop reg = 1 .rept 63 @@ -299,12 +299,12 @@ reg = reg + 1 mtlr r0 bctr -/* Put the contents of vsr0 into vsrN; N is in r3. */ +/* Put the contents of vs0 into vsN; N is in r3. */ _GLOBAL(put_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 63 diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 170a0346f756..f7deebdf3365 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -41,6 +41,7 @@ void __spin_yield(arch_spinlock_t *lock) plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), yield_count); } +EXPORT_SYMBOL_GPL(__spin_yield); /* * Waiting for a read lock or a write lock on a rwlock... diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 0830587df16e..786234fd4e91 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -321,29 +321,29 @@ _GLOBAL(memcpy_power7) li r11,48 bf cr7*4+3,5f - lvx vr1,r0,r4 + lvx v1,r0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,r0,r3 addi r3,r3,16 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,r0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,r0,r3 + stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,r0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,r0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -366,23 +366,23 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - lvx vr6,r4,r9 - lvx vr5,r4,r10 - lvx vr4,r4,r11 - lvx vr3,r4,r12 - lvx vr2,r4,r14 - lvx vr1,r4,r15 - lvx vr0,r4,r16 + lvx v7,r0,r4 + lvx v6,r4,r9 + lvx v5,r4,r10 + lvx v4,r4,r11 + lvx v3,r4,r12 + lvx v2,r4,r14 + lvx v1,r4,r15 + lvx v0,r4,r16 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r9 - stvx vr5,r3,r10 - stvx vr4,r3,r11 - stvx vr3,r3,r12 - stvx vr2,r3,r14 - stvx vr1,r3,r15 - stvx vr0,r3,r16 + stvx v7,r0,r3 + stvx v6,r3,r9 + stvx v5,r3,r10 + stvx v4,r3,r11 + stvx v3,r3,r12 + stvx v2,r3,r14 + stvx v1,r3,r15 + stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,r0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,r0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,r0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,r0,r3 + stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 + lvx v1,r0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -494,42 +494,42 @@ _GLOBAL(memcpy_power7) li r10,32 li r11,48 - LVS(vr16,0,r4) /* Setup permute control vector */ - lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ + lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,r0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) - lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) + lvx v1,r0,r4 + 
VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - stvx vr9,r3,r9 + stvx v8,r0,r3 + stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) - lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) - lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) - lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) + lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -552,31 +552,31 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - VPERM(vr8,vr0,vr7,vr16) - lvx vr6,r4,r9 - VPERM(vr9,vr7,vr6,vr16) - lvx vr5,r4,r10 - VPERM(vr10,vr6,vr5,vr16) - lvx vr4,r4,r11 - VPERM(vr11,vr5,vr4,vr16) - lvx vr3,r4,r12 - VPERM(vr12,vr4,vr3,vr16) - lvx vr2,r4,r14 - VPERM(vr13,vr3,vr2,vr16) - lvx vr1,r4,r15 - VPERM(vr14,vr2,vr1,vr16) - lvx vr0,r4,r16 - VPERM(vr15,vr1,vr0,vr16) + lvx v7,r0,r4 + VPERM(v8,v0,v7,v16) + lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) + lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) + lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) + lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) + lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) + lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) + lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 - stvx vr12,r3,r12 - stvx vr13,r3,r14 - stvx vr14,r3,r15 - stvx vr15,r3,r16 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 + stvx v12,r3,r12 + stvx v13,r3,r14 + stvx v14,r3,r15 + stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -590,36 +590,36 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) - lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) - lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) - lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) + lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) - lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - stvx vr9,r3,r9 + stvx v8,r0,r3 + stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,r0,r3 addi r3,r3,16 /* Up to 15B to go */ diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index f993959647b5..c7f8e9586316 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -8,10 +8,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memchr); -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(cacheable_memcpy); -EXPORT_SYMBOL(cacheable_memzero); -#endif EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index a1060a868e69..69abf844c2c3 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create); */ void rh_destroy(rh_info_t * info) { - if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + if ((info->flags & RHIF_STATIC_BLOCK) == 0) 
kfree(info->block); if ((info->flags & RHIF_STATIC_INFO) == 0) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 438dcd3fd0d1..9c8770b5f96f 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-y += hugetlbpage.o ifeq ($(CONFIG_HUGETLB_PAGE),y) diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index d85e86aac7fb..169aba446a74 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t do { SetPageReserved(page); map_page(vaddr, page_to_phys(page), - pgprot_noncached(PAGE_KERNEL)); + pgprot_val(pgprot_noncached(PAGE_KERNEL))); page++; vaddr += PAGE_SIZE; } while (size -= PAGE_SIZE); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index b46912fee7cd..9c90e66cffb6 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -181,7 +181,7 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, unsigned long cam_sz; cam_sz = calc_cam_sz(ram, virt, phys); - settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); + settlbcam(i, virt, phys, cam_sz, pgprot_val(PAGE_KERNEL_X), 0); ram -= cam_sz; amount_mapped += cam_sz; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 86686514ae13..43dafb9d6a46 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -33,7 +33,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - pmd_t pmd = ACCESS_ONCE(*pmdp); + pmd_t pmd = READ_ONCE(*pmdp); old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */
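The ACCESS_ONCE()-to-READ_ONCE() conversions in these hunks are part of a tree-wide move: ACCESS_ONCE() is built on a volatile scalar cast and is unreliable for aggregate types such as pmd_t, while READ_ONCE() also handles them. The idiom being preserved is "snapshot the entry once, decide on the local copy, publish with a compare-and-swap"; here is a sketch with C11 atomics standing in for the kernel primitives (a simplification under that assumption, not the kernel's implementation):

    /* Sketch of the snapshot-then-update walk the hunks above protect:
     * one tearing-free load into a local, all checks against that local,
     * and a compare-and-swap that forces a retry if the entry moved. */
    #include <stdatomic.h>
    #include <stdio.h>

    #define ENTRY_BUSY 0x1ul

    static _Atomic unsigned long entry = 0x1000;  /* demo page-table entry */

    static int mark_busy(void)
    {
            unsigned long old, new;

            do {
                    /* Read exactly once; 'old' cannot change under us. */
                    old = atomic_load_explicit(&entry, memory_order_relaxed);
                    if (old & ENTRY_BUSY)
                            return -1;        /* caller retries the access */
                    new = old | ENTRY_BUSY;
            } while (!atomic_compare_exchange_weak(&entry, &old, new));

            return 0;
    }

    int main(void)
    {
            int first = mark_busy();
            int second = mark_busy();

            printf("first claim: %d, second claim: %d\n", first, second);
            return 0;
    }

find_linux_pte_or_hugepte() in the next hunk depends on the same property: the pgd value it inspects is a stable local snapshot even if a parallel THP split rewrites the page table underneath it.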
This makes sure the * value doesn't get updated by a parallel THP split/collapse, @@ -1045,7 +1045,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if (pte_end < end) end = pte_end; - pte = ACCESS_ONCE(*ptep); + pte = READ_ONCE(*ptep); mask = _PAGE_PRESENT | _PAGE_USER; if (write) mask |= _PAGE_RW; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 10471f9bb63f..d747dd7bc90b 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -132,6 +132,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); new = kmem_cache_create(name, table_size, align, 0, ctor); + kfree(name); pgtable_cache[shift - 1] = new; pr_debug("Allocated pgtable cache for order %d\n", shift); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b7285a5870f8..45fda71feb27 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int mem_init_done; unsigned long long memory_limit; #ifdef CONFIG_HIGHMEM @@ -377,8 +376,6 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); #endif /* CONFIG_PPC32 */ - - mem_init_done = 1; } void free_initmem(void) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 78c45f392f5b..085b66b10891 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -96,7 +96,7 @@ extern void _tlbia(void); extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); extern void setbat(int index, unsigned long virt, phys_addr_t phys, - unsigned int size, int flags); + unsigned int size, pgprot_t prot); extern int __map_without_bats; extern int __allow_ioremap_reserved; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0257a7d659ef..5e80621d9324 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -958,6 +958,13 @@ void __init initmem_init(void) memblock_dump_all(); + /* + * Reduce the possible NUMA nodes to the online NUMA nodes, + * since we do not support node hotplug. This ensures that we + * lower the maximum NUMA node ID to what is actually present. + */ + nodes_and(node_possible_map, node_possible_map, node_online_map); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; @@ -1177,6 +1184,9 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR + +#include "vphn.h" + struct topology_update_data { struct topology_update_data *next; unsigned int cpu; @@ -1248,55 +1258,6 @@ static int update_cpu_associativity_changes_mask(void) } /* - * 6 64-bit registers unpacked into 12 32-bit associativity values. To form - * the complete property we have to add the length in the first cell. - */ -#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1) - -/* - * Convert the associativity domain numbers returned from the hypervisor - * to the sequence they would appear in the ibm,associativity property.
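The init_64.c hunk above plugs a small memory leak: kasprintf() returns a heap copy of the formatted name, and kmem_cache_create() keeps its own copy of the string it is given, so the caller must free the one it allocated. A hedged sketch of the corrected pattern (the NULL check is added for illustration and is not part of the hunk):

	#include <linux/kernel.h>
	#include <linux/slab.h>

	/* hypothetical helper mirroring pgtable_cache_add() above */
	static struct kmem_cache *create_pgtable_cache(int shift, size_t size,
						       size_t align,
						       void (*ctor)(void *))
	{
		struct kmem_cache *new;
		char *name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);

		if (!name)
			return NULL;
		new = kmem_cache_create(name, size, align, 0, ctor);
		kfree(name);	/* the cache no longer needs the caller's copy */
		return new;
	}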
- */ -static int vphn_unpack_associativity(const long *packed, __be32 *unpacked) -{ - int i, nr_assoc_doms = 0; - const __be16 *field = (const __be16 *) packed; - -#define VPHN_FIELD_UNUSED (0xffff) -#define VPHN_FIELD_MSB (0x8000) -#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB) - - for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) { - if (be16_to_cpup(field) == VPHN_FIELD_UNUSED) { - /* All significant fields processed, and remaining - * fields contain the reserved value of all 1's. - * Just store them. - */ - unpacked[i] = *((__be32 *)field); - field += 2; - } else if (be16_to_cpup(field) & VPHN_FIELD_MSB) { - /* Data is in the lower 15 bits of this field */ - unpacked[i] = cpu_to_be32( - be16_to_cpup(field) & VPHN_FIELD_MASK); - field++; - nr_assoc_doms++; - } else { - /* Data is in the lower 15 bits of this field - * concatenated with the next 16 bit field - */ - unpacked[i] = *((__be32 *)field); - field += 2; - nr_assoc_doms++; - } - } - - /* The first cell contains the length of the property */ - unpacked[0] = cpu_to_be32(nr_assoc_doms); - - return nr_assoc_doms; -} - -/* * Retrieve the new associativity information for a virtual processor's * home node. */ @@ -1306,11 +1267,8 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity) long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; u64 flags = 1; int hwcpu = get_hard_smp_processor_id(cpu); - int i; rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); - for (i = 0; i < 6; i++) - retbuf[i] = cpu_to_be64(retbuf[i]); vphn_unpack_associativity(retbuf, associativity); return rc; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 03b1a3b0fbd5..7692d1bb1bc6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -54,9 +54,6 @@ extern char etext[], _stext[]; #ifdef HAVE_BATS extern phys_addr_t v_mapped_by_bats(unsigned long va); extern unsigned long p_mapped_by_bats(phys_addr_t pa); -void setbat(int index, unsigned long virt, phys_addr_t phys, - unsigned int size, int flags); - #else /* !HAVE_BATS */ #define v_mapped_by_bats(x) (0UL) #define p_mapped_by_bats(x) (0UL) @@ -110,9 +107,8 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - extern int mem_init_done; - if (mem_init_done) { + if (slab_is_available()) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); @@ -192,7 +188,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, /* Make sure we have the base flags */ if ((flags & _PAGE_PRESENT) == 0) - flags |= PAGE_KERNEL; + flags |= pgprot_val(PAGE_KERNEL); /* Non-cacheable page cannot be coherent */ if (flags & _PAGE_NO_CACHE) @@ -219,9 +215,9 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * Don't allow anybody to remap normal RAM that we're using. * mem_init() sets high_memory so only do the check after that. 
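The mem_init_done checks replaced by slab_is_available() above (and in the __ioremap_caller() and pgtable_64.c hunks that follow) all encode the same two-phase boot pattern: page table pages must come from memblock until the slab allocator is initialised, after which the normal page allocator can be used. A condensed sketch, assuming the 4.0-era memblock_alloc() that returns a physical address:

	#include <linux/gfp.h>
	#include <linux/memblock.h>
	#include <linux/slab.h>

	static void *alloc_pgtable_page(void)
	{
		/* normal runtime path: slab (and the buddy allocator) are up */
		if (slab_is_available())
			return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);

		/* early boot: carve a page straight out of memblock */
		return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
	}

Using slab_is_available() lets the ad-hoc mem_init_done flag (deleted from mem.c above) be dropped in favour of state the allocator already tracks.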
*/ - if (mem_init_done && (p < virt_to_phys(high_memory)) && + if (slab_is_available() && (p < virt_to_phys(high_memory)) && !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { - printk("__ioremap(): phys addr 0x%llx is RAM lr %pf\n", + printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; } @@ -247,7 +243,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, if ((v = p_mapped_by_tlbcam(p))) goto out; - if (mem_init_done) { + if (slab_is_available()) { struct vm_struct *area; area = get_vm_area_caller(size, VM_IOREMAP, caller); if (area == 0) @@ -266,7 +262,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, for (i = 0; i < size && err == 0; i += PAGE_SIZE) err = map_page(v+i, p+i, flags); if (err) { - if (mem_init_done) + if (slab_is_available()) vunmap((void *)v); return NULL; } @@ -327,7 +323,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) p = memstart_addr + s; for (; s < top; s += PAGE_SIZE) { ktext = ((char *) v >= _stext && (char *) v < etext); - f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL; + f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); map_page(v, p, f); #ifdef CONFIG_PPC_STD_MMU_32 if (ktext) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 6957cc1ca0a7..59daa5eeec25 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -231,7 +231,7 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, if ((size == 0) || (paligned == 0)) return NULL; - if (mem_init_done) { + if (slab_is_available()) { struct vm_struct *area; area = __get_vm_area_caller(size, VM_IOREMAP, @@ -315,7 +315,7 @@ void __iounmap(volatile void __iomem *token) { void *addr; - if (!mem_init_done) + if (!slab_is_available()) return; addr = (void *) ((unsigned long __force) @@ -723,7 +723,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif - trace_hugepage_set_pmd(addr, pmd); + trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 5029dc19b517..6b2f3e457171 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -113,11 +113,12 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * of 2 between 128k and 256M. 
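The pgprot_val() conversions in the preceding hunks, together with the setbat() signature change from int flags to pgprot_t in the surrounding hunks, exist because pgprot_t can be an opaque single-member struct (under STRICT_MM_TYPECHECKS), so bare unsigned long flag words no longer convert to it silently. A reduced userspace sketch of the wrapping idiom; the real definitions live in the powerpc pgtable headers:

	/* opaque wrapper: int <-> pgprot_t conversions become compile errors */
	typedef struct { unsigned long pgprot; } pgprot_t;

	#define pgprot_val(x)	((x).pgprot)
	#define __pgprot(x)	((pgprot_t) { (x) })

	static void set_prot(unsigned long va, pgprot_t prot)
	{
		unsigned long flags = pgprot_val(prot);	/* unwrap explicitly */
		(void)va; (void)flags;
	}

	/* set_prot(va, __pgprot(0x7)) compiles; set_prot(va, 0x7) does not */

This is why call sites whose callees still take an int, such as settlbcam() and map_page(), now pass pgprot_val(PAGE_KERNEL_X) rather than the pgprot_t itself.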
*/ void __init setbat(int index, unsigned long virt, phys_addr_t phys, - unsigned int size, int flags) + unsigned int size, pgprot_t prot) { unsigned int bl; int wimgxpp; struct ppc_bat *bat = BATS[index]; + unsigned long flags = pgprot_val(prot); if ((flags & _PAGE_NO_CACHE) || (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0)) @@ -224,7 +225,7 @@ void __init MMU_init_hw(void) */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); Hash = __va(memblock_alloc(Hash_size, Hash_size)); - cacheable_memzero(Hash, Hash_size); + memset(Hash, 0, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index d2a94b85dbc2..c522969f012d 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -216,7 +216,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, continue; pte = pte_val(*ptep); if (hugepage_shift) - trace_hugepage_invalidate(start, pte_val(pte)); + trace_hugepage_invalidate(start, pte); if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/vphn.c new file mode 100644 index 000000000000..5f8ef50e5c66 --- /dev/null +++ b/arch/powerpc/mm/vphn.c @@ -0,0 +1,70 @@ +#include <asm/byteorder.h> +#include "vphn.h" + +/* + * The associativity domain numbers are returned from the hypervisor as a + * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the + * special value of "all ones" (aka. 0xffff) and its size may not exceed 48 + * bytes. + * + * --- 16-bit fields --> + * _________________________ + * | 0 | 1 | 2 | 3 | be_packed[0] + * ------+-----+-----+------ + * _________________________ + * | 4 | 5 | 6 | 7 | be_packed[1] + * ------------------------- + * ... + * _________________________ + * | 20 | 21 | 22 | 23 | be_packed[5] + * ------------------------- + * + * Convert to the sequence they would appear in the ibm,associativity property. 
+ */ +int vphn_unpack_associativity(const long *packed, __be32 *unpacked) +{ + __be64 be_packed[VPHN_REGISTER_COUNT]; + int i, nr_assoc_doms = 0; + const __be16 *field = (const __be16 *) be_packed; + u16 last = 0; + bool is_32bit = false; + +#define VPHN_FIELD_UNUSED (0xffff) +#define VPHN_FIELD_MSB (0x8000) +#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB) + + /* Let's fix the values returned by plpar_hcall9() */ + for (i = 0; i < VPHN_REGISTER_COUNT; i++) + be_packed[i] = cpu_to_be64(packed[i]); + + for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) { + u16 new = be16_to_cpup(field++); + + if (is_32bit) { + /* Let's concatenate the 16 bits of this field to the + * 15 lower bits of the previous field + */ + unpacked[++nr_assoc_doms] = + cpu_to_be32(last << 16 | new); + is_32bit = false; + } else if (new == VPHN_FIELD_UNUSED) + /* This is the list terminator */ + break; + else if (new & VPHN_FIELD_MSB) { + /* Data is in the lower 15 bits of this field */ + unpacked[++nr_assoc_doms] = + cpu_to_be32(new & VPHN_FIELD_MASK); + } else { + /* Data is in the lower 15 bits of this field + * concatenated with the next 16 bit field + */ + last = new; + is_32bit = true; + } + } + + /* The first cell contains the length of the property */ + unpacked[0] = cpu_to_be32(nr_assoc_doms); + + return nr_assoc_doms; +} diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/vphn.h new file mode 100644 index 000000000000..fe8b7805b78f --- /dev/null +++ b/arch/powerpc/mm/vphn.h @@ -0,0 +1,16 @@ +#ifndef _ARCH_POWERPC_MM_VPHN_H_ +#define _ARCH_POWERPC_MM_VPHN_H_ + +/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. + */ +#define VPHN_REGISTER_COUNT 6 + +/* + * 6 64-bit registers unpacked into up to 24 be32 associativity values. To + * form the complete property we have to add the length in the first cell. + */ +#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) + +extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked); + +#endif diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 2396dda282cd..ead55351b254 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -243,7 +243,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); - for (;;) { + while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned long __user *) sp; if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) return; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 7fd60dcb2cb0..12b638425bb9 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1837,8 +1837,10 @@ static int power_pmu_event_init(struct perf_event *event) cpuhw->bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); - if(cpuhw->bhrb_filter == -1) + if (cpuhw->bhrb_filter == -1) { + put_cpu_var(cpu_hw_events); return -EOPNOTSUPP; + } } put_cpu_var(cpu_hw_events); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index abeb9ec0d117..ec2eb20631d1 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +/* + * request_buffer and result_buffer are not required to be 4k aligned, + * but are not allowed to cross any 4k boundary. Aligning them to 4k is + * the simplest way to ensure that. 
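The new vphn.c parser is easiest to follow with concrete input. In the packed stream, a 16-bit field with the MSB set carries a single 15-bit domain number, a field with the MSB clear is the upper half of a 32-bit domain number completed by the following field, and 0xffff terminates the list. A self-contained userspace approximation (host-endian for brevity, skipping the cpu_to_be64() fix-up the kernel applies to the raw plpar_hcall9() output):

	#include <stdint.h>
	#include <stdio.h>

	#define VPHN_FIELD_UNUSED	0xffffu
	#define VPHN_FIELD_MSB		0x8000u
	#define VPHN_FIELD_MASK		0x7fffu

	static int unpack(const uint16_t *field, int nfields, uint32_t *out)
	{
		uint16_t last = 0;
		int i, is_32bit = 0, n = 0;

		for (i = 0; i < nfields; i++) {
			uint16_t cur = field[i];

			if (is_32bit) {		/* low half of a 32-bit value */
				out[++n] = (uint32_t)last << 16 | cur;
				is_32bit = 0;
			} else if (cur == VPHN_FIELD_UNUSED) {
				break;		/* list terminator */
			} else if (cur & VPHN_FIELD_MSB) {
				out[++n] = cur & VPHN_FIELD_MASK;
			} else {		/* high half, remember it */
				last = cur;
				is_32bit = 1;
			}
		}
		out[0] = n;	/* first cell holds the length, as in vphn.c */
		return n;
	}

	int main(void)
	{
		/* one 15-bit domain (0x8002 -> 2), one 32-bit domain
		 * (0x0001 0x0005 -> 0x10005), then the terminator */
		uint16_t packed[] = { 0x8002, 0x0001, 0x0005, 0xffff };
		uint32_t unpacked[8] = { 0 };
		int i, n = unpack(packed, 4, unpacked);

		for (i = 1; i <= n; i++)
			printf("domain %d: 0x%x\n", i, (unsigned)unpacked[i]);
		return 0;
	}

Splitting the parser out into vphn.c with its own header also makes it testable in isolation, which the in-place numa.c version deleted above was not.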
+ */ +#define H24x7_DATA_BUFFER_SIZE 4096 +DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); + static char *event_name(struct hv_24x7_event_data *ev, int *len) { *len = be16_to_cpu(ev->event_name_len) - 2; @@ -152,6 +161,7 @@ static char *event_desc(struct hv_24x7_event_data *ev, int *len) { unsigned nl = be16_to_cpu(ev->event_name_len); __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2); + *len = be16_to_cpu(*desc_len) - 2; return (char *)ev->remainder + nl; } @@ -162,6 +172,7 @@ static char *event_long_desc(struct hv_24x7_event_data *ev, int *len) __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2); unsigned desc_len = be16_to_cpu(*desc_len_); __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2); + *len = be16_to_cpu(*long_desc_len) - 2; return (char *)ev->remainder + nl + desc_len; } @@ -239,14 +250,12 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long index) { pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)", - phys_4096, - version, - index); + phys_4096, version, index); + WARN_ON(!IS_ALIGNED(phys_4096, 4096)); + return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE, - phys_4096, - version, - index); + phys_4096, version, index); } static unsigned long h_get_24x7_catalog_page(char page[], @@ -300,6 +309,7 @@ static ssize_t device_show_string(struct device *dev, struct dev_ext_attribute *d; d = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "%s\n", (char *)d->var); } @@ -314,6 +324,7 @@ static struct attribute *device_str_attr_create_(char *name, char *str) attr->attr.attr.name = name; attr->attr.attr.mode = 0444; attr->attr.show = device_show_string; + return &attr->attr.attr; } @@ -387,7 +398,6 @@ static struct attribute *event_to_attr(unsigned ix, a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", (int)event_name_len, ev_name, ev_suffix, nonce); - if (!a_ev_name) goto out_val; @@ -637,7 +647,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, #define MAX_4K (SIZE_MAX / 4096) -static void create_events_from_catalog(struct attribute ***events_, +static int create_events_from_catalog(struct attribute ***events_, struct attribute ***event_descs_, struct attribute ***event_long_descs_) { @@ -655,19 +665,25 @@ static void create_events_from_catalog(struct attribute ***events_, void *event_data, *end; struct hv_24x7_event_data *event; struct rb_root ev_uniq = RB_ROOT; + int ret = 0; - if (!page) + if (!page) { + ret = -ENOMEM; goto e_out; + } hret = h_get_24x7_catalog_page(page, 0, 0); - if (hret) + if (hret) { + ret = -EIO; goto e_free; + } catalog_version_num = be64_to_cpu(page_0->version); catalog_page_len = be32_to_cpu(page_0->length); if (MAX_4K < catalog_page_len) { pr_err("invalid page count: %zu\n", catalog_page_len); + ret = -EIO; goto e_free; } @@ -686,6 +702,7 @@ static void create_events_from_catalog(struct attribute ***events_, || (MAX_4K - event_data_offs < event_data_len)) { pr_err("invalid event data offs %zu and/or len %zu\n", event_data_offs, event_data_len); + ret = -EIO; goto e_free; } @@ -694,12 +711,14 @@ static void create_events_from_catalog(struct attribute ***events_, event_data_offs, event_data_offs + event_data_len, catalog_page_len); + ret = -EIO; goto e_free; } if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { pr_err("event_entry_count %zu is invalid\n", event_entry_count); + ret = -EIO; goto e_free; } @@ -712,6 +731,7 @@ static 
void create_events_from_catalog(struct attribute ***events_, event_data = vmalloc(event_data_bytes); if (!event_data) { pr_err("could not allocate event data\n"); + ret = -ENOMEM; goto e_free; } @@ -731,6 +751,7 @@ static void create_events_from_catalog(struct attribute ***events_, if (hret) { pr_err("failed to get event data in page %zu\n", i + event_data_offs); + ret = -EIO; goto e_event_data; } } @@ -778,18 +799,24 @@ static void create_events_from_catalog(struct attribute ***events_, event_idx_last, event_entry_count, junk_events); events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL); - if (!events) + if (!events) { + ret = -ENOMEM; goto e_event_data; + } event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs), GFP_KERNEL); - if (!event_descs) + if (!event_descs) { + ret = -ENOMEM; goto e_event_attrs; + } event_long_descs = kmalloc_array(event_idx + 1, sizeof(*event_long_descs), GFP_KERNEL); - if (!event_long_descs) + if (!event_long_descs) { + ret = -ENOMEM; goto e_event_descs; + } /* Iterate over the catalog filling in the attribute vector */ for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0, @@ -843,7 +870,7 @@ static void create_events_from_catalog(struct attribute ***events_, *events_ = events; *event_descs_ = event_descs; *event_long_descs_ = event_long_descs; - return; + return 0; e_event_descs: kfree(event_descs); @@ -857,6 +884,7 @@ e_out: *events_ = NULL; *event_descs_ = NULL; *event_long_descs_ = NULL; + return ret; } static ssize_t catalog_read(struct file *filp, struct kobject *kobj, @@ -872,6 +900,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; + if (!page) return -ENOMEM; @@ -976,31 +1005,104 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); -DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); +static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, + struct hv_24x7_data_result_buffer *result_buffer, + unsigned long ret) +{ + struct hv_24x7_request *req; + + req = &request_buffer->requests[0]; + pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => " + "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n", + req->performance_domain, req->data_offset, + req->starting_ix, req->starting_lpar_ix, ret, ret, + result_buffer->detailed_rc, + result_buffer->failing_request_ix); +} + +/* + * Start the process for a new H_GET_24x7_DATA hcall. + */ +static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, + struct hv_24x7_data_result_buffer *result_buffer) +{ + + memset(request_buffer, 0, 4096); + memset(result_buffer, 0, 4096); + + request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT; + /* memset above set request_buffer->num_requests to 0 */ +} -static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, - u16 lpar, u64 *res, - bool success_expected) +/* + * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected + * by 'init_24x7_request()' and 'add_event_to_24x7_request()'. + */ +static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer, + struct hv_24x7_data_result_buffer *result_buffer) { unsigned long ret; /* - * request_buffer and result_buffer are not required to be 4k aligned, - * but are not allowed to cross any 4k boundary. Aligning them to 4k is - * the simplest way to ensure that. 
+ * NOTE: Due to variable number of array elements in request and + * result buffer(s), sizeof() is not reliable. Use the actual + * allocated buffer size, H24x7_DATA_BUFFER_SIZE. */ - struct reqb { - struct hv_24x7_request_buffer buf; - struct hv_24x7_request req; - } __packed *request_buffer; - - struct { - struct hv_24x7_data_result_buffer buf; - struct hv_24x7_result res; - struct hv_24x7_result_element elem; - __be64 result; - } __packed *result_buffer; + ret = plpar_hcall_norets(H_GET_24X7_DATA, + virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE, + virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE); + + if (ret) + log_24x7_hcall(request_buffer, result_buffer, ret); + + return ret; +} + +/* + * Add the given @event to the next slot in the 24x7 request_buffer. + * + * Note that H_GET_24X7_DATA hcall allows reading several counters' + * values in a single HCALL. We expect the caller to add events to the + * request buffer one by one, make the HCALL and process the results. + */ +static int add_event_to_24x7_request(struct perf_event *event, + struct hv_24x7_request_buffer *request_buffer) +{ + u16 idx; + int i; + struct hv_24x7_request *req; + + if (request_buffer->num_requests > 254) { + pr_devel("Too many requests for 24x7 HCALL %d\n", + request_buffer->num_requests); + return -EINVAL; + } + + if (is_physical_domain(event_get_domain(event))) + idx = event_get_core(event); + else + idx = event_get_vcpu(event); + + i = request_buffer->num_requests++; + req = &request_buffer->requests[i]; + + req->performance_domain = event_get_domain(event); + req->data_size = cpu_to_be16(8); + req->data_offset = cpu_to_be32(event_get_offset(event)); + req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)), + req->max_num_lpars = cpu_to_be16(1); + req->starting_ix = cpu_to_be16(idx); + req->max_ix = cpu_to_be16(1); + + return 0; +} + +static unsigned long single_24x7_request(struct perf_event *event, u64 *count) +{ + unsigned long ret; + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + struct hv_24x7_result *resb; BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); @@ -1008,63 +1110,28 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, request_buffer = (void *)get_cpu_var(hv_24x7_reqb); result_buffer = (void *)get_cpu_var(hv_24x7_resb); - memset(request_buffer, 0, 4096); - memset(result_buffer, 0, 4096); - - *request_buffer = (struct reqb) { - .buf = { - .interface_version = HV_24X7_IF_VERSION_CURRENT, - .num_requests = 1, - }, - .req = { - .performance_domain = domain, - .data_size = cpu_to_be16(8), - .data_offset = cpu_to_be32(offset), - .starting_lpar_ix = cpu_to_be16(lpar), - .max_num_lpars = cpu_to_be16(1), - .starting_ix = cpu_to_be16(ix), - .max_ix = cpu_to_be16(1), - } - }; + init_24x7_request(request_buffer, result_buffer); - ret = plpar_hcall_norets(H_GET_24X7_DATA, - virt_to_phys(request_buffer), sizeof(*request_buffer), - virt_to_phys(result_buffer), sizeof(*result_buffer)); + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) + goto out; + ret = make_24x7_request(request_buffer, result_buffer); if (ret) { - if (success_expected) - pr_err_ratelimited("hcall failed: %d %#x %#x %d => " - "0x%lx (%ld) detail=0x%x failing ix=%x\n", - domain, offset, ix, lpar, ret, ret, - result_buffer->buf.detailed_rc, - result_buffer->buf.failing_request_ix); + log_24x7_hcall(request_buffer, result_buffer, ret); goto out; } - *res = be64_to_cpu(result_buffer->result); + /* 
process result from hcall */ + resb = &result_buffer->results[0]; + *count = be64_to_cpu(resb->elements[0].element_data[0]); out: + put_cpu_var(hv_24x7_reqb); + put_cpu_var(hv_24x7_resb); return ret; } -static unsigned long event_24x7_request(struct perf_event *event, u64 *res, - bool success_expected) -{ - u16 idx; - unsigned domain = event_get_domain(event); - - if (is_physical_domain(domain)) - idx = event_get_core(event); - else - idx = event_get_vcpu(event); - - return single_24x7_request(event_get_domain(event), - event_get_offset(event), - idx, - event_get_lpar(event), - res, - success_expected); -} static int h_24x7_event_init(struct perf_event *event) { @@ -1133,7 +1200,7 @@ static int h_24x7_event_init(struct perf_event *event) } /* see if the event complains */ - if (event_24x7_request(event, &ct, false)) { + if (single_24x7_request(event, &ct)) { pr_devel("test hcall failed\n"); return -EIO; } @@ -1145,7 +1212,7 @@ static u64 h_24x7_get_value(struct perf_event *event) { unsigned long ret; u64 ct; - ret = event_24x7_request(event, &ct, true); + ret = single_24x7_request(event, &ct); if (ret) /* We checked this in event init, shouldn't fail here... */ return 0; @@ -1153,15 +1220,22 @@ static u64 h_24x7_get_value(struct perf_event *event) return ct; } -static void h_24x7_event_update(struct perf_event *event) +static void update_event_count(struct perf_event *event, u64 now) { s64 prev; - u64 now; - now = h_24x7_get_value(event); + prev = local64_xchg(&event->hw.prev_count, now); local64_add(now - prev, &event->count); } +static void h_24x7_event_read(struct perf_event *event) +{ + u64 now; + + now = h_24x7_get_value(event); + update_event_count(event, now); +} + static void h_24x7_event_start(struct perf_event *event, int flags) { if (flags & PERF_EF_RELOAD) @@ -1170,7 +1244,7 @@ static void h_24x7_event_start(struct perf_event *event, int flags) static void h_24x7_event_stop(struct perf_event *event, int flags) { - h_24x7_event_update(event); + h_24x7_event_read(event); } static int h_24x7_event_add(struct perf_event *event, int flags) @@ -1191,7 +1265,7 @@ static struct pmu h_24x7_pmu = { .del = h_24x7_event_stop, .start = h_24x7_event_start, .stop = h_24x7_event_stop, - .read = h_24x7_event_update, + .read = h_24x7_event_read, }; static int hv_24x7_init(void) @@ -1219,10 +1293,13 @@ static int hv_24x7_init(void) /* sampling not supported */ h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - create_events_from_catalog(&event_group.attrs, + r = create_events_from_catalog(&event_group.attrs, &event_desc_group.attrs, &event_long_desc_group.attrs); + if (r) + return r; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 69cd4e690f58..0f9fa21a29f2 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -50,7 +50,7 @@ struct hv_24x7_request_buffer { __u8 interface_version; __u8 num_requests; __u8 reserved[0xE]; - struct hv_24x7_request requests[]; + struct hv_24x7_request requests[1]; } __packed; struct hv_24x7_result_element { @@ -66,7 +66,7 @@ struct hv_24x7_result_element { __be32 lpar_cfg_instance_id; /* size = @result_element_data_size of cointaining result. 
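The refactoring above splits the old single_24x7_request() into init/add/commit stages precisely so that several counters can be fetched with one H_GET_24X7_DATA hcall. A hedged sketch of the intended calling sequence, using only helpers defined in this patch ('events', 'nr_events' and 'counts' are hypothetical inputs, and error handling is trimmed):

	static int read_24x7_events(struct perf_event **events, int nr_events,
				    u64 *counts)
	{
		struct hv_24x7_request_buffer *request_buffer;
		struct hv_24x7_data_result_buffer *result_buffer;
		int i, ret = 0;

		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
		result_buffer = (void *)get_cpu_var(hv_24x7_resb);

		init_24x7_request(request_buffer, result_buffer);
		for (i = 0; i < nr_events && !ret; i++)
			ret = add_event_to_24x7_request(events[i], request_buffer);
		if (!ret)
			ret = make_24x7_request(request_buffer, result_buffer);
		if (!ret)
			/* only results[0] can be indexed directly; later results
			 * sit at variable offsets, per the header's warnings */
			counts[0] = be64_to_cpu(result_buffer->results[0]
						.elements[0].element_data[0]);

		put_cpu_var(hv_24x7_reqb);
		put_cpu_var(hv_24x7_resb);
		return ret;
	}

Note the matching put_cpu_var() calls on every path, the same imbalance the core-book3s.c hunk earlier in this section fixes for the BHRB filter error case.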
*/ - __u8 element_data[]; + __u64 element_data[1]; } __packed; struct hv_24x7_result { @@ -87,7 +87,7 @@ struct hv_24x7_result { /* WARNING: only valid for first result element due to variable sizes * of result elements */ /* struct hv_24x7_result_element[@num_elements_returned] */ - struct hv_24x7_result_element elements[]; + struct hv_24x7_result_element elements[1]; } __packed; struct hv_24x7_data_result_buffer { @@ -103,7 +103,7 @@ struct hv_24x7_data_result_buffer { __u8 reserved2[0x8]; /* WARNING: only valid for the first result due to variable sizes of * results */ - struct hv_24x7_result results[]; /* [@num_results] */ + struct hv_24x7_result results[1]; /* [@num_results] */ } __packed; #endif diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 4a9ad871a168..7bfb9b184dd4 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -40,6 +40,7 @@ static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .compatible = "fsl,qoriq-pcie-v2.4", }, { .compatible = "fsl,qoriq-pcie-v2.3", }, { .compatible = "fsl,qoriq-pcie-v2.2", }, + { .compatible = "fsl,fman", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 1f309ccb096e..9824d2cf79bd 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -88,6 +88,15 @@ static const struct of_device_id of_device_ids[] = { .compatible = "simple-bus" }, { + .compatible = "mdio-mux-gpio" + }, + { + .compatible = "fsl,fpga-ngpixis" + }, + { + .compatible = "fsl,fpga-qixis" + }, + { .compatible = "fsl,srio", }, { @@ -108,6 +117,9 @@ static const struct of_device_id of_device_ids[] = { { .compatible = "fsl,qe", }, + { + .compatible = "fsl,fman", + }, /* The following two are for the Freescale hypervisor */ { .name = "hypervisor", diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index d7c1e69f3070..8631ac5f0e57 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -360,10 +360,10 @@ static void mpc85xx_smp_kexec_down(void *arg) static void map_and_flush(unsigned long paddr) { struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned long kaddr = (unsigned long)kmap(page); + unsigned long kaddr = (unsigned long)kmap_atomic(page); flush_dcache_range(kaddr, kaddr + PAGE_SIZE); - kunmap(page); + kunmap_atomic((void *)kaddr); } /** diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 391b3f6b54a3..b7f9c408bf24 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -72,11 +72,6 @@ config PPC_SMP_MUXED_IPI cpu. This will enable the generic code to multiplex the 4 messages on to one ipi. -config PPC_UDBG_BEAT - bool "BEAT based debug console" - depends on PPC_CELLEB - default n - config IPIC bool default n diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 76483e3acd60..7264e91190be 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -2,6 +2,7 @@ config PPC64 bool "64-bit kernel" default n select HAVE_VIRT_CPU_ACCOUNTING + select ZLIB_DEFLATE help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -15,7 +16,7 @@ choice The most common ones are the desktop and server CPUs (601, 603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. 
- The other embeeded parts, namely 4xx, 8xx, e200 (55xx) and e500 + The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 (85xx) each form a family of their own that is not compatible with the others. diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 870b6dbd4d18..2f23133ab3d1 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -33,17 +33,6 @@ config PPC_IBM_CELL_BLADE select PPC_UDBG_16550 select UDBG_RTAS_CONSOLE -config PPC_CELLEB - bool "Toshiba's Cell Reference Set 'Celleb' Architecture" - depends on PPC64 && PPC_BOOK3S - select PPC_CELL_NATIVE - select PPC_OF_PLATFORM_PCI - select PCI - select HAS_TXX9_SERIAL - select PPC_UDBG_BEAT - select USB_OHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_MMIO - config PPC_CELL_QPACE bool "IBM Cell - QPACE" depends on PPC64 && PPC_BOOK3S diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 2d16884f67b9..34699bddfddd 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -29,18 +29,3 @@ obj-$(CONFIG_AXON_MSI) += axon_msi.o # qpace setup obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o - -# celleb stuff -ifeq ($(CONFIG_PPC_CELLEB),y) -obj-y += celleb_setup.o \ - celleb_pci.o celleb_scc_epci.o \ - celleb_scc_pciex.o \ - celleb_scc_uhc.o \ - spider-pci.o beat.o beat_htab.o \ - beat_hvCall.o beat_interrupt.o \ - beat_iommu.o - -obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o -obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o -obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o -endif diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c deleted file mode 100644 index affcf566d460..000000000000 --- a/arch/powerpc/platforms/cell/beat.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Simple routines for Celleb/Beat - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#include <linux/export.h> -#include <linux/init.h> -#include <linux/err.h> -#include <linux/rtc.h> -#include <linux/interrupt.h> -#include <linux/irqreturn.h> -#include <linux/reboot.h> - -#include <asm/hvconsole.h> -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/firmware.h> - -#include "beat_wrapper.h" -#include "beat.h" -#include "beat_interrupt.h" - -static int beat_pm_poweroff_flag; - -void beat_restart(char *cmd) -{ - beat_shutdown_logical_partition(!beat_pm_poweroff_flag); -} - -void beat_power_off(void) -{ - beat_shutdown_logical_partition(0); -} - -u64 beat_halt_code = 0x1000000000000000UL; -EXPORT_SYMBOL(beat_halt_code); - -void beat_halt(void) -{ - beat_shutdown_logical_partition(beat_halt_code); -} - -int beat_set_rtc_time(struct rtc_time *rtc_time) -{ - u64 tim; - tim = mktime(rtc_time->tm_year+1900, - rtc_time->tm_mon+1, rtc_time->tm_mday, - rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec); - if (beat_rtc_write(tim)) - return -1; - return 0; -} - -void beat_get_rtc_time(struct rtc_time *rtc_time) -{ - u64 tim; - - if (beat_rtc_read(&tim)) - tim = 0; - to_tm(tim, rtc_time); - rtc_time->tm_year -= 1900; - rtc_time->tm_mon -= 1; -} - -#define BEAT_NVRAM_SIZE 4096 - -ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len; - char *p = buf; - - if (*index >= BEAT_NVRAM_SIZE) - return -ENODEV; - i = *index; - if (i + count > BEAT_NVRAM_SIZE) - count = BEAT_NVRAM_SIZE - i; - - for (; count != 0; count -= len) { - len = count; - if (len > BEAT_NVRW_CNT) - len = BEAT_NVRW_CNT; - if (beat_eeprom_read(i, len, p)) - return -EIO; - - p += len; - i += len; - } - *index = i; - return p - buf; -} - -ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len; - char *p = buf; - - if (*index >= BEAT_NVRAM_SIZE) - return -ENODEV; - i = *index; - if (i + count > BEAT_NVRAM_SIZE) - count = BEAT_NVRAM_SIZE - i; - - for (; count != 0; count -= len) { - len = count; - if (len > BEAT_NVRW_CNT) - len = BEAT_NVRW_CNT; - if (beat_eeprom_write(i, len, p)) - return -EIO; - - p += len; - i += len; - } - *index = i; - return p - buf; -} - -ssize_t beat_nvram_get_size(void) -{ - return BEAT_NVRAM_SIZE; -} - -int beat_set_xdabr(unsigned long dabr, unsigned long dabrx) -{ - if (beat_set_dabr(dabr, dabrx)) - return -1; - return 0; -} - -int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2) -{ - u64 db[2]; - s64 ret; - - ret = beat_get_characters_from_console(vterm, len, (u8 *)db); - if (ret == 0) { - *t1 = db[0]; - *t2 = db[1]; - } - return ret; -} -EXPORT_SYMBOL(beat_get_term_char); - -int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2) -{ - u64 db[2]; - - db[0] = t1; - db[1] = t2; - return beat_put_characters_to_console(vterm, len, (u8 *)db); -} -EXPORT_SYMBOL(beat_put_term_char); - -void beat_power_save(void) -{ - beat_pause(0); -} - -#ifdef CONFIG_KEXEC -void beat_kexec_cpu_down(int crash, int secondary) -{ - beatic_deinit_IRQ(); -} -#endif - -static irqreturn_t beat_power_event(int virq, void *arg) -{ - printk(KERN_DEBUG "Beat: power button pressed\n"); - beat_pm_poweroff_flag = 1; - ctrl_alt_del(); - return IRQ_HANDLED; -} - -static irqreturn_t beat_reset_event(int virq, void *arg) -{ - printk(KERN_DEBUG "Beat: reset button pressed\n"); - beat_pm_poweroff_flag = 0; - ctrl_alt_del(); - return IRQ_HANDLED; -} - -static struct beat_event_list { - const char *typecode; - irq_handler_t handler; - unsigned int virq; -} beat_event_list[] = { - { "power", 
beat_power_event, 0 }, - { "reset", beat_reset_event, 0 }, -}; - -static int __init beat_register_event(void) -{ - u64 path[4], data[2]; - int rc, i; - unsigned int virq; - - for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) { - struct beat_event_list *ev = &beat_event_list[i]; - - if (beat_construct_event_receive_port(data) != 0) { - printk(KERN_ERR "Beat: " - "cannot construct event receive port for %s\n", - ev->typecode); - return -EINVAL; - } - - virq = irq_create_mapping(NULL, data[0]); - if (virq == NO_IRQ) { - printk(KERN_ERR "Beat: failed to get virtual IRQ" - " for event receive port for %s\n", - ev->typecode); - beat_destruct_event_receive_port(data[0]); - return -EIO; - } - ev->virq = virq; - - rc = request_irq(virq, ev->handler, 0, - ev->typecode, NULL); - if (rc != 0) { - printk(KERN_ERR "Beat: failed to request virtual IRQ" - " for event receive port for %s\n", - ev->typecode); - beat_destruct_event_receive_port(data[0]); - return rc; - } - - path[0] = 0x1000000065780000ul; /* 1,ex */ - path[1] = 0x627574746f6e0000ul; /* button */ - path[2] = 0; - strncpy((char *)&path[2], ev->typecode, 8); - path[3] = 0; - data[1] = 0; - - beat_create_repository_node(path, data); - } - return 0; -} - -static int __init beat_event_init(void) -{ - if (!firmware_has_feature(FW_FEATURE_BEAT)) - return -EINVAL; - - beat_pm_poweroff_flag = 0; - return beat_register_event(); -} - -device_initcall(beat_event_init); diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h deleted file mode 100644 index bfcb8e351ae5..000000000000 --- a/arch/powerpc/platforms/cell/beat.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Guest OS Interfaces. - * - * (C) Copyright 2006 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef _CELLEB_BEAT_H -#define _CELLEB_BEAT_H - -int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *); -int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t); -int64_t beat_repository_encode(int, const char *, uint64_t[4]); -void beat_restart(char *); -void beat_power_off(void); -void beat_halt(void); -int beat_set_rtc_time(struct rtc_time *); -void beat_get_rtc_time(struct rtc_time *); -ssize_t beat_nvram_get_size(void); -ssize_t beat_nvram_read(char *, size_t, loff_t *); -ssize_t beat_nvram_write(char *, size_t, loff_t *); -int beat_set_xdabr(unsigned long, unsigned long); -void beat_power_save(void); -void beat_kexec_cpu_down(int, int); - -#endif /* _CELLEB_BEAT_H */ diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c deleted file mode 100644 index bee9232fe619..000000000000 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * "Cell Reference Set" HTAB support. 
- * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/pseries/lpar.c: - * Copyright (C) 2001 Todd Inglett, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG_LOW - -#include <linux/kernel.h> -#include <linux/spinlock.h> - -#include <asm/mmu.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "beat_wrapper.h" - -#ifdef DEBUG_LOW -#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0) -#else -#define DBG_LOW(fmt...) do { } while (0) -#endif - -static DEFINE_RAW_SPINLOCK(beat_htab_lock); - -static inline unsigned int beat_read_mask(unsigned hpte_group) -{ - unsigned long rmask = 0; - u64 hpte_v[5]; - - beat_read_htab_entries(0, hpte_group + 0, hpte_v); - if (!(hpte_v[0] & HPTE_V_BOLTED)) - rmask |= 0x8000; - if (!(hpte_v[1] & HPTE_V_BOLTED)) - rmask |= 0x4000; - if (!(hpte_v[2] & HPTE_V_BOLTED)) - rmask |= 0x2000; - if (!(hpte_v[3] & HPTE_V_BOLTED)) - rmask |= 0x1000; - beat_read_htab_entries(0, hpte_group + 4, hpte_v); - if (!(hpte_v[0] & HPTE_V_BOLTED)) - rmask |= 0x0800; - if (!(hpte_v[1] & HPTE_V_BOLTED)) - rmask |= 0x0400; - if (!(hpte_v[2] & HPTE_V_BOLTED)) - rmask |= 0x0200; - if (!(hpte_v[3] & HPTE_V_BOLTED)) - rmask |= 0x0100; - hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP); - beat_read_htab_entries(0, hpte_group + 0, hpte_v); - if (!(hpte_v[0] & HPTE_V_BOLTED)) - rmask |= 0x80; - if (!(hpte_v[1] & HPTE_V_BOLTED)) - rmask |= 0x40; - if (!(hpte_v[2] & HPTE_V_BOLTED)) - rmask |= 0x20; - if (!(hpte_v[3] & HPTE_V_BOLTED)) - rmask |= 0x10; - beat_read_htab_entries(0, hpte_group + 4, hpte_v); - if (!(hpte_v[0] & HPTE_V_BOLTED)) - rmask |= 0x08; - if (!(hpte_v[1] & HPTE_V_BOLTED)) - rmask |= 0x04; - if (!(hpte_v[2] & HPTE_V_BOLTED)) - rmask |= 0x02; - if (!(hpte_v[3] & HPTE_V_BOLTED)) - rmask |= 0x01; - return rmask; -} - -static long beat_lpar_hpte_insert(unsigned long hpte_group, - unsigned long vpn, unsigned long pa, - unsigned long rflags, unsigned long vflags, - int psize, int apsize, int ssize) -{ - unsigned long lpar_rc; - u64 hpte_v, hpte_r, slot; - - if (vflags & HPTE_V_SECONDARY) - return -1; - - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); - - hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) | - vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; - - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); - - if (rflags & _PAGE_NO_CACHE) - hpte_r &= ~HPTE_R_M; - - raw_spin_lock(&beat_htab_lock); - lpar_rc = beat_read_mask(hpte_group); - if (lpar_rc == 0) { - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" full\n"); - 
raw_spin_unlock(&beat_htab_lock); - return -1; - } - - lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48, - hpte_v, hpte_r, &slot); - raw_spin_unlock(&beat_htab_lock); - - /* - * Since we try and ioremap PHBs we don't own, the pte insert - * will fail. However we must catch the failure in hash_page - * or we will loop forever, so return -2 in this case. - */ - if (unlikely(lpar_rc != 0)) { - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" lpar err %lx\n", lpar_rc); - return -2; - } - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" -> slot: %lx\n", slot); - - /* We have to pass down the secondary bucket bit here as well */ - return (slot ^ hpte_group) & 15; -} - -static long beat_lpar_hpte_remove(unsigned long hpte_group) -{ - DBG_LOW("hpte_remove(group=%lx)\n", hpte_group); - return -1; -} - -static unsigned long beat_lpar_hpte_getword0(unsigned long slot) -{ - unsigned long dword0; - unsigned long lpar_rc; - u64 dword[5]; - - lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword); - - dword0 = dword[slot&3]; - - BUG_ON(lpar_rc != 0); - - return dword0; -} - -static void beat_lpar_hptab_clear(void) -{ - unsigned long size_bytes = 1UL << ppc64_pft_size; - unsigned long hpte_count = size_bytes >> 4; - int i; - u64 dummy0, dummy1; - - /* TODO: Use bulk call */ - for (i = 0; i < hpte_count; i++) - beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1); -} - -/* - * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and - * the low 3 bits of flags happen to line up. So no transform is needed. - * We can probably optimize here and assume the high bits of newpp are - * already zero. For now I am paranoid. - */ -static long beat_lpar_hpte_updatepp(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int psize, int apsize, - int ssize, unsigned long flags) -{ - unsigned long lpar_rc; - u64 dummy0, dummy1; - unsigned long want_v; - - want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); - - DBG_LOW(" update: " - "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... 
", - want_v & HPTE_V_AVPN, slot, psize, newpp); - - raw_spin_lock(&beat_htab_lock); - dummy0 = beat_lpar_hpte_getword0(slot); - if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) { - DBG_LOW("not found !\n"); - raw_spin_unlock(&beat_htab_lock); - return -1; - } - - lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, - &dummy1); - raw_spin_unlock(&beat_htab_lock); - if (lpar_rc != 0 || dummy0 == 0) { - DBG_LOW("not found !\n"); - return -1; - } - - DBG_LOW("ok %lx %lx\n", dummy0, dummy1); - - BUG_ON(lpar_rc != 0); - - return 0; -} - -static long beat_lpar_hpte_find(unsigned long vpn, int psize) -{ - unsigned long hash; - unsigned long i, j; - long slot; - unsigned long want_v, hpte_v; - - hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M); - want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); - - for (j = 0; j < 2; j++) { - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - for (i = 0; i < HPTES_PER_GROUP; i++) { - hpte_v = beat_lpar_hpte_getword0(slot); - - if (HPTE_V_COMPARE(hpte_v, want_v) - && (hpte_v & HPTE_V_VALID) - && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { - /* HPTE matches */ - if (j) - slot = -slot; - return slot; - } - ++slot; - } - hash = ~hash; - } - - return -1; -} - -static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, - unsigned long ea, - int psize, int ssize) -{ - unsigned long vpn; - unsigned long lpar_rc, slot, vsid; - u64 dummy0, dummy1; - - vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M); - - raw_spin_lock(&beat_htab_lock); - slot = beat_lpar_hpte_find(vpn, psize); - BUG_ON(slot == -1); - - lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, - &dummy0, &dummy1); - raw_spin_unlock(&beat_htab_lock); - - BUG_ON(lpar_rc != 0); -} - -static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int apsize, - int ssize, int local) -{ - unsigned long want_v; - unsigned long lpar_rc; - u64 dummy1, dummy2; - unsigned long flags; - - DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", - slot, va, psize, local); - want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); - - raw_spin_lock_irqsave(&beat_htab_lock, flags); - dummy1 = beat_lpar_hpte_getword0(slot); - - if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) { - DBG_LOW("not found !\n"); - raw_spin_unlock_irqrestore(&beat_htab_lock, flags); - return; - } - - lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0, - &dummy1, &dummy2); - raw_spin_unlock_irqrestore(&beat_htab_lock, flags); - - BUG_ON(lpar_rc != 0); -} - -void __init hpte_init_beat(void) -{ - ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate; - ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp; - ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; - ppc_md.hpte_insert = beat_lpar_hpte_insert; - ppc_md.hpte_remove = beat_lpar_hpte_remove; - ppc_md.hpte_clear_all = beat_lpar_hptab_clear; -} - -static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, - unsigned long vpn, unsigned long pa, - unsigned long rflags, unsigned long vflags, - int psize, int apsize, int ssize) -{ - unsigned long lpar_rc; - u64 hpte_v, hpte_r, slot; - - if (vflags & HPTE_V_SECONDARY) - return -1; - - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, vpn, pa, rflags, vflags, psize); - - hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) | - vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; - - if (!(vflags & 
HPTE_V_BOLTED)) - DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); - - if (rflags & _PAGE_NO_CACHE) - hpte_r &= ~HPTE_R_M; - - /* insert into not-volted entry */ - lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r, - HPTE_V_BOLTED, 0, &slot); - /* - * Since we try and ioremap PHBs we don't own, the pte insert - * will fail. However we must catch the failure in hash_page - * or we will loop forever, so return -2 in this case. - */ - if (unlikely(lpar_rc != 0)) { - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" lpar err %lx\n", lpar_rc); - return -2; - } - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" -> slot: %lx\n", slot); - - /* We have to pass down the secondary bucket bit here as well */ - return (slot ^ hpte_group) & 15; -} - -/* - * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and - * the low 3 bits of flags happen to line up. So no transform is needed. - * We can probably optimize here and assume the high bits of newpp are - * already zero. For now I am paranoid. - */ -static long beat_lpar_hpte_updatepp_v3(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int psize, int apsize, - int ssize, unsigned long flags) -{ - unsigned long lpar_rc; - unsigned long want_v; - unsigned long pss; - - want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); - pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize]; - - DBG_LOW(" update: " - "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ", - want_v & HPTE_V_AVPN, slot, psize, newpp); - - lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp); - - if (lpar_rc == 0xfffffff7) { - DBG_LOW("not found !\n"); - return -1; - } - - DBG_LOW("ok\n"); - - BUG_ON(lpar_rc != 0); - - return 0; -} - -static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, - int psize, int apsize, - int ssize, int local) -{ - unsigned long want_v; - unsigned long lpar_rc; - unsigned long pss; - - DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", - slot, vpn, psize, local); - want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); - pss = (psize == MMU_PAGE_4K) ? 
-1UL : mmu_psize_defs[psize].penc[psize]; - - lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss); - - /* E_busy can be valid output: page may be already replaced */ - BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7); -} - -static int64_t _beat_lpar_hptab_clear_v3(void) -{ - return beat_clear_htab3(0); -} - -static void beat_lpar_hptab_clear_v3(void) -{ - _beat_lpar_hptab_clear_v3(); -} - -void __init hpte_init_beat_v3(void) -{ - if (_beat_lpar_hptab_clear_v3() == 0) { - ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate_v3; - ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp_v3; - ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; - ppc_md.hpte_insert = beat_lpar_hpte_insert_v3; - ppc_md.hpte_remove = beat_lpar_hpte_remove; - ppc_md.hpte_clear_all = beat_lpar_hptab_clear_v3; - } else { - ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate; - ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp; - ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; - ppc_md.hpte_insert = beat_lpar_hpte_insert; - ppc_md.hpte_remove = beat_lpar_hpte_remove; - ppc_md.hpte_clear_all = beat_lpar_hptab_clear; - } -} diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S deleted file mode 100644 index 96c801907126..000000000000 --- a/arch/powerpc/platforms/cell/beat_hvCall.S +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Beat hypervisor call I/F - * - * (C) Copyright 2007 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/pseries/hvCall.S. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#include <asm/ppc_asm.h> - -/* Not implemented on Beat, now */ -#define HCALL_INST_PRECALL -#define HCALL_INST_POSTCALL - - .text - -#define HVSC .long 0x44000022 - -/* Note: takes only 7 input parameters at maximum */ -_GLOBAL(beat_hcall_norets) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - mr r11,r3 - mr r3,r4 - mr r4,r5 - mr r5,r6 - mr r6,r7 - mr r7,r8 - mr r8,r9 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes 8 input parameters at maximum */ -_GLOBAL(beat_hcall_norets8) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - mr r11,r3 - mr r3,r4 - mr r4,r5 - mr r5,r6 - mr r6,r7 - mr r7,r8 - mr r8,r9 - ld r10,STK_PARAM(R10)(r1) - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 1 output parameters at maximum */ -_GLOBAL(beat_hcall1) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 2 output parameters at maximum */ -_GLOBAL(beat_hcall2) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - std r5, 8(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 3 output parameters at maximum */ -_GLOBAL(beat_hcall3) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - std r5, 8(r12) - std r6, 16(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 4 output parameters at maximum */ -_GLOBAL(beat_hcall4) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - std r5, 8(r12) - std r6, 16(r12) - std r7, 24(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 5 output parameters at maximum */ -_GLOBAL(beat_hcall5) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - std r5, 8(r12) - std r6, 16(r12) - std r7, 24(r12) - std r8, 32(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* Note: takes only 6 input parameters, 6 output parameters at maximum */ -_GLOBAL(beat_hcall6) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - 
- HCALL_INST_PRECALL - - std r4,STK_PARAM(R4)(r1) /* save ret buffer */ - - mr r11,r3 - mr r3,r5 - mr r4,r6 - mr r5,r7 - mr r6,r8 - mr r7,r9 - mr r8,r10 - - HVSC /* invoke the hypervisor */ - - HCALL_INST_POSTCALL - - ld r12,STK_PARAM(R4)(r1) - std r4, 0(r12) - std r5, 8(r12) - std r6, 16(r12) - std r7, 24(r12) - std r8, 32(r12) - std r9, 40(r12) - - lwz r0,8(r1) - mtcrf 0xff,r0 - - blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c deleted file mode 100644 index 9e5dfbcc00af..000000000000 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Celleb/Beat Interrupt controller - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/percpu.h> -#include <linux/types.h> - -#include <asm/machdep.h> - -#include "beat_interrupt.h" -#include "beat_wrapper.h" - -#define MAX_IRQS NR_IRQS -static DEFINE_RAW_SPINLOCK(beatic_irq_mask_lock); -static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64]; -static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64]; - -static struct irq_domain *beatic_host; - -/* - * In this implementation, "virq" == "IRQ plug number", - * "(irq_hw_number_t)hwirq" == "IRQ outlet number". 
- */
-
-/* assumption: locked */
-static inline void beatic_update_irq_mask(unsigned int irq_plug)
-{
-        int off;
-        unsigned long masks[4];
-
-        off = (irq_plug / 256) * 4;
-        masks[0] = beatic_irq_mask_enable[off + 0]
-                & beatic_irq_mask_ack[off + 0];
-        masks[1] = beatic_irq_mask_enable[off + 1]
-                & beatic_irq_mask_ack[off + 1];
-        masks[2] = beatic_irq_mask_enable[off + 2]
-                & beatic_irq_mask_ack[off + 2];
-        masks[3] = beatic_irq_mask_enable[off + 3]
-                & beatic_irq_mask_ack[off + 3];
-        if (beat_set_interrupt_mask(irq_plug&~255UL,
-                masks[0], masks[1], masks[2], masks[3]) != 0)
-                panic("Failed to set mask IRQ!");
-}
-
-static void beatic_mask_irq(struct irq_data *d)
-{
-        unsigned long flags;
-
-        raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-        beatic_irq_mask_enable[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
-        beatic_update_irq_mask(d->irq);
-        raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_unmask_irq(struct irq_data *d)
-{
-        unsigned long flags;
-
-        raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-        beatic_irq_mask_enable[d->irq/64] |= 1UL << (63 - (d->irq%64));
-        beatic_update_irq_mask(d->irq);
-        raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_ack_irq(struct irq_data *d)
-{
-        unsigned long flags;
-
-        raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-        beatic_irq_mask_ack[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
-        beatic_update_irq_mask(d->irq);
-        raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_end_irq(struct irq_data *d)
-{
-        s64 err;
-        unsigned long flags;
-
-        err = beat_downcount_of_interrupt(d->irq);
-        if (err != 0) {
-                if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */
-                        panic("Failed to downcount IRQ! Error = %16llx", err);
-
-                printk(KERN_ERR "IRQ over-downcounted, plug %d\n", d->irq);
-        }
-        raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
-        beatic_irq_mask_ack[d->irq/64] |= 1UL << (63 - (d->irq%64));
-        beatic_update_irq_mask(d->irq);
-        raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static struct irq_chip beatic_pic = {
-        .name = "CELL-BEAT",
-        .irq_unmask = beatic_unmask_irq,
-        .irq_mask = beatic_mask_irq,
-        .irq_eoi = beatic_end_irq,
-};
-
-/*
- * Dispose of the binding between the hardware IRQ number (hw) and the
- * virtual IRQ number (virq), and update flags.
- *
- * Note that the number (virq) is already assigned at the upper layer.
- */
-static void beatic_pic_host_unmap(struct irq_domain *h, unsigned int virq)
-{
-        beat_destruct_irq_plug(virq);
-}
-
-/*
- * Create or update the binding between the hardware IRQ number (hw) and
- * the virtual IRQ number (virq). This is called only once for a given
- * mapping.
- *
- * Note that the number (virq) is already assigned at the upper layer.
- */
-static int beatic_pic_host_map(struct irq_domain *h, unsigned int virq,
-                               irq_hw_number_t hw)
-{
-        int64_t err;
-
-        err = beat_construct_and_connect_irq_plug(virq, hw);
-        if (err < 0)
-                return -EIO;
-
-        irq_set_status_flags(virq, IRQ_LEVEL);
-        irq_set_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq);
-        return 0;
-}
-
-/*
- * Translate a device-tree interrupt spec to irq_hw_number_t style (ulong),
- * to pass on to irq_create_mapping().
- *
- * Called from irq_create_of_mapping() only.
- * Note: We have only 1 entry to translate.
- */
-static int beatic_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
-                                 const u32 *intspec, unsigned int intsize,
-                                 irq_hw_number_t *out_hwirq,
-                                 unsigned int *out_flags)
-{
-        const u64 *intspec2 = (const u64 *)intspec;
-
-        *out_hwirq = *intspec2;
-        *out_flags |= IRQ_TYPE_LEVEL_LOW;
-        return 0;
-}
-
-static int beatic_pic_host_match(struct irq_domain *h, struct device_node *np)
-{
-        /* Match all */
-        return 1;
-}
-
-static const struct irq_domain_ops beatic_pic_host_ops = {
-        .map = beatic_pic_host_map,
-        .unmap = beatic_pic_host_unmap,
-        .xlate = beatic_pic_host_xlate,
-        .match = beatic_pic_host_match,
-};
-
-/*
- * Get an IRQ number
- * Note: returns VIRQ
- */
-static inline unsigned int beatic_get_irq_plug(void)
-{
-        int i;
-        uint64_t pending[4], ub;
-
-        for (i = 0; i < MAX_IRQS; i += 256) {
-                beat_detect_pending_interrupts(i, pending);
-                __asm__ ("cntlzd %0,%1":"=r"(ub):
-                        "r"(pending[0] & beatic_irq_mask_enable[i/64+0]
-                            & beatic_irq_mask_ack[i/64+0]));
-                if (ub != 64)
-                        return i + ub + 0;
-                __asm__ ("cntlzd %0,%1":"=r"(ub):
-                        "r"(pending[1] & beatic_irq_mask_enable[i/64+1]
-                            & beatic_irq_mask_ack[i/64+1]));
-                if (ub != 64)
-                        return i + ub + 64;
-                __asm__ ("cntlzd %0,%1":"=r"(ub):
-                        "r"(pending[2] & beatic_irq_mask_enable[i/64+2]
-                            & beatic_irq_mask_ack[i/64+2]));
-                if (ub != 64)
-                        return i + ub + 128;
-                __asm__ ("cntlzd %0,%1":"=r"(ub):
-                        "r"(pending[3] & beatic_irq_mask_enable[i/64+3]
-                            & beatic_irq_mask_ack[i/64+3]));
-                if (ub != 64)
-                        return i + ub + 192;
-        }
-
-        return NO_IRQ;
-}
-unsigned int beatic_get_irq(void)
-{
-        unsigned int ret;
-
-        ret = beatic_get_irq_plug();
-        if (ret != NO_IRQ)
-                beatic_ack_irq(irq_get_irq_data(ret));
-        return ret;
-}
-
-/*
- */
-void __init beatic_init_IRQ(void)
-{
-        int i;
-
-        memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable));
-        memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack));
-        for (i = 0; i < MAX_IRQS; i += 256)
-                beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L);
-
-        /* Set our get_irq function */
-        ppc_md.get_irq = beatic_get_irq;
-
-        /* Allocate an irq host */
-        beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL);
-        BUG_ON(beatic_host == NULL);
-        irq_set_default_host(beatic_host);
-}
-
-void beatic_deinit_IRQ(void)
-{
-        int i;
-
-        for (i = 1; i < nr_irqs; i++)
-                beat_destruct_irq_plug(i);
-}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
deleted file mode 100644
index a7e52f91a078..000000000000
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Celleb/Beat Interrupt controller
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef ASM_BEAT_PIC_H
-#define ASM_BEAT_PIC_H
-#ifdef __KERNEL__
-
-extern void beatic_init_IRQ(void);
-extern unsigned int beatic_get_irq(void);
-extern void beatic_deinit_IRQ(void);
-
-#endif
-#endif /* ASM_BEAT_PIC_H */
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
deleted file mode 100644
index 3ce685568935..000000000000
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Support for IOMMU on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/pci.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-
-#include "beat_wrapper.h"
-
-#define DMA_FLAGS 0xf800000000000000UL  /* r/w permitted, coherency required,
-                                           strongest order */
-
-static int __init find_dma_window(u64 *io_space_id, u64 *ioid,
-                                  u64 *base, u64 *size, u64 *io_page_size)
-{
-        struct device_node *dn;
-        const unsigned long *dma_window;
-
-        for_each_node_by_type(dn, "ioif") {
-                dma_window = of_get_property(dn, "toshiba,dma-window", NULL);
-                if (dma_window) {
-                        *io_space_id = (dma_window[0] >> 32) & 0xffffffffUL;
-                        *ioid = dma_window[0] & 0x7ffUL;
-                        *base = dma_window[1];
-                        *size = dma_window[2];
-                        *io_page_size = 1 << dma_window[3];
-                        of_node_put(dn);
-                        return 1;
-                }
-        }
-        return 0;
-}
-
-static unsigned long celleb_dma_direct_offset;
-
-static void __init celleb_init_direct_mapping(void)
-{
-        u64 lpar_addr, io_addr;
-        u64 io_space_id, ioid, dma_base, dma_size, io_page_size;
-
-        if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size,
-                             &io_page_size)) {
-                pr_info("No dma window found !\n");
-                return;
-        }
-
-        for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) {
-                io_addr = lpar_addr + dma_base;
-                (void)beat_put_iopte(io_space_id, io_addr, lpar_addr,
-                                     ioid, DMA_FLAGS);
-        }
-
-        celleb_dma_direct_offset = dma_base;
-}
-
-static void celleb_dma_dev_setup(struct device *dev)
-{
-        set_dma_ops(dev, &dma_direct_ops);
-        set_dma_offset(dev, celleb_dma_direct_offset);
-}
-
-static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
-{
-        celleb_dma_dev_setup(&pdev->dev);
-}
-
-static int celleb_of_bus_notify(struct notifier_block *nb,
-                                unsigned long action, void *data)
-{
-        struct device *dev = data;
-
-        /* We are only interested in device addition */
-        if (action != BUS_NOTIFY_ADD_DEVICE)
-                return 0;
-
-        celleb_dma_dev_setup(dev);
-
-        return 0;
-}
-
-static struct notifier_block celleb_of_bus_notifier = {
-        .notifier_call = celleb_of_bus_notify
-};
-
-static int __init celleb_init_iommu(void)
-{
-        celleb_init_direct_mapping();
-        ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
-        bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
-
-        return 0;
-}
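The direct mapping set up by celleb_init_direct_mapping() above programs every IO page table entry once at boot, after which DMA address translation reduces to adding a constant offset. A minimal stand-alone sketch of that arithmetic follows; the names are invented for illustration, and only the offset scheme comes from the code above:

        #include <stdint.h>
        #include <assert.h>

        /* Plays the role of celleb_dma_direct_offset (name invented). */
        static uint64_t dma_direct_offset;

        /* Boot-time setup: the window base would come from the device tree. */
        static void init_direct_mapping_example(uint64_t dma_base)
        {
                /* every lpar_addr in the window was mapped at lpar_addr + dma_base */
                dma_direct_offset = dma_base;
        }

        /* Bus address a device must use for a given CPU physical address. */
        static uint64_t phys_to_bus_example(uint64_t phys)
        {
                return phys + dma_direct_offset;
        }

        int main(void)
        {
                init_direct_mapping_example(0x80000000ULL);
                assert(phys_to_bus_example(0x1000) == 0x80001000ULL);
                return 0;
        }

In the kernel code the same effect is obtained by attaching dma_direct_ops and a per-device offset in celleb_dma_dev_setup().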
- -machine_arch_initcall(celleb_beat, celleb_init_iommu); diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c deleted file mode 100644 index 13f52589d3a9..000000000000 --- a/arch/powerpc/platforms/cell/beat_spu_priv1.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * spu hypervisor abstraction for Beat - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <asm/types.h> -#include <asm/spu.h> -#include <asm/spu_priv1.h> - -#include "beat_wrapper.h" - -static inline void _int_mask_set(struct spu *spu, int class, u64 mask) -{ - spu->shadow_int_mask_RW[class] = mask; - beat_set_irq_mask_for_spe(spu->spe_id, class, mask); -} - -static inline u64 _int_mask_get(struct spu *spu, int class) -{ - return spu->shadow_int_mask_RW[class]; -} - -static void int_mask_set(struct spu *spu, int class, u64 mask) -{ - _int_mask_set(spu, class, mask); -} - -static u64 int_mask_get(struct spu *spu, int class) -{ - return _int_mask_get(spu, class); -} - -static void int_mask_and(struct spu *spu, int class, u64 mask) -{ - u64 old_mask; - old_mask = _int_mask_get(spu, class); - _int_mask_set(spu, class, old_mask & mask); -} - -static void int_mask_or(struct spu *spu, int class, u64 mask) -{ - u64 old_mask; - old_mask = _int_mask_get(spu, class); - _int_mask_set(spu, class, old_mask | mask); -} - -static void int_stat_clear(struct spu *spu, int class, u64 stat) -{ - beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat); -} - -static u64 int_stat_get(struct spu *spu, int class) -{ - u64 int_stat; - beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat); - return int_stat; -} - -static void cpu_affinity_set(struct spu *spu, int cpu) -{ - return; -} - -static u64 mfc_dar_get(struct spu *spu) -{ - u64 dar; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_dar_RW), &dar); - return dar; -} - -static u64 mfc_dsisr_get(struct spu *spu) -{ - u64 dsisr; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr); - return dsisr; -} - -static void mfc_dsisr_set(struct spu *spu, u64 dsisr) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr); -} - -static void mfc_sdr_setup(struct spu *spu) -{ - return; -} - -static void mfc_sr1_set(struct spu *spu, u64 sr1) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_sr1_RW), sr1); -} - -static u64 mfc_sr1_get(struct spu *spu) -{ - u64 sr1; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_sr1_RW), &sr1); - return sr1; -} - -static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - 
offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id); -} - -static u64 mfc_tclass_id_get(struct spu *spu) -{ - u64 tclass_id; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id); - return tclass_id; -} - -static void tlb_invalidate(struct spu *spu) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul); -} - -static void resource_allocation_groupID_set(struct spu *spu, u64 id) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, resource_allocation_groupID_RW), - id); -} - -static u64 resource_allocation_groupID_get(struct spu *spu) -{ - u64 id; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, resource_allocation_groupID_RW), - &id); - return id; -} - -static void resource_allocation_enable_set(struct spu *spu, u64 enable) -{ - beat_set_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, resource_allocation_enable_RW), - enable); -} - -static u64 resource_allocation_enable_get(struct spu *spu) -{ - u64 enable; - beat_get_spe_privileged_state_1_registers( - spu->spe_id, - offsetof(struct spu_priv1, resource_allocation_enable_RW), - &enable); - return enable; -} - -const struct spu_priv1_ops spu_priv1_beat_ops = { - .int_mask_and = int_mask_and, - .int_mask_or = int_mask_or, - .int_mask_set = int_mask_set, - .int_mask_get = int_mask_get, - .int_stat_clear = int_stat_clear, - .int_stat_get = int_stat_get, - .cpu_affinity_set = cpu_affinity_set, - .mfc_dar_get = mfc_dar_get, - .mfc_dsisr_get = mfc_dsisr_get, - .mfc_dsisr_set = mfc_dsisr_set, - .mfc_sdr_setup = mfc_sdr_setup, - .mfc_sr1_set = mfc_sr1_set, - .mfc_sr1_get = mfc_sr1_get, - .mfc_tclass_id_set = mfc_tclass_id_set, - .mfc_tclass_id_get = mfc_tclass_id_get, - .tlb_invalidate = tlb_invalidate, - .resource_allocation_groupID_set = resource_allocation_groupID_set, - .resource_allocation_groupID_get = resource_allocation_groupID_get, - .resource_allocation_enable_set = resource_allocation_enable_set, - .resource_allocation_enable_get = resource_allocation_enable_get, -}; diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h deleted file mode 100644 index 8580dc7e1798..000000000000 --- a/arch/powerpc/platforms/cell/beat_syscall.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Beat hypervisor call numbers - * - * (C) Copyright 2004-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#ifndef BEAT_BEAT_syscall_H -#define BEAT_BEAT_syscall_H - -#ifdef __ASSEMBLY__ -#define __BEAT_ADD_VENDOR_ID(__x, __v) ((__v)<<60|(__x)) -#else -#define __BEAT_ADD_VENDOR_ID(__x, __v) ((u64)(__v)<<60|(__x)) -#endif -#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0) -#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0) -#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0) -#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0) -#define HV_query_logical_partition_address_region_info \ - __BEAT_ADD_VENDOR_ID(6, 0) -#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0) -#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0) -#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0) -#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0) -#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0) -#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0) -#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0) -#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0) -#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0) -#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0) -#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0) -#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0) -#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0) -#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0) -#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0) -#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0) -#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0) -#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0) -#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0) -#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0) -#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0) -#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0) -#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0) -#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0) -#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0) -#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0) -#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0) -#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0) -#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0) -#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0) -#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0) -#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0) -#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0) -#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0) -#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0) -#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0) -#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0) -#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0) -#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0) -#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0) -#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0) -#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0) -#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0) -#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0) -#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0) -#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0) -#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0) 
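As the __BEAT_ADD_VENDOR_ID macro above shows, each Beat hcall number carries a 4-bit vendor ID in bits 63:60 of a 64-bit opcode; vendor 0 covers the generic calls and vendor 1 the vendor-specific ones further below. A stand-alone sketch of the packing (the decode helpers are invented; only the bit layout comes from the macro):

        #include <stdint.h>
        #include <assert.h>

        #define ADD_VENDOR_ID(x, v)     (((uint64_t)(v) << 60) | (x))

        static unsigned int opcode_vendor(uint64_t op)
        {
                return (unsigned int)(op >> 60);        /* top nibble */
        }

        static uint64_t opcode_index(uint64_t op)
        {
                return op & ((1ULL << 60) - 1);         /* low 60 bits */
        }

        int main(void)
        {
                /* e.g. HV_write_htab_entry above: vendor 0, call 94 */
                uint64_t op = ADD_VENDOR_ID(94, 0);

                assert(opcode_vendor(op) == 0);
                assert(opcode_index(op) == 94);
                return 0;
        }

(The list of call numbers continues below.)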
-#define HV_assign_io_address_translation_fault_port \ - __BEAT_ADD_VENDOR_ID(100, 0) -#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0) -#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0) -#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0) -#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */ -#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0) -#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */ -#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0) -#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */ -#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0) -#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */ -#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0) -#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0) -#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0) -#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0) -#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0) -#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0) -#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0) -#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0) -#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0) -#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0) -#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0) -#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0) -#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0) -#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0) -#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1) -#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1) -#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1) -#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1) -#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1) -#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1) -#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1) -#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1) -#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1) -#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1) -#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1) -#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1) -#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1) -#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1) -#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1) -#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1) -#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1) -#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1) -#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1) -#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1) -#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1) -#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1) -#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1) -#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1) -#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1) -#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1) -#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1) -#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1) -#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1) -#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1) 
-#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1) -#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1) -#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1) -#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1) -#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1) -#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1) -#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1) -#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1) -#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1) -#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1) -#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1) -#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1) -#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1) -#define HV_assign_ipspc_server_connection_status_notification_port \ - __BEAT_ADD_VENDOR_ID(0x173, 1) -#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1) -#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1) -#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1) -#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1) -#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1) -#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1) -#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1) -#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1) -#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1) -#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1) -#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1) -#endif diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c deleted file mode 100644 index 350735bc8888..000000000000 --- a/arch/powerpc/platforms/cell/beat_udbg.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * udbg function for Beat - * - * (C) Copyright 2006 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <linux/kernel.h> -#include <linux/console.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> - -#include "beat.h" - -#define celleb_vtermno 0 - -static void udbg_putc_beat(char c) -{ - unsigned long rc; - - if (c == '\n') - udbg_putc_beat('\r'); - - rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0); -} - -/* Buffered chars getc */ -static u64 inbuflen; -static u64 inbuf[2]; /* must be 2 u64s */ - -static int udbg_getc_poll_beat(void) -{ - /* The interface is tricky because it may return up to 16 chars. - * We save them statically for future calls to udbg_getc(). - */ - char ch, *buf = (char *)inbuf; - int i; - long rc; - if (inbuflen == 0) { - /* get some more chars. 
*/ - inbuflen = 0; - rc = beat_get_term_char(celleb_vtermno, &inbuflen, - inbuf+0, inbuf+1); - if (rc != 0) - inbuflen = 0; /* otherwise inbuflen is garbage */ - } - if (inbuflen <= 0 || inbuflen > 16) { - /* Catch error case as well as other oddities (corruption) */ - inbuflen = 0; - return -1; - } - ch = buf[0]; - for (i = 1; i < inbuflen; i++) /* shuffle them down. */ - buf[i-1] = buf[i]; - inbuflen--; - return ch; -} - -static int udbg_getc_beat(void) -{ - int ch; - for (;;) { - ch = udbg_getc_poll_beat(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay = 0; delay < 2000000; delay++) - ; - } else { - return ch; - } - } -} - -/* call this from early_init() for a working debug console on - * vterm capable LPAR machines - */ -void __init udbg_init_debug_beat(void) -{ - udbg_putc = udbg_putc_beat; - udbg_getc = udbg_getc_beat; - udbg_getc_poll = udbg_getc_poll_beat; -} diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h deleted file mode 100644 index c1109969f242..000000000000 --- a/arch/powerpc/platforms/cell/beat_wrapper.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Beat hypervisor call I/F - * - * (C) Copyright 2007 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ -#ifndef BEAT_HCALL -#include <linux/string.h> -#include "beat_syscall.h" - -/* defined in hvCall.S */ -extern s64 beat_hcall_norets(u64 opcode, ...); -extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3, - u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); -extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...); -extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...); -extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...); -extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...); -extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...); -extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...); - -static inline s64 beat_downcount_of_interrupt(u64 plug_id) -{ - return beat_hcall_norets(HV_downcount_of_interrupt, plug_id); -} - -static inline s64 beat_set_interrupt_mask(u64 index, - u64 val0, u64 val1, u64 val2, u64 val3) -{ - return beat_hcall_norets(HV_set_interrupt_mask, index, - val0, val1, val2, val3); -} - -static inline s64 beat_destruct_irq_plug(u64 plug_id) -{ - return beat_hcall_norets(HV_destruct_irq_plug, plug_id); -} - -static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id, - u64 outlet_id) -{ - return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id, - outlet_id); -} - -static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf) -{ - return beat_hcall4(HV_detect_pending_interrupts, retbuf, index); -} - -static inline s64 beat_pause(u64 style) -{ - return beat_hcall_norets(HV_pause, style); -} - -static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf) -{ - return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index); -} - -static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group, - u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot) -{ - u64 dummy[3]; - s64 ret; - - ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group, - bitmask, hpte_v, hpte_r); - *slot = dummy[0]; - return ret; -} - -static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot, - u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r, - u64 *ret_v, u64 *ret_r) -{ - u64 dummy[2]; - s64 ret; - - ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot, - hpte_v, hpte_r, mask_v, mask_r); - *ret_v = dummy[0]; - *ret_r = dummy[1]; - return ret; -} - -static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group, - u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot) -{ - u64 dummy[1]; - s64 ret; - - ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group, - hpte_v, hpte_r, mask_v, value_v); - *slot = dummy[0]; - return ret; -} - -static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group, - u64 va, u64 pss) -{ - return beat_hcall_norets(HV_invalidate_htab_entry3, - htab_id, group, va, pss); -} - -static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group, - u64 va, u64 pss, u64 ptel_mask, u64 ptel_value) -{ - return beat_hcall_norets(HV_update_htab_permission3, - htab_id, group, va, pss, ptel_mask, ptel_value); -} - -static inline s64 beat_clear_htab3(u64 htab_id) -{ - return beat_hcall_norets(HV_clear_htab3, htab_id); -} - -static inline void beat_shutdown_logical_partition(u64 code) -{ - (void)beat_hcall_norets(HV_shutdown_logical_partition, code); -} - -static inline s64 beat_rtc_write(u64 time_from_epoch) -{ - return beat_hcall_norets(HV_rtc_write, time_from_epoch); -} - -static inline s64 beat_rtc_read(u64 *time_from_epoch) -{ - u64 dummy[1]; - s64 ret; - - ret = beat_hcall1(HV_rtc_read, dummy); - *time_from_epoch = dummy[0]; - return ret; -} - -#define BEAT_NVRW_CNT (sizeof(u64) * 
6) - -static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer) -{ - u64 b[6]; - - if (length > BEAT_NVRW_CNT) - return -1; - memcpy(b, buffer, sizeof(b)); - return beat_hcall_norets8(HV_eeprom_write, index, length, - b[0], b[1], b[2], b[3], b[4], b[5]); -} - -static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer) -{ - u64 b[6]; - s64 ret; - - if (length > BEAT_NVRW_CNT) - return -1; - ret = beat_hcall6(HV_eeprom_read, b, index, length); - memcpy(buffer, b, length); - return ret; -} - -static inline s64 beat_set_dabr(u64 value, u64 style) -{ - return beat_hcall_norets(HV_set_dabr, value, style); -} - -static inline s64 beat_get_characters_from_console(u64 termno, u64 *len, - u8 *buffer) -{ - u64 dummy[3]; - s64 ret; - - ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len); - *len = dummy[0]; - memcpy(buffer, dummy + 1, *len); - return ret; -} - -static inline s64 beat_put_characters_to_console(u64 termno, u64 len, - u8 *buffer) -{ - u64 b[2]; - - memcpy(b, buffer, len); - return beat_hcall_norets(HV_put_characters_to_console, termno, len, - b[0], b[1]); -} - -static inline s64 beat_get_spe_privileged_state_1_registers( - u64 id, u64 offsetof, u64 *value) -{ - u64 dummy[1]; - s64 ret; - - ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id, - offsetof); - *value = dummy[0]; - return ret; -} - -static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask) -{ - return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask); -} - -static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class, - u64 mask) -{ - return beat_hcall_norets(HV_clear_interrupt_status_of_spe, - id, class, mask); -} - -static inline s64 beat_set_spe_privileged_state_1_registers( - u64 id, u64 offsetof, u64 value) -{ - return beat_hcall_norets(HV_set_spe_privileged_state_1_registers, - id, offsetof, value); -} - -static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val) -{ - u64 dummy[1]; - s64 ret; - - ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class); - *val = dummy[0]; - return ret; -} - -static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr, - u64 ioid, u64 flags) -{ - return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr, - ioid, flags); -} - -static inline s64 beat_construct_event_receive_port(u64 *port) -{ - u64 dummy[1]; - s64 ret; - - ret = beat_hcall1(HV_construct_event_receive_port, dummy); - *port = dummy[0]; - return ret; -} - -static inline s64 beat_destruct_event_receive_port(u64 port) -{ - s64 ret; - - ret = beat_hcall_norets(HV_destruct_event_receive_port, port); - return ret; -} - -static inline s64 beat_create_repository_node(u64 path[4], u64 data[2]) -{ - s64 ret; - - ret = beat_hcall_norets(HV_create_repository_node2, - path[0], path[1], path[2], path[3], data[0], data[1]); - return ret; -} - -static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4], - u64 data[2]) -{ - s64 ret; - - ret = beat_hcall2(HV_get_repository_node_value2, data, - lpid, path[0], path[1], path[2], path[3]); - return ret; -} - -#endif diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h new file mode 100644 index 000000000000..ef143dfee068 --- /dev/null +++ b/arch/powerpc/platforms/cell/cell.h @@ -0,0 +1,24 @@ +/* + * Cell Platform common data structures + * + * Copyright 2015, Daniel Axtens, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
deleted file mode 100644
index 3ce70ded2d6a..000000000000
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Support for PCI on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/kernel/rtas_pci.c:
- *  Copyright (C) 2001 Dave Engebretsen, IBM Corporation
- *  Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/pci_regs.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-#include "celleb_pci.h"
-
-#define MAX_PCI_DEVICES    32
-#define MAX_PCI_FUNCTIONS   8
-#define MAX_PCI_BASE_ADDRS  3 /* use 64-bit address */
-
-/* definition for fake pci configuration area for GbE, ...., etc.
- */
-
-struct celleb_pci_resource {
-        struct resource r[MAX_PCI_BASE_ADDRS];
-};
-
-struct celleb_pci_private {
-        unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
-        struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
-};
-
-static inline u8 celleb_fake_config_readb(void *addr)
-{
-        u8 *p = addr;
-        return *p;
-}
-
-static inline u16 celleb_fake_config_readw(void *addr)
-{
-        __le16 *p = addr;
-        return le16_to_cpu(*p);
-}
-
-static inline u32 celleb_fake_config_readl(void *addr)
-{
-        __le32 *p = addr;
-        return le32_to_cpu(*p);
-}
-
-static inline void celleb_fake_config_writeb(u32 val, void *addr)
-{
-        u8 *p = addr;
-        *p = val;
-}
-
-static inline void celleb_fake_config_writew(u32 val, void *addr)
-{
-        __le16 val16;
-        __le16 *p = addr;
-        val16 = cpu_to_le16(val);
-        *p = val16;
-}
-
-static inline void celleb_fake_config_writel(u32 val, void *addr)
-{
-        __le32 val32;
-        __le32 *p = addr;
-        val32 = cpu_to_le32(val);
-        *p = val32;
-}
-
-static unsigned char *get_fake_config_start(struct pci_controller *hose,
-                                            int devno, int fn)
-{
-        struct celleb_pci_private *private = hose->private_data;
-
-        if (private == NULL)
-                return NULL;
-
-        return private->fake_config[devno][fn];
-}
-
-static struct celleb_pci_resource *get_resource_start(
-                        struct pci_controller *hose,
-                        int devno, int fn)
-{
-        struct celleb_pci_private *private = hose->private_data;
-
-        if (private == NULL)
-                return NULL;
-
-        return private->res[devno][fn];
-}
-
-
-static void celleb_config_read_fake(unsigned char *config, int where,
-                                    int size, u32 *val)
-{
-        char *p = config + where;
-
-        switch (size) {
-        case 1:
-                *val = celleb_fake_config_readb(p);
-                break;
-        case 2:
-                *val = celleb_fake_config_readw(p);
-                break;
-        case 4:
-                *val = celleb_fake_config_readl(p);
-                break;
-        }
-}
-
-static void celleb_config_write_fake(unsigned char *config, int where,
-                                     int size, u32 val)
-{
-        char *p = config + where;
-
-        switch (size) {
-        case 1:
-                celleb_fake_config_writeb(val, p);
-                break;
-        case 2:
-                celleb_fake_config_writew(val, p);
-                break;
-        case 4:
-                celleb_fake_config_writel(val, p);
-                break;
-        }
-}
-
-static int celleb_fake_pci_read_config(struct pci_bus *bus,
-                unsigned int devfn, int where, int size, u32 *val)
-{
-        char *config;
-        struct pci_controller *hose = pci_bus_to_host(bus);
-        unsigned int devno = devfn >> 3;
-        unsigned int fn = devfn & 0x7;
-
-        /* alignment check */
-        BUG_ON(where % size);
-
-        pr_debug(" fake read: bus=0x%x, ", bus->number);
-        config = get_fake_config_start(hose, devno, fn);
-
-        pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size);
-        if (!config) {
-                pr_debug("failed\n");
-                return PCIBIOS_DEVICE_NOT_FOUND;
-        }
-
-        celleb_config_read_fake(config, where, size, val);
-        pr_debug("val=0x%x\n", *val);
-
-        return PCIBIOS_SUCCESSFUL;
-}
-
-
-static int celleb_fake_pci_write_config(struct pci_bus *bus,
-                unsigned int devfn, int where, int size, u32 val)
-{
-        char *config;
-        struct pci_controller *hose = pci_bus_to_host(bus);
-        struct celleb_pci_resource *res;
-        unsigned int devno = devfn >> 3;
-        unsigned int fn = devfn & 0x7;
-
-        /* alignment check */
-        BUG_ON(where % size);
-
-        config = get_fake_config_start(hose, devno, fn);
-
-        if (!config)
-                return PCIBIOS_DEVICE_NOT_FOUND;
-
-        if (val == ~0) {
-                int i = (where - PCI_BASE_ADDRESS_0) >> 3;
-
-                switch (where) {
-                case PCI_BASE_ADDRESS_0:
-                case PCI_BASE_ADDRESS_2:
-                        if (size != 4)
-                                return PCIBIOS_DEVICE_NOT_FOUND;
-                        res = get_resource_start(hose, devno, fn);
-                        if (!res)
-                                return PCIBIOS_DEVICE_NOT_FOUND;
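-                        /*
-                         * PCI BAR sizing: after all-ones is written to a
-                         * 64-bit BAR, the next read is expected to reveal
-                         * the BAR's size; this emulation answers with
-                         * (end - start) of the backing resource.
-                         */
-                        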
celleb_config_write_fake(config, where, size, - (res->r[i].end - res->r[i].start)); - return PCIBIOS_SUCCESSFUL; - case PCI_BASE_ADDRESS_1: - case PCI_BASE_ADDRESS_3: - case PCI_BASE_ADDRESS_4: - case PCI_BASE_ADDRESS_5: - break; - default: - break; - } - } - - celleb_config_write_fake(config, where, size, val); - pr_debug(" fake write: where=%x, size=%d, val=%x\n", - where, size, val); - - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops celleb_fake_pci_ops = { - .read = celleb_fake_pci_read_config, - .write = celleb_fake_pci_write_config, -}; - -static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose, - unsigned int devno, unsigned int fn, - unsigned int num_base_addr) -{ - u32 val; - unsigned char *config; - struct celleb_pci_resource *res; - - config = get_fake_config_start(hose, devno, fn); - res = get_resource_start(hose, devno, fn); - - if (!config || !res) - return; - - switch (num_base_addr) { - case 3: - val = (res->r[2].start & 0xfffffff0) - | PCI_BASE_ADDRESS_MEM_TYPE_64; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val); - val = res->r[2].start >> 32; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val); - /* FALLTHROUGH */ - case 2: - val = (res->r[1].start & 0xfffffff0) - | PCI_BASE_ADDRESS_MEM_TYPE_64; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val); - val = res->r[1].start >> 32; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val); - /* FALLTHROUGH */ - case 1: - val = (res->r[0].start & 0xfffffff0) - | PCI_BASE_ADDRESS_MEM_TYPE_64; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val); - val = res->r[0].start >> 32; - celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val); - break; - } - - val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; - celleb_config_write_fake(config, PCI_COMMAND, 2, val); -} - -static int __init celleb_setup_fake_pci_device(struct device_node *node, - struct pci_controller *hose) -{ - unsigned int rlen; - int num_base_addr = 0; - u32 val; - const u32 *wi0, *wi1, *wi2, *wi3, *wi4; - unsigned int devno, fn; - struct celleb_pci_private *private = hose->private_data; - unsigned char **config = NULL; - struct celleb_pci_resource **res = NULL; - const char *name; - const unsigned long *li; - int size, result; - - if (private == NULL) { - printk(KERN_ERR "PCI: " - "memory space for pci controller is not assigned\n"); - goto error; - } - - name = of_get_property(node, "model", &rlen); - if (!name) { - printk(KERN_ERR "PCI: model property not found.\n"); - goto error; - } - - wi4 = of_get_property(node, "reg", &rlen); - if (wi4 == NULL) - goto error; - - devno = ((wi4[0] >> 8) & 0xff) >> 3; - fn = (wi4[0] >> 8) & 0x7; - - pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name, - devno, fn); - - size = 256; - config = &private->fake_config[devno][fn]; - *config = zalloc_maybe_bootmem(size, GFP_KERNEL); - if (*config == NULL) { - printk(KERN_ERR "PCI: " - "not enough memory for fake configuration space\n"); - goto error; - } - pr_debug("PCI: fake config area assigned 0x%016lx\n", - (unsigned long)*config); - - size = sizeof(struct celleb_pci_resource); - res = &private->res[devno][fn]; - *res = zalloc_maybe_bootmem(size, GFP_KERNEL); - if (*res == NULL) { - printk(KERN_ERR - "PCI: not enough memory for resource data space\n"); - goto error; - } - pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res); - - wi0 = of_get_property(node, "device-id", NULL); - wi1 = of_get_property(node, "vendor-id", NULL); - wi2 = of_get_property(node, 
"class-code", NULL); - wi3 = of_get_property(node, "revision-id", NULL); - if (!wi0 || !wi1 || !wi2 || !wi3) { - printk(KERN_ERR "PCI: Missing device tree properties.\n"); - goto error; - } - - celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff); - celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff); - pr_debug("class-code = 0x%08x\n", wi2[0]); - - celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff); - celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2, - (wi2[0] >> 8) & 0xffff); - celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]); - - while (num_base_addr < MAX_PCI_BASE_ADDRS) { - result = of_address_to_resource(node, - num_base_addr, &(*res)->r[num_base_addr]); - if (result) - break; - num_base_addr++; - } - - celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr); - - li = of_get_property(node, "interrupts", &rlen); - if (!li) { - printk(KERN_ERR "PCI: interrupts not found.\n"); - goto error; - } - val = li[0]; - celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1); - celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val); - -#ifdef DEBUG - pr_debug("PCI: %s irq=%ld\n", name, li[0]); - for (i = 0; i < 6; i++) { - celleb_config_read_fake(*config, - PCI_BASE_ADDRESS_0 + 0x4 * i, 4, - &val); - pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n", - name, fn, i, val); - } -#endif - - celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1, - PCI_HEADER_TYPE_NORMAL); - - return 0; - -error: - if (mem_init_done) { - if (config && *config) - kfree(*config); - if (res && *res) - kfree(*res); - - } else { - if (config && *config) { - size = 256; - memblock_free(__pa(*config), size); - } - if (res && *res) { - size = sizeof(struct celleb_pci_resource); - memblock_free(__pa(*res), size); - } - } - - return 1; -} - -static int __init phb_set_bus_ranges(struct device_node *dev, - struct pci_controller *phb) -{ - const int *bus_range; - unsigned int len; - - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) - return 1; - - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; - - return 0; -} - -static void __init celleb_alloc_private_mem(struct pci_controller *hose) -{ - hose->private_data = - zalloc_maybe_bootmem(sizeof(struct celleb_pci_private), - GFP_KERNEL); -} - -static int __init celleb_setup_fake_pci(struct device_node *dev, - struct pci_controller *phb) -{ - struct device_node *node; - - phb->ops = &celleb_fake_pci_ops; - celleb_alloc_private_mem(phb); - - for (node = of_get_next_child(dev, NULL); - node != NULL; node = of_get_next_child(dev, node)) - celleb_setup_fake_pci_device(node, phb); - - return 0; -} - -static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { - .setup = celleb_setup_fake_pci, -}; - -static const struct of_device_id celleb_phb_match[] __initconst = { - { - .name = "pci-pseudo", - .data = &celleb_fake_pci_spec, - }, { - .name = "epci", - .data = &celleb_epci_spec, - }, { - .name = "pcie", - .data = &celleb_pciex_spec, - }, { - }, -}; - -int __init celleb_setup_phb(struct pci_controller *phb) -{ - struct device_node *dev = phb->dn; - const struct of_device_id *match; - const struct celleb_phb_spec *phb_spec; - int rc; - - match = of_match_node(celleb_phb_match, dev); - if (!match) - return 1; - - phb_set_bus_ranges(dev, phb); - phb->buid = 1; - - phb_spec = match->data; - rc = (*phb_spec->setup)(dev, phb); - if (rc) - return 1; - - if (phb_spec->ops) - iowa_register_bus(phb, phb_spec->ops, - phb_spec->iowa_init, - 
phb_spec->iowa_data); - return 0; -} - -int celleb_pci_probe_mode(struct pci_bus *bus) -{ - return PCI_PROBE_DEVTREE; -} diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h deleted file mode 100644 index a801fcc5f389..000000000000 --- a/arch/powerpc/platforms/cell/celleb_pci.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * pci prototypes for Celleb platform - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef _CELLEB_PCI_H -#define _CELLEB_PCI_H - -#include <linux/pci.h> - -#include <asm/pci-bridge.h> -#include <asm/prom.h> -#include <asm/ppc-pci.h> -#include <asm/io-workarounds.h> - -struct iowa_bus; - -struct celleb_phb_spec { - int (*setup)(struct device_node *, struct pci_controller *); - struct ppc_pci_io *ops; - int (*iowa_init)(struct iowa_bus *, void *); - void *iowa_data; -}; - -extern int celleb_setup_phb(struct pci_controller *); -extern int celleb_pci_probe_mode(struct pci_bus *); - -extern struct celleb_phb_spec celleb_epci_spec; -extern struct celleb_phb_spec celleb_pciex_spec; - -#endif /* _CELLEB_PCI_H */ diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h deleted file mode 100644 index b596a711c348..000000000000 --- a/arch/powerpc/platforms/cell/celleb_scc.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * SCC (Super Companion Chip) definitions - * - * (C) Copyright 2004-2006 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#ifndef _CELLEB_SCC_H -#define _CELLEB_SCC_H - -#define PCI_VENDOR_ID_TOSHIBA_2 0x102f -#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0 -#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE 0x01b1 -#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE 0x01b2 -#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE 0x01b3 -#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA 0x01b4 -#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2 0x01b5 -#define PCI_DEVICE_ID_TOSHIBA_SCC_USB 0x01b6 -#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC 0x01b7 - -#define SCC_EPCI_REG 0x0000d000 - -/* EPCI registers */ -#define SCC_EPCI_CNF10_REG 0x010 -#define SCC_EPCI_CNF14_REG 0x014 -#define SCC_EPCI_CNF18_REG 0x018 -#define SCC_EPCI_PVBAT 0x100 -#define SCC_EPCI_VPMBAT 0x104 -#define SCC_EPCI_VPIBAT 0x108 -#define SCC_EPCI_VCSR 0x110 -#define SCC_EPCI_VIENAB 0x114 -#define SCC_EPCI_VISTAT 0x118 -#define SCC_EPCI_VRDCOUNT 0x124 -#define SCC_EPCI_BAM0 0x12c -#define SCC_EPCI_BAM1 0x134 -#define SCC_EPCI_BAM2 0x13c -#define SCC_EPCI_IADR 0x164 -#define SCC_EPCI_CLKRST 0x800 -#define SCC_EPCI_INTSET 0x804 -#define SCC_EPCI_STATUS 0x808 -#define SCC_EPCI_ABTSET 0x80c -#define SCC_EPCI_WATRP 0x810 -#define SCC_EPCI_DUMYRADR 0x814 -#define SCC_EPCI_SWRESP 0x818 -#define SCC_EPCI_CNTOPT 0x81c -#define SCC_EPCI_ECMODE 0xf00 -#define SCC_EPCI_IOM_AC_NUM 5 -#define SCC_EPCI_IOM_ACTE(n) (0xf10 + (n) * 4) -#define SCC_EPCI_IOT_AC_NUM 4 -#define SCC_EPCI_IOT_ACTE(n) (0xf30 + (n) * 4) -#define SCC_EPCI_MAEA 0xf50 -#define SCC_EPCI_MAEC 0xf54 -#define SCC_EPCI_CKCTRL 0xff0 - -/* bits for SCC_EPCI_VCSR */ -#define SCC_EPCI_VCSR_FRE 0x00020000 -#define SCC_EPCI_VCSR_FWE 0x00010000 -#define SCC_EPCI_VCSR_DR 0x00000400 -#define SCC_EPCI_VCSR_SR 0x00000008 -#define SCC_EPCI_VCSR_AT 0x00000004 - -/* bits for SCC_EPCI_VIENAB/SCC_EPCI_VISTAT */ -#define SCC_EPCI_VISTAT_PMPE 0x00000008 -#define SCC_EPCI_VISTAT_PMFE 0x00000004 -#define SCC_EPCI_VISTAT_PRA 0x00000002 -#define SCC_EPCI_VISTAT_PRD 0x00000001 -#define SCC_EPCI_VISTAT_ALL 0x0000000f - -#define SCC_EPCI_VIENAB_PMPEE 0x00000008 -#define SCC_EPCI_VIENAB_PMFEE 0x00000004 -#define SCC_EPCI_VIENAB_PRA 0x00000002 -#define SCC_EPCI_VIENAB_PRD 0x00000001 -#define SCC_EPCI_VIENAB_ALL 0x0000000f - -/* bits for SCC_EPCI_CLKRST */ -#define SCC_EPCI_CLKRST_CKS_MASK 0x00030000 -#define SCC_EPCI_CLKRST_CKS_2 0x00000000 -#define SCC_EPCI_CLKRST_CKS_4 0x00010000 -#define SCC_EPCI_CLKRST_CKS_8 0x00020000 -#define SCC_EPCI_CLKRST_PCICRST 0x00000400 -#define SCC_EPCI_CLKRST_BC 0x00000200 -#define SCC_EPCI_CLKRST_PCIRST 0x00000100 -#define SCC_EPCI_CLKRST_PCKEN 0x00000001 - -/* bits for SCC_EPCI_INTSET/SCC_EPCI_STATUS */ -#define SCC_EPCI_INT_2M 0x01000000 -#define SCC_EPCI_INT_RERR 0x00200000 -#define SCC_EPCI_INT_SERR 0x00100000 -#define SCC_EPCI_INT_PRTER 0x00080000 -#define SCC_EPCI_INT_SER 0x00040000 -#define SCC_EPCI_INT_PER 0x00020000 -#define SCC_EPCI_INT_PAI 0x00010000 -#define SCC_EPCI_INT_1M 0x00000100 -#define SCC_EPCI_INT_PME 0x00000010 -#define SCC_EPCI_INT_INTD 0x00000008 -#define SCC_EPCI_INT_INTC 0x00000004 -#define SCC_EPCI_INT_INTB 0x00000002 -#define SCC_EPCI_INT_INTA 0x00000001 -#define SCC_EPCI_INT_DEVINT 0x0000000f -#define SCC_EPCI_INT_ALL 0x003f001f -#define SCC_EPCI_INT_ALLERR 0x003f0000 - -/* bits for SCC_EPCI_CKCTRL */ -#define SCC_EPCI_CKCTRL_CRST0 0x00010000 -#define SCC_EPCI_CKCTRL_CRST1 0x00020000 -#define SCC_EPCI_CKCTRL_OCLKEN 0x00000100 -#define SCC_EPCI_CKCTRL_LCLKEN 0x00000001 - -#define SCC_EPCI_IDSEL_AD_TO_SLOT(ad) ((ad) - 10) -#define SCC_EPCI_MAX_DEVNU SCC_EPCI_IDSEL_AD_TO_SLOT(32) - -/* bits for 
SCC_EPCI_CNTOPT */ -#define SCC_EPCI_CNTOPT_O2PMB 0x00000002 - -/* SCC PCIEXC SMMIO registers */ -#define PEXCADRS 0x000 -#define PEXCWDATA 0x004 -#define PEXCRDATA 0x008 -#define PEXDADRS 0x010 -#define PEXDCMND 0x014 -#define PEXDWDATA 0x018 -#define PEXDRDATA 0x01c -#define PEXREQID 0x020 -#define PEXTIDMAP 0x024 -#define PEXINTMASK 0x028 -#define PEXINTSTS 0x02c -#define PEXAERRMASK 0x030 -#define PEXAERRSTS 0x034 -#define PEXPRERRMASK 0x040 -#define PEXPRERRSTS 0x044 -#define PEXPRERRID01 0x048 -#define PEXPRERRID23 0x04c -#define PEXVDMASK 0x050 -#define PEXVDSTS 0x054 -#define PEXRCVCPLIDA 0x060 -#define PEXLENERRIDA 0x068 -#define PEXPHYPLLST 0x070 -#define PEXDMRDEN0 0x100 -#define PEXDMRDADR0 0x104 -#define PEXDMRDENX 0x110 -#define PEXDMRDADRX 0x114 -#define PEXECMODE 0xf00 -#define PEXMAEA(n) (0xf50 + (8 * n)) -#define PEXMAEC(n) (0xf54 + (8 * n)) -#define PEXCCRCTRL 0xff0 - -/* SCC PCIEXC bits and shifts for PEXCADRS */ -#define PEXCADRS_BYTE_EN_SHIFT 20 -#define PEXCADRS_CMD_SHIFT 16 -#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT) -#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT) - -/* SCC PCIEXC shifts for PEXDADRS */ -#define PEXDADRS_BUSNO_SHIFT 20 -#define PEXDADRS_DEVNO_SHIFT 15 -#define PEXDADRS_FUNCNO_SHIFT 12 - -/* SCC PCIEXC bits and shifts for PEXDCMND */ -#define PEXDCMND_BYTE_EN_SHIFT 4 -#define PEXDCMND_IO_READ 0x2 -#define PEXDCMND_IO_WRITE 0x3 -#define PEXDCMND_CONFIG_READ 0xa -#define PEXDCMND_CONFIG_WRITE 0xb - -/* SCC PCIEXC bits for PEXPHYPLLST */ -#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001 - -/* SCC PCIEXC bits for PEXECMODE */ -#define PEXECMODE_ALL_THROUGH 0x00000000 -#define PEXECMODE_ALL_8BIT 0x00550155 -#define PEXECMODE_ALL_16BIT 0x00aa02aa - -/* SCC PCIEXC bits for PEXCCRCTRL */ -#define PEXCCRCTRL_PEXIPCOREEN 0x00040000 -#define PEXCCRCTRL_PEXIPCONTEN 0x00020000 -#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000 -#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100 - -/* SCC PCIEXC port configuration registers */ -#define PEXTCERRCHK 0x21c -#define PEXTAMAPB0 0x220 -#define PEXTAMAPL0 0x224 -#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n)) -#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n)) -#define PEXCHVC0P 0x500 -#define PEXCHVC0NP 0x504 -#define PEXCHVC0C 0x508 -#define PEXCDVC0P 0x50c -#define PEXCDVC0NP 0x510 -#define PEXCDVC0C 0x514 -#define PEXCHVCXP 0x518 -#define PEXCHVCXNP 0x51c -#define PEXCHVCXC 0x520 -#define PEXCDVCXP 0x524 -#define PEXCDVCXNP 0x528 -#define PEXCDVCXC 0x52c -#define PEXCTTRG 0x530 -#define PEXTSCTRL 0x700 -#define PEXTSSTS 0x704 -#define PEXSKPCTRL 0x708 - -/* UHC registers */ -#define SCC_UHC_CKRCTRL 0xff0 -#define SCC_UHC_ECMODE 0xf00 - -/* bits for SCC_UHC_CKRCTRL */ -#define SCC_UHC_F48MCKLEN 0x00000001 -#define SCC_UHC_P_SUSPEND 0x00000002 -#define SCC_UHC_PHY_SUSPEND_SEL 0x00000004 -#define SCC_UHC_HCLKEN 0x00000100 -#define SCC_UHC_USBEN 0x00010000 -#define SCC_UHC_USBCEN 0x00020000 -#define SCC_UHC_PHYEN 0x00040000 - -/* bits for SCC_UHC_ECMODE */ -#define SCC_UHC_ECMODE_BY_BYTE 0x00000555 -#define SCC_UHC_ECMODE_BY_WORD 0x00000aaa - -#endif /* _CELLEB_SCC_H */ diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c deleted file mode 100644 index 9438bbed402f..000000000000 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Support for SCC external PCI - * - * (C) Copyright 2004-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License 
as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/threads.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/pci_regs.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -#include "celleb_scc.h" -#include "celleb_pci.h" - -#define MAX_PCI_DEVICES 32 -#define MAX_PCI_FUNCTIONS 8 - -#define iob() __asm__ __volatile__("eieio; sync":::"memory") - -static inline PCI_IO_ADDR celleb_epci_get_epci_base( - struct pci_controller *hose) -{ - /* - * Note: - * Celleb epci uses cfg_addr as a base address for - * epci control registers. - */ - - return hose->cfg_addr; -} - -static inline PCI_IO_ADDR celleb_epci_get_epci_cfg( - struct pci_controller *hose) -{ - /* - * Note: - * Celleb epci uses cfg_data as a base address for - * configuration area for epci devices. - */ - - return hose->cfg_data; -} - -static inline void clear_and_disable_master_abort_interrupt( - struct pci_controller *hose) -{ - PCI_IO_ADDR epci_base; - PCI_IO_ADDR reg; - epci_base = celleb_epci_get_epci_base(hose); - reg = epci_base + PCI_COMMAND; - out_be32(reg, in_be32(reg) | (PCI_STATUS_REC_MASTER_ABORT << 16)); -} - -static int celleb_epci_check_abort(struct pci_controller *hose, - PCI_IO_ADDR addr) -{ - PCI_IO_ADDR reg; - PCI_IO_ADDR epci_base; - u32 val; - - iob(); - epci_base = celleb_epci_get_epci_base(hose); - - reg = epci_base + PCI_COMMAND; - val = in_be32(reg); - - if (val & (PCI_STATUS_REC_MASTER_ABORT << 16)) { - out_be32(reg, - (val & 0xffff) | (PCI_STATUS_REC_MASTER_ABORT << 16)); - - /* clear PCI Controller error, FRE, PMFE */ - reg = epci_base + SCC_EPCI_STATUS; - out_be32(reg, SCC_EPCI_INT_PAI); - - reg = epci_base + SCC_EPCI_VCSR; - val = in_be32(reg) & 0xffff; - val |= SCC_EPCI_VCSR_FRE; - out_be32(reg, val); - - reg = epci_base + SCC_EPCI_VISTAT; - out_be32(reg, SCC_EPCI_VISTAT_PMFE); - return PCIBIOS_DEVICE_NOT_FOUND; - } - - return PCIBIOS_SUCCESSFUL; -} - -static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus, - struct pci_controller *hose, unsigned int devfn, int where) -{ - PCI_IO_ADDR addr; - - if (bus != hose->bus) - addr = celleb_epci_get_epci_cfg(hose) + - (((bus->number & 0xff) << 16) - | ((devfn & 0xff) << 8) - | (where & 0xff) - | 0x01000000); - else - addr = celleb_epci_get_epci_cfg(hose) + - (((devfn & 0xff) << 8) | (where & 0xff)); - - pr_debug("EPCI: config_addr = 0x%p\n", addr); - - return addr; -} - -static int celleb_epci_read_config(struct pci_bus *bus, - unsigned int devfn, int where, int size, u32 *val) -{ - PCI_IO_ADDR epci_base; - PCI_IO_ADDR addr; - struct pci_controller *hose = pci_bus_to_host(bus); - - /* allignment check */ - BUG_ON(where % size); - - if (!celleb_epci_get_epci_cfg(hose)) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == 0) { - /* EPCI controller self */ - - epci_base = celleb_epci_get_epci_base(hose); - addr = epci_base + 
where; - - switch (size) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_be16(addr); - break; - case 4: - *val = in_be32(addr); - break; - default: - return PCIBIOS_DEVICE_NOT_FOUND; - } - - } else { - - clear_and_disable_master_abort_interrupt(hose); - addr = celleb_epci_make_config_addr(bus, hose, devfn, where); - - switch (size) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - case 4: - *val = in_le32(addr); - break; - default: - return PCIBIOS_DEVICE_NOT_FOUND; - } - } - - pr_debug("EPCI: " - "addr=0x%p, devfn=0x%x, where=0x%x, size=0x%x, val=0x%x\n", - addr, devfn, where, size, *val); - - return celleb_epci_check_abort(hose, NULL); -} - -static int celleb_epci_write_config(struct pci_bus *bus, - unsigned int devfn, int where, int size, u32 val) -{ - PCI_IO_ADDR epci_base; - PCI_IO_ADDR addr; - struct pci_controller *hose = pci_bus_to_host(bus); - - /* allignment check */ - BUG_ON(where % size); - - if (!celleb_epci_get_epci_cfg(hose)) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == 0) { - /* EPCI controller self */ - - epci_base = celleb_epci_get_epci_base(hose); - addr = epci_base + where; - - switch (size) { - case 1: - out_8(addr, val); - break; - case 2: - out_be16(addr, val); - break; - case 4: - out_be32(addr, val); - break; - default: - return PCIBIOS_DEVICE_NOT_FOUND; - } - - } else { - - clear_and_disable_master_abort_interrupt(hose); - addr = celleb_epci_make_config_addr(bus, hose, devfn, where); - - switch (size) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - case 4: - out_le32(addr, val); - break; - default: - return PCIBIOS_DEVICE_NOT_FOUND; - } - } - - return celleb_epci_check_abort(hose, addr); -} - -struct pci_ops celleb_epci_ops = { - .read = celleb_epci_read_config, - .write = celleb_epci_write_config, -}; - -/* to be moved in FW */ -static int __init celleb_epci_init(struct pci_controller *hose) -{ - u32 val; - PCI_IO_ADDR reg; - PCI_IO_ADDR epci_base; - int hwres = 0; - - epci_base = celleb_epci_get_epci_base(hose); - - /* PCI core reset(Internal bus and PCI clock) */ - reg = epci_base + SCC_EPCI_CKCTRL; - val = in_be32(reg); - if (val == 0x00030101) - hwres = 1; - else { - val &= ~(SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1); - out_be32(reg, val); - - /* set PCI core clock */ - val = in_be32(reg); - val |= (SCC_EPCI_CKCTRL_OCLKEN | SCC_EPCI_CKCTRL_LCLKEN); - out_be32(reg, val); - - /* release PCI core reset (internal bus) */ - val = in_be32(reg); - val |= SCC_EPCI_CKCTRL_CRST0; - out_be32(reg, val); - - /* set PCI clock select */ - reg = epci_base + SCC_EPCI_CLKRST; - val = in_be32(reg); - val &= ~SCC_EPCI_CLKRST_CKS_MASK; - val |= SCC_EPCI_CLKRST_CKS_2; - out_be32(reg, val); - - /* set arbiter */ - reg = epci_base + SCC_EPCI_ABTSET; - out_be32(reg, 0x0f1f001f); /* temporary value */ - - /* buffer on */ - reg = epci_base + SCC_EPCI_CLKRST; - val = in_be32(reg); - val |= SCC_EPCI_CLKRST_BC; - out_be32(reg, val); - - /* PCI clock enable */ - val = in_be32(reg); - val |= SCC_EPCI_CLKRST_PCKEN; - out_be32(reg, val); - - /* release PCI core reset (all) */ - reg = epci_base + SCC_EPCI_CKCTRL; - val = in_be32(reg); - val |= (SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1); - out_be32(reg, val); - - /* set base translation registers. (already set by Beat) */ - - /* set base address masks. 
(already set by Beat) */ - } - - /* release interrupt masks and clear all interrupts */ - reg = epci_base + SCC_EPCI_INTSET; - out_be32(reg, 0x013f011f); /* all interrupts enable */ - reg = epci_base + SCC_EPCI_VIENAB; - val = SCC_EPCI_VIENAB_PMPEE | SCC_EPCI_VIENAB_PMFEE; - out_be32(reg, val); - reg = epci_base + SCC_EPCI_STATUS; - out_be32(reg, 0xffffffff); - reg = epci_base + SCC_EPCI_VISTAT; - out_be32(reg, 0xffffffff); - - /* disable PCI->IB address translation */ - reg = epci_base + SCC_EPCI_VCSR; - val = in_be32(reg); - val &= ~(SCC_EPCI_VCSR_DR | SCC_EPCI_VCSR_AT); - out_be32(reg, val); - - /* set base addresses. (no need to set?) */ - - /* memory space, bus master enable */ - reg = epci_base + PCI_COMMAND; - val = PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; - out_be32(reg, val); - - /* endian mode setup */ - reg = epci_base + SCC_EPCI_ECMODE; - val = 0x00550155; - out_be32(reg, val); - - /* set control option */ - reg = epci_base + SCC_EPCI_CNTOPT; - val = in_be32(reg); - val |= SCC_EPCI_CNTOPT_O2PMB; - out_be32(reg, val); - - /* XXX: temporay: set registers for address conversion setup */ - reg = epci_base + SCC_EPCI_CNF10_REG; - out_be32(reg, 0x80000008); - reg = epci_base + SCC_EPCI_CNF14_REG; - out_be32(reg, 0x40000008); - - reg = epci_base + SCC_EPCI_BAM0; - out_be32(reg, 0x80000000); - reg = epci_base + SCC_EPCI_BAM1; - out_be32(reg, 0xe0000000); - - reg = epci_base + SCC_EPCI_PVBAT; - out_be32(reg, 0x80000000); - - if (!hwres) { - /* release external PCI reset */ - reg = epci_base + SCC_EPCI_CLKRST; - val = in_be32(reg); - val |= SCC_EPCI_CLKRST_PCIRST; - out_be32(reg, val); - } - - return 0; -} - -static int __init celleb_setup_epci(struct device_node *node, - struct pci_controller *hose) -{ - struct resource r; - - pr_debug("PCI: celleb_setup_epci()\n"); - - /* - * Note: - * Celleb epci uses cfg_addr and cfg_data member of - * pci_controller structure in irregular way. - * - * cfg_addr is used to map for control registers of - * celleb epci. - * - * cfg_data is used for configuration area of devices - * on Celleb epci buses. - */ - - if (of_address_to_resource(node, 0, &r)) - goto error; - hose->cfg_addr = ioremap(r.start, resource_size(&r)); - if (!hose->cfg_addr) - goto error; - pr_debug("EPCI: cfg_addr map 0x%016llx->0x%016lx + 0x%016llx\n", - r.start, (unsigned long)hose->cfg_addr, resource_size(&r)); - - if (of_address_to_resource(node, 2, &r)) - goto error; - hose->cfg_data = ioremap(r.start, resource_size(&r)); - if (!hose->cfg_data) - goto error; - pr_debug("EPCI: cfg_data map 0x%016llx->0x%016lx + 0x%016llx\n", - r.start, (unsigned long)hose->cfg_data, resource_size(&r)); - - hose->ops = &celleb_epci_ops; - celleb_epci_init(hose); - - return 0; - -error: - if (hose->cfg_addr) - iounmap(hose->cfg_addr); - - if (hose->cfg_data) - iounmap(hose->cfg_data); - return 1; -} - -struct celleb_phb_spec celleb_epci_spec __initdata = { - .setup = celleb_setup_epci, - .ops = &spiderpci_ops, - .iowa_init = &spiderpci_iowa_init, - .iowa_data = (void *)0, -}; diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c deleted file mode 100644 index 94170e4f2ce7..000000000000 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Support for Celleb PCI-Express. 
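Before the PCI-Express variant below, it is worth spelling out the address math from celleb_epci_make_config_addr() above: root-bus accesses use a flat devfn<<8 | where layout in the cfg_data window, while accesses behind the bridge also encode the bus number and set bit 24. Restated as a self-contained helper (illustrative name, userspace-compilable):

#include <stdint.h>

/* Byte offset into the EPCI configuration window, as computed by
 * celleb_epci_make_config_addr() above. */
static uint32_t epci_cfg_offset(int on_root_bus, uint8_t bus,
                                uint8_t devfn, uint8_t where)
{
        if (on_root_bus)
                return ((uint32_t)devfn << 8) | where;

        return 0x01000000u                /* "behind the bridge" flag */
               | ((uint32_t)bus << 16)
               | ((uint32_t)devfn << 8)
               | where;
}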
- * - * (C) Copyright 2007-2008 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/interrupt.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/iommu.h> -#include <asm/byteorder.h> - -#include "celleb_scc.h" -#include "celleb_pci.h" - -#define PEX_IN(base, off) in_be32((void __iomem *)(base) + (off)) -#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data)) - -static void scc_pciex_io_flush(struct iowa_bus *bus) -{ - (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0); -} - -/* - * Memory space access to device on PCIEX - */ -#define PCIEX_MMIO_READ(name, ret) \ -static ret scc_pciex_##name(const PCI_IO_ADDR addr) \ -{ \ - ret val = __do_##name(addr); \ - scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ - return val; \ -} - -#define PCIEX_MMIO_READ_STR(name) \ -static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \ - unsigned long count) \ -{ \ - __do_##name(addr, buf, count); \ - scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ -} - -PCIEX_MMIO_READ(readb, u8) -PCIEX_MMIO_READ(readw, u16) -PCIEX_MMIO_READ(readl, u32) -PCIEX_MMIO_READ(readq, u64) -PCIEX_MMIO_READ(readw_be, u16) -PCIEX_MMIO_READ(readl_be, u32) -PCIEX_MMIO_READ(readq_be, u64) -PCIEX_MMIO_READ_STR(readsb) -PCIEX_MMIO_READ_STR(readsw) -PCIEX_MMIO_READ_STR(readsl) - -static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src, - unsigned long n) -{ - __do_memcpy_fromio(dest, src, n); - scc_pciex_io_flush(iowa_mem_find_bus(src)); -} - -/* - * I/O port access to devices on PCIEX. 
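Each read accessor generated by the PCIEX_MMIO_READ macros above ends with scc_pciex_io_flush(), a dummy read of PEXDMRDEN0 on the owning bus, apparently so the preceding MMIO load is known to have completed in the SCC before the value is used. One instance expanded by hand, to show the shape without the macro (illustrative function name; __do_readl and iowa_mem_find_bus as used above):

/* PCIEX_MMIO_READ(readl, u32) expanded: real load, then flush read. */
static u32 scc_pciex_readl_expanded(const PCI_IO_ADDR addr)
{
        u32 val = __do_readl(addr);                     /* the real MMIO load */

        scc_pciex_io_flush(iowa_mem_find_bus(addr));    /* dummy flush read */
        return val;
}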
- */ - -static inline unsigned long get_bus_address(struct pci_controller *phb, - unsigned long port) -{ - return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE); -} - -static u32 scc_pciex_read_port(struct pci_controller *phb, - unsigned long port, int size) -{ - unsigned int byte_enable; - unsigned int cmd, shift; - unsigned long addr; - u32 data, ret; - - BUG_ON(((port & 0x3ul) + size) > 4); - - addr = get_bus_address(phb, port); - shift = addr & 0x3ul; - byte_enable = ((1 << size) - 1) << shift; - cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); - PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); - PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); - data = PEX_IN(phb->cfg_addr, PEXDRDATA); - ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8)); - - pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x," - " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable, - cmd, data, ret); - - return ret; -} - -static void scc_pciex_write_port(struct pci_controller *phb, - unsigned long port, int size, u32 val) -{ - unsigned int byte_enable; - unsigned int cmd, shift; - unsigned long addr; - u32 data; - - BUG_ON(((port & 0x3ul) + size) > 4); - - addr = get_bus_address(phb, port); - shift = addr & 0x3ul; - byte_enable = ((1 << size) - 1) << shift; - cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); - data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8); - PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); - PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); - PEX_OUT(phb->cfg_addr, PEXDWDATA, data); - - pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x," - " be=%x, cmd=%x, data=%x\n", port, addr, size, val, - byte_enable, cmd, data); -} - -static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port) -{ - return (u8)scc_pciex_read_port(phb, port, 1); -} - -static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port) -{ - u32 data; - if ((port & 0x3ul) < 3) - data = scc_pciex_read_port(phb, port, 2); - else { - u32 d1 = scc_pciex_read_port(phb, port, 1); - u32 d2 = scc_pciex_read_port(phb, port + 1, 1); - data = d1 | (d2 << 8); - } - return (u16)data; -} - -static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port) -{ - unsigned int mod = port & 0x3ul; - u32 data; - if (mod == 0) - data = scc_pciex_read_port(phb, port, 4); - else { - u32 d1 = scc_pciex_read_port(phb, port, 4 - mod); - u32 d2 = scc_pciex_read_port(phb, port + 1, mod); - data = d1 | (d2 << (mod * 8)); - } - return data; -} - -static void __scc_pciex_outb(struct pci_controller *phb, - u8 val, unsigned long port) -{ - scc_pciex_write_port(phb, port, 1, (u32)val); -} - -static void __scc_pciex_outw(struct pci_controller *phb, - u16 val, unsigned long port) -{ - if ((port & 0x3ul) < 3) - scc_pciex_write_port(phb, port, 2, (u32)val); - else { - u32 d1 = val & 0x000000FF; - u32 d2 = (val & 0x0000FF00) >> 8; - scc_pciex_write_port(phb, port, 1, d1); - scc_pciex_write_port(phb, port + 1, 1, d2); - } -} - -static void __scc_pciex_outl(struct pci_controller *phb, - u32 val, unsigned long port) -{ - unsigned int mod = port & 0x3ul; - if (mod == 0) - scc_pciex_write_port(phb, port, 4, val); - else { - u32 d1 = val & (0xFFFFFFFFul >> (mod * 8)); - u32 d2 = val >> ((4 - mod) * 8); - scc_pciex_write_port(phb, port, 4 - mod, d1); - scc_pciex_write_port(phb, port + 1, mod, d2); - } -} - -#define PCIEX_PIO_FUNC(size, name) \ -static u##size scc_pciex_in##name(unsigned long port) \ -{ \ - struct iowa_bus *bus = iowa_pio_find_bus(port); \ - 
u##size data = __scc_pciex_in##name(bus->phb, port); \ - scc_pciex_io_flush(bus); \ - return data; \ -} \ -static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \ -{ \ - struct iowa_bus *bus = iowa_pio_find_bus(p); \ - __le##size *dst = b; \ - for (; c != 0; c--, dst++) \ - *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \ - scc_pciex_io_flush(bus); \ -} \ -static void scc_pciex_out##name(u##size val, unsigned long port) \ -{ \ - struct iowa_bus *bus = iowa_pio_find_bus(port); \ - __scc_pciex_out##name(bus->phb, val, port); \ -} \ -static void scc_pciex_outs##name(unsigned long p, const void *b, \ - unsigned long c) \ -{ \ - struct iowa_bus *bus = iowa_pio_find_bus(p); \ - const __le##size *src = b; \ - for (; c != 0; c--, src++) \ - __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \ -} -#define __le8 u8 -#define cpu_to_le8(x) (x) -#define le8_to_cpu(x) (x) -PCIEX_PIO_FUNC(8, b) -PCIEX_PIO_FUNC(16, w) -PCIEX_PIO_FUNC(32, l) - -static struct ppc_pci_io scc_pciex_ops = { - .readb = scc_pciex_readb, - .readw = scc_pciex_readw, - .readl = scc_pciex_readl, - .readq = scc_pciex_readq, - .readw_be = scc_pciex_readw_be, - .readl_be = scc_pciex_readl_be, - .readq_be = scc_pciex_readq_be, - .readsb = scc_pciex_readsb, - .readsw = scc_pciex_readsw, - .readsl = scc_pciex_readsl, - .memcpy_fromio = scc_pciex_memcpy_fromio, - .inb = scc_pciex_inb, - .inw = scc_pciex_inw, - .inl = scc_pciex_inl, - .outb = scc_pciex_outb, - .outw = scc_pciex_outw, - .outl = scc_pciex_outl, - .insb = scc_pciex_insb, - .insw = scc_pciex_insw, - .insl = scc_pciex_insl, - .outsb = scc_pciex_outsb, - .outsw = scc_pciex_outsw, - .outsl = scc_pciex_outsl, -}; - -static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) -{ - dma_addr_t dummy_page_da; - void *dummy_page_va; - - dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!dummy_page_va) { - pr_err("PCIEX:Alloc dummy_page_va failed\n"); - return -1; - } - - dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, - PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(bus->phb->parent, dummy_page_da)) { - pr_err("PCIEX:Map dummy page failed.\n"); - kfree(dummy_page_va); - return -1; - } - - PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da); - - return 0; -} - -/* - * config space access - */ -#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \ - ((uint32_t)(((addr) & ~0x3UL) | \ - ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \ - ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \ - ((func_no) << PEXDADRS_FUNCNO_SHIFT))) - -#define MK_PEXDCMND_BYTE_EN(addr, size) \ - ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT) -#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size)) - -static uint32_t config_read_pciex_dev(unsigned int __iomem *base, - uint64_t bus_no, uint64_t dev_no, uint64_t func_no, - uint64_t off, uint64_t size) -{ - uint32_t ret; - uint32_t addr, cmd; - - addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); - cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size); - PEX_OUT(base, PEXDADRS, addr); - PEX_OUT(base, PEXDCMND, cmd); - ret = (PEX_IN(base, PEXDRDATA) - >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1); - return ret; -} - -static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no, - uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size, - uint32_t data) -{ - uint32_t addr, cmd; - - addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); - cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size); - PEX_OUT(base, PEXDADRS, addr); - PEX_OUT(base, 
PEXDCMND, cmd); - PEX_OUT(base, PEXDWDATA, - (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8)); -} - -#define MK_PEXCADRS_BYTE_EN(off, len) \ - ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT) -#define MK_PEXCADRS(cmd, addr, size) \ - ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3)) -static uint32_t config_read_pciex_rc(unsigned int __iomem *base, - uint32_t where, uint32_t size) -{ - PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size)); - return (PEX_IN(base, PEXCRDATA) - >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1); -} - -static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where, - uint32_t size, uint32_t val) -{ - uint32_t data; - - data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8); - PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size)); - PEX_OUT(base, PEXCWDATA, data); -} - -/* Interfaces */ -/* Note: Work-around - * On SCC PCIEXC, one device is seen on all 32 dev_no. - * As SCC PCIEXC can have only one device on the bus, we look only one dev_no. - * (dev_no = 1) - */ -static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, unsigned int *val) -{ - struct pci_controller *phb = pci_bus_to_host(bus); - - if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) { - *val = ~0; - return PCIBIOS_DEVICE_NOT_FOUND; - } - - if (bus->number == 0 && PCI_SLOT(devfn) == 0) - *val = config_read_pciex_rc(phb->cfg_addr, where, size); - else - *val = config_read_pciex_dev(phb->cfg_addr, bus->number, - PCI_SLOT(devfn), PCI_FUNC(devfn), where, size); - - return PCIBIOS_SUCCESSFUL; -} - -static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn, - int where, int size, unsigned int val) -{ - struct pci_controller *phb = pci_bus_to_host(bus); - - if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == 0 && PCI_SLOT(devfn) == 0) - config_write_pciex_rc(phb->cfg_addr, where, size, val); - else - config_write_pciex_dev(phb->cfg_addr, bus->number, - PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops scc_pciex_pci_ops = { - .read = scc_pciex_read_config, - .write = scc_pciex_write_config, -}; - -static void pciex_clear_intr_all(unsigned int __iomem *base) -{ - PEX_OUT(base, PEXAERRSTS, 0xffffffff); - PEX_OUT(base, PEXPRERRSTS, 0xffffffff); - PEX_OUT(base, PEXINTSTS, 0xffffffff); -} - -#if 0 -static void pciex_disable_intr_all(unsigned int *base) -{ - PEX_OUT(base, PEXINTMASK, 0x0); - PEX_OUT(base, PEXAERRMASK, 0x0); - PEX_OUT(base, PEXPRERRMASK, 0x0); - PEX_OUT(base, PEXVDMASK, 0x0); -} -#endif - -static void pciex_enable_intr_all(unsigned int __iomem *base) -{ - PEX_OUT(base, PEXINTMASK, 0x0000e7f1); - PEX_OUT(base, PEXAERRMASK, 0x03ff01ff); - PEX_OUT(base, PEXPRERRMASK, 0x0001010f); - PEX_OUT(base, PEXVDMASK, 0x00000001); -} - -static void pciex_check_status(unsigned int __iomem *base) -{ - uint32_t err = 0; - uint32_t intsts, aerr, prerr, rcvcp, lenerr; - uint32_t maea, maec; - - intsts = PEX_IN(base, PEXINTSTS); - aerr = PEX_IN(base, PEXAERRSTS); - prerr = PEX_IN(base, PEXPRERRSTS); - rcvcp = PEX_IN(base, PEXRCVCPLIDA); - lenerr = PEX_IN(base, PEXLENERRIDA); - - if (intsts || aerr || prerr || rcvcp || lenerr) - err = 1; - - pr_info("PCEXC interrupt!!\n"); - pr_info("PEXINTSTS :0x%08x\n", intsts); - pr_info("PEXAERRSTS :0x%08x\n", aerr); - pr_info("PEXPRERRSTS :0x%08x\n", prerr); - pr_info("PEXRCVCPLIDA 
:0x%08x\n", rcvcp); - pr_info("PEXLENERRIDA :0x%08x\n", lenerr); - - /* print detail of Protection Error */ - if (intsts & 0x00004000) { - uint32_t i, n; - for (i = 0; i < 4; i++) { - n = 1 << i; - if (prerr & n) { - maea = PEX_IN(base, PEXMAEA(i)); - maec = PEX_IN(base, PEXMAEC(i)); - pr_info("PEXMAEC%d :0x%08x\n", i, maec); - pr_info("PEXMAEA%d :0x%08x\n", i, maea); - } - } - } - - if (err) - pciex_clear_intr_all(base); -} - -static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id) -{ - struct pci_controller *phb = dev_id; - - pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq); - - BUG_ON(phb->cfg_addr == NULL); - - pciex_check_status(phb->cfg_addr); - - return IRQ_HANDLED; -} - -static __init int celleb_setup_pciex(struct device_node *node, - struct pci_controller *phb) -{ - struct resource r; - int virq; - - /* SMMIO registers; used inside this file */ - if (of_address_to_resource(node, 0, &r)) { - pr_err("PCIEXC:Failed to get config resource.\n"); - return 1; - } - phb->cfg_addr = ioremap(r.start, resource_size(&r)); - if (!phb->cfg_addr) { - pr_err("PCIEXC:Failed to remap SMMIO region.\n"); - return 1; - } - - /* Not use cfg_data, cmd and data regs are near address reg */ - phb->cfg_data = NULL; - - /* set pci_ops */ - phb->ops = &scc_pciex_pci_ops; - - /* internal interrupt handler */ - virq = irq_of_parse_and_map(node, 1); - if (!virq) { - pr_err("PCIEXC:Failed to map irq\n"); - goto error; - } - if (request_irq(virq, pciex_handle_internal_irq, - 0, "pciex", (void *)phb)) { - pr_err("PCIEXC:Failed to request irq\n"); - goto error; - } - - /* enable all interrupts */ - pciex_clear_intr_all(phb->cfg_addr); - pciex_enable_intr_all(phb->cfg_addr); - /* MSI: TBD */ - - return 0; - -error: - phb->cfg_data = NULL; - if (phb->cfg_addr) - iounmap(phb->cfg_addr); - phb->cfg_addr = NULL; - return 1; -} - -struct celleb_phb_spec celleb_pciex_spec __initdata = { - .setup = celleb_setup_pciex, - .ops = &scc_pciex_ops, - .iowa_init = &scc_pciex_iowa_init, -}; diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c deleted file mode 100644 index c8eb57193826..000000000000 --- a/arch/powerpc/platforms/cell/celleb_scc_sio.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * setup serial port in SCC - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
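For reference, the indirect configuration cycle driven by config_read_pciex_dev() above is a three-step sequence: write the target bus/device/function plus dword address to PEXDADRS, write the command with byte enables to PEXDCMND, then read PEXDRDATA and extract the addressed lane. Written out for a single 16-bit read (sketch only; the macros and PEX_IN/PEX_OUT are the ones defined above, and off is assumed 2-byte aligned):

/* Indirect 16-bit config read, mirroring config_read_pciex_dev(). */
static u16 pciex_cfg_read16(unsigned int __iomem *base,
                            u32 bus, u32 dev, u32 fn, u32 off)
{
        u32 addr = MK_PEXDADRS(bus, dev, fn, off);
        u32 cmd  = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, 2);

        PEX_OUT(base, PEXDADRS, addr);  /* target BDF + dword address */
        PEX_OUT(base, PEXDCMND, cmd);   /* read command + byte enables */

        /* PEXDRDATA returns the full dword; shift the addressed half out. */
        return (PEX_IN(base, PEXDRDATA) >> ((off & 2) * 8)) & 0xffff;
}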
- */ - -#include <linux/tty.h> -#include <linux/serial.h> -#include <linux/serial_core.h> -#include <linux/console.h> - -#include <asm/io.h> -#include <asm/prom.h> - -/* sio irq0=0xb00010022 irq0=0xb00010023 irq2=0xb00010024 - mmio=0xfff000-0x1000,0xff2000-0x1000 */ -static int txx9_serial_bitmap __initdata; - -static struct { - uint32_t offset; - uint32_t index; -} txx9_scc_tab[3] __initdata = { - { 0x300, 0 }, /* 0xFFF300 */ - { 0x400, 0 }, /* 0xFFF400 */ - { 0x800, 1 } /* 0xFF2800 */ -}; - -static int __init txx9_serial_init(void) -{ - extern int early_serial_txx9_setup(struct uart_port *port); - struct device_node *node; - int i; - struct uart_port req; - struct of_phandle_args irq; - struct resource res; - - for_each_compatible_node(node, "serial", "toshiba,sio-scc") { - for (i = 0; i < ARRAY_SIZE(txx9_scc_tab); i++) { - if (!(txx9_serial_bitmap & (1<<i))) - continue; - - if (of_irq_parse_one(node, i, &irq)) - continue; - if (of_address_to_resource(node, - txx9_scc_tab[i].index, &res)) - continue; - - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.mapbase = res.start + txx9_scc_tab[i].offset; -#ifdef CONFIG_SERIAL_TXX9_CONSOLE - req.membase = ioremap(req.mapbase, 0x24); -#endif - req.irq = irq_create_of_mapping(&irq); - req.flags |= UPF_IOREMAP | UPF_BUGGY_UART - /*HAVE_CTS_LINE*/; - req.uartclk = 83300000; - early_serial_txx9_setup(&req); - } - } - - return 0; -} - -static int __init txx9_serial_config(char *ptr) -{ - int i; - - for (;;) { - switch (get_option(&ptr, &i)) { - default: - return 0; - case 2: - txx9_serial_bitmap |= 1 << i; - break; - case 1: - txx9_serial_bitmap |= 1 << i; - return 0; - } - } -} -__setup("txx9_serial=", txx9_serial_config); - -console_initcall(txx9_serial_init); diff --git a/arch/powerpc/platforms/cell/celleb_scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c deleted file mode 100644 index d63b720bfe3a..000000000000 --- a/arch/powerpc/platforms/cell/celleb_scc_uhc.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * SCC (Super Companion Chip) UHC setup - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> - -#include <asm/delay.h> -#include <asm/io.h> -#include <asm/machdep.h> - -#include "celleb_scc.h" - -#define UHC_RESET_WAIT_MAX 10000 - -static inline int uhc_clkctrl_ready(u32 val) -{ - const u32 mask = SCC_UHC_USBCEN | SCC_UHC_USBCEN; - return((val & mask) == mask); -} - -/* - * UHC(usb host controller) enable function. - * affect to both of OHCI and EHCI core module. 
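Looking back at the SIO file just removed: the __setup("txx9_serial=", ...) handler builds a bitmap of SCC serial ports to bring up, one bit per comma-separated index, so txx9_serial=0,2 selects the ports at 0xFFF300 and 0xFF2800. A userspace restatement of that parse loop (get_option() approximated with strtol; names illustrative):

#include <stdio.h>
#include <stdlib.h>

/* "0,2" -> 0x5, matching txx9_serial_config() above for the plain
 * integer cases (ranges via '-' are not handled there either). */
static int parse_txx9_serial(const char *arg)
{
        int bitmap = 0;
        char *end;

        for (;;) {
                long i = strtol(arg, &end, 0);
                if (end == arg)
                        break;          /* no integer: stop */
                bitmap |= 1 << i;
                if (*end != ',')
                        break;          /* last index */
                arg = end + 1;          /* skip comma, keep parsing */
        }
        return bitmap;
}

int main(void)
{
        printf("0x%x\n", parse_txx9_serial("0,2"));     /* prints 0x5 */
        return 0;
}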
- */ -static void enable_scc_uhc(struct pci_dev *dev) -{ - void __iomem *uhc_base; - u32 __iomem *uhc_clkctrl; - u32 __iomem *uhc_ecmode; - u32 val = 0; - int i; - - if (!machine_is(celleb_beat) && - !machine_is(celleb_native)) - return; - - uhc_base = ioremap(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); - if (!uhc_base) { - printk(KERN_ERR "failed to map UHC register base.\n"); - return; - } - uhc_clkctrl = uhc_base + SCC_UHC_CKRCTRL; - uhc_ecmode = uhc_base + SCC_UHC_ECMODE; - - /* setup for normal mode */ - val |= SCC_UHC_F48MCKLEN; - out_be32(uhc_clkctrl, val); - val |= SCC_UHC_PHY_SUSPEND_SEL; - out_be32(uhc_clkctrl, val); - udelay(10); - val |= SCC_UHC_PHYEN; - out_be32(uhc_clkctrl, val); - udelay(50); - - /* disable reset */ - val |= SCC_UHC_HCLKEN; - out_be32(uhc_clkctrl, val); - val |= (SCC_UHC_USBCEN | SCC_UHC_USBEN); - out_be32(uhc_clkctrl, val); - i = 0; - while (!uhc_clkctrl_ready(in_be32(uhc_clkctrl))) { - udelay(10); - if (i++ > UHC_RESET_WAIT_MAX) { - printk(KERN_ERR "Failed to disable UHC reset %x\n", - in_be32(uhc_clkctrl)); - break; - } - } - - /* Endian Conversion Mode for Master ALL area */ - out_be32(uhc_ecmode, SCC_UHC_ECMODE_BY_BYTE); - - iounmap(uhc_base); -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2, - PCI_DEVICE_ID_TOSHIBA_SCC_USB, enable_scc_uhc); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c deleted file mode 100644 index 90be8ec51686..000000000000 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Celleb setup code - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/cell/setup.c: - * Copyright (C) 1995 Linus Torvalds - * Adapted from 'alpha' version by Gary Thomas - * Modified by Cort Dougan (cort@cs.nmt.edu) - * Modified by PPC64 Team, IBM Corp - * Modified by Cell Team, IBM Deutschland Entwicklung GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
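Two notes on enable_scc_uhc() above. First, uhc_clkctrl_ready() ORs SCC_UHC_USBCEN with itself, so its ready mask tests a single bit; since the enable sequence sets both SCC_UHC_USBCEN and SCC_UHC_USBEN, the second operand was presumably meant to be SCC_UHC_USBEN (moot now that the file is gone). Second, the bring-up follows the usual bounded-poll idiom: stage the clock and PHY enables with fixed delays, then poll the status with a retry budget instead of waiting forever. The skeleton of that loop (illustrative name; in_be32/udelay as in the removed code):

/* Bounded poll: wait for all bits in mask, give up after max_tries. */
static int wait_ready(u32 __iomem *reg, u32 mask, int max_tries)
{
        int i = 0;

        while ((in_be32(reg) & mask) != mask) {
                udelay(10);
                if (i++ > max_tries)
                        return -1;      /* report failure, don't hang boot */
        }
        return 0;
}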
- */ - -#undef DEBUG - -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/seq_file.h> -#include <linux/root_dev.h> -#include <linux/console.h> -#include <linux/of_platform.h> - -#include <asm/mmu.h> -#include <asm/processor.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/cputable.h> -#include <asm/irq.h> -#include <asm/time.h> -#include <asm/spu_priv1.h> -#include <asm/firmware.h> -#include <asm/rtas.h> -#include <asm/cell-regs.h> - -#include "beat_interrupt.h" -#include "beat_wrapper.h" -#include "beat.h" -#include "celleb_pci.h" -#include "interrupt.h" -#include "pervasive.h" -#include "ras.h" - -static char celleb_machine_type[128] = "Celleb"; - -static void celleb_show_cpuinfo(struct seq_file *m) -{ - struct device_node *root; - const char *model = ""; - - root = of_find_node_by_path("/"); - if (root) - model = of_get_property(root, "model", NULL); - /* using "CHRP" is to trick anaconda into installing FCx into Celleb */ - seq_printf(m, "machine\t\t: %s %s\n", celleb_machine_type, model); - of_node_put(root); -} - -static int __init celleb_machine_type_hack(char *ptr) -{ - strlcpy(celleb_machine_type, ptr, sizeof(celleb_machine_type)); - return 0; -} - -__setup("celleb_machine_type_hack=", celleb_machine_type_hack); - -static void celleb_progress(char *s, unsigned short hex) -{ - printk("*** %04x : %s\n", hex, s ? s : ""); -} - -static void __init celleb_setup_arch_common(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif -} - -static const struct of_device_id celleb_bus_ids[] __initconst = { - { .type = "scc", }, - { .type = "ioif", }, /* old style */ - {}, -}; - -static int __init celleb_publish_devices(void) -{ - /* Publish OF platform devices for southbridge IOs */ - of_platform_bus_probe(NULL, celleb_bus_ids, NULL); - - return 0; -} -machine_device_initcall(celleb_beat, celleb_publish_devices); -machine_device_initcall(celleb_native, celleb_publish_devices); - - -/* - * functions for Celleb-Beat - */ -static void __init celleb_setup_arch_beat(void) -{ -#ifdef CONFIG_SPU_BASE - spu_priv1_ops = &spu_priv1_beat_ops; - spu_management_ops = &spu_management_of_ops; -#endif - - celleb_setup_arch_common(); -} - -static int __init celleb_probe_beat(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "Beat")) - return 0; - - powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS - | FW_FEATURE_BEAT | FW_FEATURE_LPAR; - hpte_init_beat_v3(); - pm_power_off = beat_power_off; - - return 1; -} - - -/* - * functions for Celleb-native - */ -static void __init celleb_init_IRQ_native(void) -{ - iic_init_IRQ(); - spider_init_IRQ(); -} - -static void __init celleb_setup_arch_native(void) -{ -#ifdef CONFIG_SPU_BASE - spu_priv1_ops = &spu_priv1_mmio_ops; - spu_management_ops = &spu_management_of_ops; -#endif - - cbe_regs_init(); - -#ifdef CONFIG_CBE_RAS - cbe_ras_init(); -#endif - -#ifdef CONFIG_SMP - smp_init_cell(); -#endif - - cbe_pervasive_init(); - - /* XXX: nvram initialization should be added */ - - celleb_setup_arch_common(); -} - -static int __init celleb_probe_native(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if 
(of_flat_dt_is_compatible(root, "Beat") || - !of_flat_dt_is_compatible(root, "TOSHIBA,Celleb")) - return 0; - - powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS; - hpte_init_native(); - pm_power_off = rtas_power_off; - - return 1; -} - - -/* - * machine definitions - */ -define_machine(celleb_beat) { - .name = "Cell Reference Set (Beat)", - .probe = celleb_probe_beat, - .setup_arch = celleb_setup_arch_beat, - .show_cpuinfo = celleb_show_cpuinfo, - .restart = beat_restart, - .halt = beat_halt, - .get_rtc_time = beat_get_rtc_time, - .set_rtc_time = beat_set_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = celleb_progress, - .power_save = beat_power_save, - .nvram_size = beat_nvram_get_size, - .nvram_read = beat_nvram_read, - .nvram_write = beat_nvram_write, - .set_dabr = beat_set_xdabr, - .init_IRQ = beatic_init_IRQ, - .get_irq = beatic_get_irq, - .pci_probe_mode = celleb_pci_probe_mode, - .pci_setup_phb = celleb_setup_phb, -#ifdef CONFIG_KEXEC - .kexec_cpu_down = beat_kexec_cpu_down, -#endif -}; - -define_machine(celleb_native) { - .name = "Cell Reference Set (native)", - .probe = celleb_probe_native, - .setup_arch = celleb_setup_arch_native, - .show_cpuinfo = celleb_show_cpuinfo, - .restart = rtas_restart, - .halt = rtas_halt, - .get_boot_time = rtas_get_boot_time, - .get_rtc_time = rtas_get_rtc_time, - .set_rtc_time = rtas_set_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = celleb_progress, - .pci_probe_mode = celleb_pci_probe_mode, - .pci_setup_phb = celleb_setup_phb, - .init_IRQ = celleb_init_IRQ_native, -}; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 4c11421847be..3af8324c122e 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); + out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index c7c8720aa39f..21b502398bf3 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -39,6 +39,7 @@ #include <asm/firmware.h> #include <asm/cell-regs.h> +#include "cell.h" #include "interrupt.h" /* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages @@ -197,7 +198,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift) + for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift)) io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); mb(); @@ -857,7 +858,7 @@ static int __init cell_iommu_init_disabled(void) cell_dma_direct_offset += base; if (cell_dma_direct_offset != 0) - ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; printk("iommu: disabled, direct DMA offset is 0x%lx\n", cell_dma_direct_offset); @@ -1197,8 +1198,8 @@ static int __init cell_iommu_init(void) if (cell_iommu_init_disabled() == 0) goto bail; - /* Setup various ppc_md. 
callbacks */ - ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; + /* Setup various callbacks */ + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; ppc_md.dma_get_required_mask = cell_dma_get_required_mask; ppc_md.tce_build = tce_build_cell; ppc_md.tce_free = tce_free_cell; @@ -1234,5 +1235,3 @@ static int __init cell_iommu_init(void) return 0; } machine_arch_initcall(cell, cell_iommu_init); -machine_arch_initcall(celleb_native, cell_iommu_init); - diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index d62aa982d530..36cff28d0293 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -54,6 +54,7 @@ #include <asm/cell-regs.h> #include <asm/io-workarounds.h> +#include "cell.h" #include "interrupt.h" #include "pervasive.h" #include "ras.h" @@ -126,6 +127,8 @@ static int cell_setup_phb(struct pci_controller *phb) if (rc) return rc; + phb->controller_ops = cell_pci_controller_ops; + np = phb->dn; model = of_get_property(np, "model", NULL); if (model == NULL || strcmp(np->name, "pci")) @@ -279,3 +282,5 @@ define_machine(cell) { .init_IRQ = cell_init_irq, .pci_setup_phb = cell_setup_phb, }; + +struct pci_controller_ops cell_pci_controller_ops; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index b64e7ead752f..895560f4be69 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -102,13 +102,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) return 1; } -static int __init smp_iic_probe(void) -{ - iic_request_IPIs(); - - return num_possible_cpus(); -} - static void smp_cell_setup_cpu(int cpu) { if (cpu != boot_cpuid) @@ -139,7 +132,7 @@ static int smp_cell_kick_cpu(int nr) static struct smp_ops_t bpa_iic_smp_ops = { .message_pass = iic_message_pass, - .probe = smp_iic_probe, + .probe = iic_request_IPIs, .kick_cpu = smp_cell_kick_cpu, .setup_cpu = smp_cell_setup_cpu, .cpu_bootable = smp_generic_cpu_bootable, diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index b0ec78e8ad68..a494028b2cdf 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -39,6 +39,7 @@ static void *spu_syscall_table[] = { #define PPC_SYS(func) sys_ni_syscall, #define OLDSYS(func) sys_ni_syscall, #define SYS32ONLY(func) sys_ni_syscall, +#define PPC64ONLY(func) sys_ni_syscall, #define SYSX(f, f3264, f32) sys_ni_syscall, #define SYSCALL_SPU(func) sys_##func, diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 860a59eb8ea2..15ebc4e8a151 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -253,7 +253,7 @@ static void briq_restart(char *cmd) * But unfortunately, the firmware does not connect /chosen/{stdin,stdout} * the the built-in serial node. Instead, a /failsafe node is created. 
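The tce_build_cell() hunk above fixes a stride bug worth spelling out: it_page_shift holds log2 of the IOMMU page size, so the old code advanced uaddr by the shift count itself (12 for 4K pages) instead of by the page size, mapping the wrong memory for every page after the first. The fixed stride is 1 << it_page_shift. A trivial standalone check:

#include <stdio.h>

int main(void)
{
        unsigned long it_page_shift = 12;       /* 4K IOMMU pages */

        printf("old stride: %lu bytes\n", it_page_shift);        /* 12 */
        printf("new stride: %lu bytes\n", 1ul << it_page_shift); /* 4096 */
        return 0;
}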
*/ -static void chrp_init_early(void) +static __init void chrp_init_early(void) { struct device_node *node; const char *property; diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h index c6911ddc479f..eecfa182b06e 100644 --- a/arch/powerpc/platforms/maple/maple.h +++ b/arch/powerpc/platforms/maple/maple.h @@ -10,3 +10,5 @@ extern void maple_calibrate_decr(void); extern void maple_pci_init(void); extern void maple_pci_irq_fixup(struct pci_dev *dev); extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); + +extern struct pci_controller_ops maple_pci_controller_ops; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index d3a13067ec42..a923230e575b 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -510,6 +510,7 @@ static int __init maple_add_bridge(struct device_node *dev) return -ENOMEM; hose->first_busno = bus_range ? bus_range[0] : 0; hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->controller_ops = maple_pci_controller_ops; disp_name = NULL; if (of_device_is_compatible(dev, "u3-agp")) { @@ -660,3 +661,6 @@ static void quirk_ipr_msi(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, quirk_ipr_msi); + +struct pci_controller_ops maple_pci_controller_ops = { +}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 56b85cd61aaf..a837188544c8 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -203,7 +203,7 @@ static void __init maple_init_early(void) { DBG(" -> maple_init_early\n"); - iommu_init_early_dart(); + iommu_init_early_dart(&maple_pci_controller_ops); DBG(" <- maple_init_early\n"); } diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 2e576f2ae442..b8f567b2ea19 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -27,6 +27,8 @@ #include <asm/machdep.h> #include <asm/firmware.h> +#include "pasemi.h" + #define IOBMAP_PAGE_SHIFT 12 #define IOBMAP_PAGE_SIZE (1 << IOBMAP_PAGE_SHIFT) #define IOBMAP_PAGE_MASK (IOBMAP_PAGE_SIZE - 1) @@ -248,8 +250,8 @@ void __init iommu_init_early_pasemi(void) iob_init(NULL); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pasemi; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pasemi; + pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi; + pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; ppc_md.tce_build = iobmap_build; ppc_md.tce_free = iobmap_free; set_pci_dma_ops(&dma_iommu_ops); diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index ea65bf0eb897..11f230a48227 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -30,5 +30,6 @@ static inline void restore_astate(int cpu) } #endif +extern struct pci_controller_ops pasemi_pci_controller_ops; #endif /* _PASEMI_PASEMI_H */ diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index aa862713258c..f3a68a0fef23 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -31,6 +31,8 @@ #include <asm/ppc-pci.h> +#include "pasemi.h" + #define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off)) static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset) @@ -199,6 +201,7 @@ static int __init pas_add_bridge(struct device_node *dev) hose->first_busno 
= 0; hose->last_busno = 0xff; + hose->controller_ops = pasemi_pci_controller_ops; setup_pa_pxp(hose); @@ -239,3 +242,5 @@ void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) return (void __iomem *)pa_pxp_cfg_addr(hose, dev->bus->number, dev->devfn, offset); } + +struct pci_controller_ops pasemi_pci_controller_ops; diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 3e91ef538114..76f5013c35e5 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -246,7 +246,7 @@ static void __init bootx_scan_dt_build_strings(unsigned long base, DBG(" detected display ! adding properties names !\n"); bootx_dt_add_string("linux,boot-display", mem_end); bootx_dt_add_string("linux,opened", mem_end); - strncpy(bootx_disp_path, namep, 255); + strlcpy(bootx_disp_path, namep, sizeof(bootx_disp_path)); } /* get and store all property names */ diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index f4071a67ad00..59ab16fa600f 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -27,6 +27,8 @@ #include <asm/grackle.h> #include <asm/ppc-pci.h> +#include "pmac.h" + #undef DEBUG #ifdef DEBUG @@ -798,6 +800,7 @@ static int __init pmac_add_bridge(struct device_node *dev) return -ENOMEM; hose->first_busno = bus_range ? bus_range[0] : 0; hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->controller_ops = pmac_pci_controller_ops; disp_name = NULL; @@ -942,7 +945,7 @@ void __init pmac_pci_init(void) } #ifdef CONFIG_PPC32 -int pmac_pci_enable_device_hook(struct pci_dev *dev) +static bool pmac_pci_enable_device_hook(struct pci_dev *dev) { struct device_node* node; int updatecfg = 0; @@ -958,11 +961,11 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev) && !node) { printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n", pci_name(dev)); - return -EINVAL; + return false; } if (!node) - return 0; + return true; uninorth_child = node->parent && of_device_is_compatible(node->parent, "uni-north"); @@ -1003,7 +1006,7 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev) L1_CACHE_BYTES >> 2); } - return 0; + return true; } void pmac_pci_fixup_ohci(struct pci_dev *dev) @@ -1223,3 +1226,30 @@ static void fixup_u4_pcie(struct pci_dev* dev) pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie); + +#ifdef CONFIG_PPC64 +static int pmac_pci_probe_mode(struct pci_bus *bus) +{ + struct device_node *node = pci_bus_to_OF_node(bus); + + /* We need to use normal PCI probing for the AGP bus, + * since the device for the AGP bridge isn't in the tree. + * Same for the PCIe host on U4 and the HT host bridge. 
+ */ + if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") || + of_device_is_compatible(node, "u4-pcie") || + of_device_is_compatible(node, "u3-ht"))) + return PCI_PROBE_NORMAL; + return PCI_PROBE_DEVTREE; +} +#endif /* CONFIG_PPC64 */ + +struct pci_controller_ops pmac_pci_controller_ops = { +#ifdef CONFIG_PPC64 + .probe_mode = pmac_pci_probe_mode, +#endif +#ifdef CONFIG_PPC32 + .enable_device_hook = pmac_pci_enable_device_hook, +#endif +}; + diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 4c24bf60d39d..59cfc9d63c2d 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -321,6 +321,9 @@ static void __init pmac_pic_probe_oldstyle(void) max_irqs = max_real_irqs = 64; /* We might have a second cascaded heathrow */ + + /* Compensate for of_node_put() in of_find_node_by_name() */ + of_node_get(master); slave = of_find_node_by_name(master, "mac-io"); /* Check ordering of master & slave */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 8327cce2bdb0..e7f8163d6769 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -25,7 +25,6 @@ extern void pmac_pci_init(void); extern void pmac_nvram_update(void); extern unsigned char pmac_nvram_read_byte(int addr); extern void pmac_nvram_write_byte(int addr, unsigned char val); -extern int pmac_pci_enable_device_hook(struct pci_dev *dev); extern void pmac_pcibios_after_init(void); extern int of_show_percpuinfo(struct seq_file *m, int i); @@ -39,4 +38,6 @@ extern void low_cpu_die(void) __attribute__((noreturn)); extern int pmac_nvram_init(void); extern void pmac_pic_init(void); +extern struct pci_controller_ops pmac_pci_controller_ops; + #endif /* __PMAC_H__ */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 713d36d45d1d..8dd78f4e1af4 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -473,7 +473,7 @@ static void __init pmac_init_early(void) udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); #ifdef CONFIG_PPC64 - iommu_init_early_dart(); + iommu_init_early_dart(&pmac_pci_controller_ops); #endif /* SMP Init has to be done early as we need to patch up @@ -637,24 +637,6 @@ static int __init pmac_probe(void) return 1; } -#ifdef CONFIG_PPC64 -/* Move that to pci.c */ -static int pmac_pci_probe_mode(struct pci_bus *bus) -{ - struct device_node *node = pci_bus_to_OF_node(bus); - - /* We need to use normal PCI probing for the AGP bus, - * since the device for the AGP bridge isn't in the tree. - * Same for the PCIe host on U4 and the HT host bridge. 
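The pattern running through the maple, pasemi and powermac hunks above is the same migration: PCI callbacks leave the global ppc_md/machdep vector and move into a per-controller struct pci_controller_ops that each bridge-setup path copies into hose->controller_ops. A minimal sketch of both halves (the dispatch helper is paraphrased, not the kernel's exact wording, and the fallback return value is an assumption):

/* Platform side: publish only the hooks this platform implements. */
static struct pci_controller_ops demo_pci_controller_ops = {
        .probe_mode = pmac_pci_probe_mode,      /* as added above */
};

static void demo_add_bridge(struct pci_controller *hose)
{
        hose->controller_ops = demo_pci_controller_ops; /* struct copy */
}

/* Core side, roughly: consult the hose instead of ppc_md. */
static int demo_probe_mode(struct pci_bus *bus)
{
        struct pci_controller *hose = pci_bus_to_host(bus);

        if (hose->controller_ops.probe_mode)
                return hose->controller_ops.probe_mode(bus);
        return PCI_PROBE_NORMAL;        /* assumed fallback */
}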
- */ - if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") || - of_device_is_compatible(node, "u4-pcie") || - of_device_is_compatible(node, "u3-ht"))) - return PCI_PROBE_NORMAL; - return PCI_PROBE_DEVTREE; -} -#endif /* CONFIG_PPC64 */ - define_machine(powermac) { .name = "PowerMac", .probe = pmac_probe, @@ -674,12 +656,10 @@ define_machine(powermac) { .feature_call = pmac_do_feature_call, .progress = udbg_progress, #ifdef CONFIG_PPC64 - .pci_probe_mode = pmac_pci_probe_mode, .power_save = power4_idle, .enable_pmcs = power4_enable_pmcs, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 - .pcibios_enable_device_hook = pmac_pci_enable_device_hook, .pcibios_after_init = pmac_pcibios_after_init, .phys_mem_access_prot = pci_phys_mem_access_prot, #endif diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index af094ae03dbb..28a147ca32ba 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -268,14 +268,14 @@ static void __init psurge_quad_init(void) mdelay(33); } -static int __init smp_psurge_probe(void) +static void __init smp_psurge_probe(void) { int i, ncpus; struct device_node *dn; /* We don't do SMP on the PPC601 -- paulus */ if (PVR_VER(mfspr(SPRN_PVR)) == 1) - return 1; + return; /* * The powersurge cpu board can be used in the generation @@ -289,7 +289,7 @@ static int __init smp_psurge_probe(void) */ dn = of_find_node_by_name(NULL, "hammerhead"); if (dn == NULL) - return 1; + return; of_node_put(dn); hhead_base = ioremap(HAMMERHEAD_BASE, 0x800); @@ -310,13 +310,13 @@ static int __init smp_psurge_probe(void) /* not a dual-cpu card */ iounmap(hhead_base); psurge_type = PSURGE_NONE; - return 1; + return; } ncpus = 2; } if (psurge_secondary_ipi_init()) - return 1; + return; psurge_start = ioremap(PSURGE_START, 4); psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); @@ -332,8 +332,6 @@ static int __init smp_psurge_probe(void) set_cpu_present(i, true); if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); - - return ncpus; } static int __init smp_psurge_kick_cpu(int nr) @@ -766,7 +764,7 @@ static void __init smp_core99_setup(int ncpus) powersave_nap = 0; } -static int __init smp_core99_probe(void) +static void __init smp_core99_probe(void) { struct device_node *cpus; int ncpus = 0; @@ -781,7 +779,7 @@ static int __init smp_core99_probe(void) /* Nothing more to do if less than 2 of them */ if (ncpus <= 1) - return 1; + return; /* We need to perform some early initialisations before we can start * setting up SMP as we are running before initcalls @@ -797,8 +795,6 @@ static int __init smp_core99_probe(void) /* Collect l2cr and l3cr values from CPU 0 */ core99_init_caches(0); - - return ncpus; } static int smp_core99_kick_cpu(int nr) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 45a8ed0585cd..4b044d8cb49a 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -19,10 +19,3 @@ config PPC_POWERNV select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL default y - -config PPC_POWERNV_RTAS - depends on PPC_POWERNV - bool "Support for RTAS based PowerNV platforms such as BML" - default y - select PPC_ICS_RTAS - select PPC_RTAS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 6f3c5d33c3af..33e44f37212f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o 
opal-power.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o -obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o +obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c deleted file mode 100644 index 2809c9895288..000000000000 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ /dev/null @@ -1,1149 +0,0 @@ -/* - * The file intends to implement the functions needed by EEH, which is - * built on IODA compliant chip. Actually, lots of functions related - * to EEH would be built based on the OPAL APIs. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/debugfs.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/msi.h> -#include <linux/notifier.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/eeh.h> -#include <asm/eeh_event.h> -#include <asm/io.h> -#include <asm/iommu.h> -#include <asm/msi_bitmap.h> -#include <asm/opal.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> -#include <asm/tce.h> - -#include "powernv.h" -#include "pci.h" - -static int ioda_eeh_nb_init = 0; - -static int ioda_eeh_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - uint64_t changed_evts = (uint64_t)change; - - /* - * We simply send special EEH event if EEH has - * been enabled, or clear pending events in - * case that we enable EEH soon - */ - if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || - !(events & OPAL_EVENT_PCI_ERROR)) - return 0; - - if (eeh_enabled()) - eeh_send_failure_event(NULL); - else - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - - return 0; -} - -static struct notifier_block ioda_eeh_nb = { - .notifier_call = ioda_eeh_event, - .next = NULL, - .priority = 0 -}; - -#ifdef CONFIG_DEBUG_FS -static ssize_t ioda_eeh_ei_write(struct file *filp, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct pci_controller *hose = filp->private_data; - struct pnv_phb *phb = hose->private_data; - struct eeh_dev *edev; - struct eeh_pe *pe; - int pe_no, type, func; - unsigned long addr, mask; - char buf[50]; - int ret; - - if (!phb->eeh_ops || !phb->eeh_ops->err_inject) - return -ENXIO; - - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); - if (!ret) - return -EFAULT; - - /* Retrieve parameters */ - ret = sscanf(buf, "%x:%x:%x:%lx:%lx", - &pe_no, &type, &func, &addr, &mask); - if (ret != 5) - return -EINVAL; - - /* Retrieve PE */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return -ENOMEM; - edev->phb = hose; - edev->pe_config_addr = pe_no; - pe = eeh_pe_get(edev); - kfree(edev); - if (!pe) - return -ENODEV; - - /* Do error injection */ - ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); - return ret < 0 ? 
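The deleted ioda_eeh_ei_write() above (and its pnv_eeh_ei_write() replacement later in this patch) accepts one line of five colon-separated hex fields, pe_no:type:func:addr:mask. A small user-space sketch of the same parse-and-validate step, not the kernel code itself:

    #include <stdio.h>

    int main(void)
    {
        const char *buf = "2:0:5:8000000000000000:ffff000000000000";
        unsigned int pe_no, type, func;
        unsigned long addr, mask;

        /* Same format string as the debugfs handler. */
        if (sscanf(buf, "%x:%x:%x:%lx:%lx",
                   &pe_no, &type, &func, &addr, &mask) != 5) {
            fprintf(stderr, "expected pe_no:type:func:addr:mask\n");
            return 1;
        }

        printf("PE#%x type=%x func=%x addr=%lx mask=%lx\n",
               pe_no, type, func, addr, mask);
        return 0;
    }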
ret : count; -} - -static const struct file_operations ioda_eeh_ei_fops = { - .open = simple_open, - .llseek = no_llseek, - .write = ioda_eeh_ei_write, -}; - -static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) -{ - struct pci_controller *hose = data; - struct pnv_phb *phb = hose->private_data; - - out_be64(phb->regs + offset, val); - return 0; -} - -static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val) -{ - struct pci_controller *hose = data; - struct pnv_phb *phb = hose->private_data; - - *val = in_be64(phb->regs + offset); - return 0; -} - -static int ioda_eeh_outb_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xD10, val); -} - -static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xD10, val); -} - -static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xD90, val); -} - -static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xD90, val); -} - -static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xE10, val); -} - -static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xE10, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get, - ioda_eeh_outb_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get, - ioda_eeh_inbA_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, - ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); -#endif /* CONFIG_DEBUG_FS */ - - -/** - * ioda_eeh_post_init - Chip dependent post initialization - * @hose: PCI controller - * - * The function will be called after eeh PEs and devices - * have been built. That means the EEH is ready to supply - * service with I/O cache. - */ -static int ioda_eeh_post_init(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - int ret; - - /* Register OPAL event notifier */ - if (!ioda_eeh_nb_init) { - ret = opal_notifier_register(&ioda_eeh_nb); - if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); - return ret; - } - - ioda_eeh_nb_init = 1; - } - -#ifdef CONFIG_DEBUG_FS - if (!phb->has_dbgfs && phb->dbgfs) { - phb->has_dbgfs = 1; - - debugfs_create_file("err_injct", 0200, - phb->dbgfs, hose, - &ioda_eeh_ei_fops); - - debugfs_create_file("err_injct_outbound", 0600, - phb->dbgfs, hose, - &ioda_eeh_outb_dbgfs_ops); - debugfs_create_file("err_injct_inboundA", 0600, - phb->dbgfs, hose, - &ioda_eeh_inbA_dbgfs_ops); - debugfs_create_file("err_injct_inboundB", 0600, - phb->dbgfs, hose, - &ioda_eeh_inbB_dbgfs_ops); - } -#endif - - /* If EEH is enabled, we're going to rely on that. - * Otherwise, we restore to conventional mechanism - * to clear frozen PE during PCI config access. - */ - if (eeh_enabled()) - phb->flags |= PNV_PHB_FLAG_EEH; - else - phb->flags &= ~PNV_PHB_FLAG_EEH; - - return 0; -} - -/** - * ioda_eeh_set_option - Set EEH operation or I/O setting - * @pe: EEH PE - * @option: options - * - * Enable or disable EEH option for the indicated PE. The - * function also can be used to enable I/O or DMA for the - * PE. 
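The debugfs accessors above are factored so that one get/set pair parameterized by register offset backs all three err_injct_* files (offsets 0xD10, 0xD90 and 0xE10 per the patch). A compact standalone model of that factoring, with an ordinary array standing in for the PHB MMIO window:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t regs[0x1000 / 8];        /* stand-in for phb->regs MMIO */

    static int dbgfs_set(void *data, int offset, uint64_t val)
    {
        (void)data;
        regs[offset / 8] = val;              /* out_be64() in the real code */
        return 0;
    }

    static int dbgfs_get(void *data, int offset, uint64_t *val)
    {
        (void)data;
        *val = regs[offset / 8];             /* in_be64() in the real code */
        return 0;
    }

    /* Thin per-register wrappers, like the outb/inbA/inbB trio. */
    static int outb_set(void *d, uint64_t v)  { return dbgfs_set(d, 0xD10, v); }
    static int outb_get(void *d, uint64_t *v) { return dbgfs_get(d, 0xD10, v); }

    int main(void)
    {
        uint64_t v;
        outb_set(NULL, 0x123456789abcdefULL);
        outb_get(NULL, &v);
        printf("outbound err-inject reg = 0x%llx\n", (unsigned long long)v);
        return 0;
    }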
- */ -static int ioda_eeh_set_option(struct eeh_pe *pe, int option) -{ - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - bool freeze_pe = false; - int enable, ret = 0; - s64 rc; - - /* Check on PE number */ - if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { - pr_err("%s: PE address %x out of range [0, %x] " - "on PHB#%x\n", - __func__, pe->addr, phb->ioda.total_pe, - hose->global_number); - return -EINVAL; - } - - switch (option) { - case EEH_OPT_DISABLE: - return -EPERM; - case EEH_OPT_ENABLE: - return 0; - case EEH_OPT_THAW_MMIO: - enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; - break; - case EEH_OPT_THAW_DMA: - enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; - break; - case EEH_OPT_FREEZE_PE: - freeze_pe = true; - enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; - break; - default: - pr_warn("%s: Invalid option %d\n", - __func__, option); - return -EINVAL; - } - - /* If PHB supports compound PE, to handle it */ - if (freeze_pe) { - if (phb->freeze_pe) { - phb->freeze_pe(phb, pe->addr); - } else { - rc = opal_pci_eeh_freeze_set(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld freezing " - "PHB#%x-PE#%x\n", - __func__, rc, - phb->hose->global_number, pe->addr); - ret = -EIO; - } - } - } else { - if (phb->unfreeze_pe) { - ret = phb->unfreeze_pe(phb, pe->addr, enable); - } else { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld enable %d " - "for PHB#%x-PE#%x\n", - __func__, rc, option, - phb->hose->global_number, pe->addr); - ret = -EIO; - } - } - } - - return ret; -} - -static void ioda_eeh_phb_diag(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - long rc; - - rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, - PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, pe->phb->global_number, rc); -} - -static int ioda_eeh_get_phb_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; - s64 rc; - int result = 0; - - rc = opal_pci_eeh_freeze_status(phb->opal_id, - pe->addr, - &fstate, - &pcierr, - NULL); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld getting PHB#%x state\n", - __func__, rc, phb->hose->global_number); - return EEH_STATE_NOT_SUPPORT; - } - - /* - * Check PHB state. If the PHB is frozen for the - * first time, to dump the PHB diag-data. - */ - if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - } - - return result; -} - -static int ioda_eeh_get_pe_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; - s64 rc; - int result; - - /* - * We don't clobber hardware frozen state until PE - * reset is completed. In order to keep EEH core - * moving forward, we have to return operational - * state during PE reset. - */ - if (pe->state & EEH_PE_RESET) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - return result; - } - - /* - * Fetch PE state from hardware. If the PHB - * supports compound PE, let it handle that. 
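ioda_eeh_set_option() above reduces the five EEH options to either an early return or one OPAL freeze action plus a freeze-versus-unfreeze flag. A user-space reduction of just that mapping; the enum values are placeholders and only the shape of the switch follows the patch:

    #include <stdio.h>

    enum { EEH_OPT_DISABLE, EEH_OPT_ENABLE, EEH_OPT_THAW_MMIO,
           EEH_OPT_THAW_DMA, EEH_OPT_FREEZE_PE };
    enum { ACT_CLEAR_FREEZE_MMIO, ACT_CLEAR_FREEZE_DMA, ACT_SET_FREEZE_ALL };

    /* 0: action prepared; 1: nothing to do; negative: errno-style failure. */
    static int map_option(int option, int *action, int *freeze)
    {
        *freeze = 0;
        switch (option) {
        case EEH_OPT_DISABLE:   return -1;   /* like -EPERM: can't disable */
        case EEH_OPT_ENABLE:    return 1;    /* already enabled, no action */
        case EEH_OPT_THAW_MMIO: *action = ACT_CLEAR_FREEZE_MMIO; return 0;
        case EEH_OPT_THAW_DMA:  *action = ACT_CLEAR_FREEZE_DMA;  return 0;
        case EEH_OPT_FREEZE_PE: *action = ACT_SET_FREEZE_ALL; *freeze = 1;
                                return 0;
        default:                return -2;   /* like -EINVAL */
        }
    }

    int main(void)
    {
        int action, freeze;
        if (map_option(EEH_OPT_THAW_DMA, &action, &freeze) == 0)
            printf("action=%d freeze=%d\n", action, freeze);
        return 0;
    }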
- */ - if (phb->get_pe_state) { - fstate = phb->get_pe_state(phb, pe->addr); - } else { - rc = opal_pci_eeh_freeze_status(phb->opal_id, - pe->addr, - &fstate, - &pcierr, - NULL); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", - __func__, rc, phb->hose->global_number, pe->addr); - return EEH_STATE_NOT_SUPPORT; - } - } - - /* Figure out state */ - switch (fstate) { - case OPAL_EEH_STOPPED_NOT_FROZEN: - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - break; - case OPAL_EEH_STOPPED_MMIO_FREEZE: - result = (EEH_STATE_DMA_ACTIVE | - EEH_STATE_DMA_ENABLED); - break; - case OPAL_EEH_STOPPED_DMA_FREEZE: - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_MMIO_ENABLED); - break; - case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: - result = 0; - break; - case OPAL_EEH_STOPPED_RESET: - result = EEH_STATE_RESET_ACTIVE; - break; - case OPAL_EEH_STOPPED_TEMP_UNAVAIL: - result = EEH_STATE_UNAVAILABLE; - break; - case OPAL_EEH_STOPPED_PERM_UNAVAIL: - result = EEH_STATE_NOT_SUPPORT; - break; - default: - result = EEH_STATE_NOT_SUPPORT; - pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", - __func__, phb->hose->global_number, - pe->addr, fstate); - } - - /* - * If PHB supports compound PE, to freeze all - * slave PEs for consistency. - * - * If the PE is switching to frozen state for the - * first time, to dump the PHB diag-data. - */ - if (!(result & EEH_STATE_NOT_SUPPORT) && - !(result & EEH_STATE_UNAVAILABLE) && - !(result & EEH_STATE_MMIO_ACTIVE) && - !(result & EEH_STATE_DMA_ACTIVE) && - !(pe->state & EEH_PE_ISOLATED)) { - if (phb->freeze_pe) - phb->freeze_pe(phb, pe->addr); - - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - } - - return result; -} - -/** - * ioda_eeh_get_state - Retrieve the state of PE - * @pe: EEH PE - * - * The PE's state should be retrieved from the PEEV, PEST - * IODA tables. Since the OPAL has exported the function - * to do it, it'd better to use that. - */ -static int ioda_eeh_get_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - - /* Sanity check on PE number. PHB PE should have 0 */ - if (pe->addr < 0 || - pe->addr >= phb->ioda.total_pe) { - pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n", - __func__, phb->hose->global_number, - pe->addr, phb->ioda.total_pe); - return EEH_STATE_NOT_SUPPORT; - } - - if (pe->type & EEH_PE_PHB) - return ioda_eeh_get_phb_state(pe); - - return ioda_eeh_get_pe_state(pe); -} - -static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) -{ - s64 rc = OPAL_HARDWARE; - - while (1) { - rc = opal_pci_poll(phb->opal_id); - if (rc <= 0) - break; - - if (system_state < SYSTEM_RUNNING) - udelay(1000 * rc); - else - msleep(rc); - } - - return rc; -} - -int ioda_eeh_phb_reset(struct pci_controller *hose, int option) -{ - struct pnv_phb *phb = hose->private_data; - s64 rc = OPAL_HARDWARE; - - pr_debug("%s: Reset PHB#%x, option=%d\n", - __func__, hose->global_number, option); - - /* Issue PHB complete reset request */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_COMPLETE, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_DEACTIVATE) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_COMPLETE, - OPAL_DEASSERT_RESET); - if (rc < 0) - goto out; - - /* - * Poll state of the PHB until the request is done - * successfully. 
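The fstate switch above translates the firmware's freeze state into a bitmask describing which of MMIO and DMA are still usable. The same decode table as a standalone function; the bit values and enum constants below are arbitrary stand-ins for the EEH_STATE_* and OPAL_EEH_STOPPED_* names:

    #include <stdio.h>

    /* EEH state bits, combined exactly as in the kernel switch. */
    #define ST_MMIO_ACTIVE  0x01
    #define ST_DMA_ACTIVE   0x02
    #define ST_MMIO_ENABLED 0x04
    #define ST_DMA_ENABLED  0x08
    #define ST_RESET_ACTIVE 0x10
    #define ST_UNAVAILABLE  0x20
    #define ST_NOT_SUPPORT  0x40

    enum { NOT_FROZEN, MMIO_FREEZE, DMA_FREEZE, MMIO_DMA_FREEZE,
           RESET, TEMP_UNAVAIL, PERM_UNAVAIL };

    static int decode_fstate(int fstate)
    {
        switch (fstate) {
        case NOT_FROZEN:      return ST_MMIO_ACTIVE | ST_DMA_ACTIVE |
                                     ST_MMIO_ENABLED | ST_DMA_ENABLED;
        case MMIO_FREEZE:     return ST_DMA_ACTIVE | ST_DMA_ENABLED;
        case DMA_FREEZE:      return ST_MMIO_ACTIVE | ST_MMIO_ENABLED;
        case MMIO_DMA_FREEZE: return 0;           /* fully frozen */
        case RESET:           return ST_RESET_ACTIVE;
        case TEMP_UNAVAIL:    return ST_UNAVAILABLE;
        case PERM_UNAVAIL:
        default:              return ST_NOT_SUPPORT;
        }
    }

    int main(void)
    {
        printf("MMIO-freeze decodes to 0x%x\n", decode_fstate(MMIO_FREEZE));
        return 0;
    }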
The PHB reset is usually PHB complete - * reset followed by hot reset on root bus. So we also - * need the PCI bus settlement delay. - */ - rc = ioda_eeh_phb_poll(phb); - if (option == EEH_RESET_DEACTIVATE) { - if (system_state < SYSTEM_RUNNING) - udelay(1000 * EEH_PE_RST_SETTLE_TIME); - else - msleep(EEH_PE_RST_SETTLE_TIME); - } -out: - if (rc != OPAL_SUCCESS) - return -EIO; - - return 0; -} - -static int ioda_eeh_root_reset(struct pci_controller *hose, int option) -{ - struct pnv_phb *phb = hose->private_data; - s64 rc = OPAL_SUCCESS; - - pr_debug("%s: Reset PHB#%x, option=%d\n", - __func__, hose->global_number, option); - - /* - * During the reset deassert time, we needn't care - * the reset scope because the firmware does nothing - * for fundamental or hot reset during deassert phase. - */ - if (option == EEH_RESET_FUNDAMENTAL) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_FUNDAMENTAL, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_HOT) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_HOT, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_DEACTIVATE) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_HOT, - OPAL_DEASSERT_RESET); - if (rc < 0) - goto out; - - /* Poll state of the PHB until the request is done */ - rc = ioda_eeh_phb_poll(phb); - if (option == EEH_RESET_DEACTIVATE) - msleep(EEH_PE_RST_SETTLE_TIME); -out: - if (rc != OPAL_SUCCESS) - return -EIO; - - return 0; -} - -static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) - -{ - struct device_node *dn = pci_device_to_OF_node(dev); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); - int aer = edev ? edev->aer_cap : 0; - u32 ctrl; - - pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", - __func__, pci_domain_nr(dev->bus), - dev->bus->number, option); - - switch (option) { - case EEH_RESET_FUNDAMENTAL: - case EEH_RESET_HOT: - /* Don't report linkDown event */ - if (aer) { - eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl |= PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); - } - - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_HOLD_TIME); - - break; - case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_SETTLE_TIME); - - /* Continue reporting linkDown event */ - if (aer) { - eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl &= ~PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); - } - - break; - } - - return 0; -} - -void pnv_pci_reset_secondary_bus(struct pci_dev *dev) -{ - struct pci_controller *hose; - - if (pci_is_root_bus(dev->bus)) { - hose = pci_bus_to_host(dev->bus); - ioda_eeh_root_reset(hose, EEH_RESET_HOT); - ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); - } else { - ioda_eeh_bridge_reset(dev, EEH_RESET_HOT); - ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); - } -} - -/** - * ioda_eeh_reset - Reset the indicated PE - * @pe: EEH PE - * @option: reset option - * - * Do reset on the indicated PE. For PCI bus sensitive PE, - * we need to reset the parent p2p bridge. The PHB has to - * be reinitialized if the p2p bridge is root bridge. For - * PCI device sensitive PE, we will try to reset the device - * through FLR. 
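ioda_eeh_bridge_reset() above performs a hot reset through a PCI-to-PCI bridge: set the secondary-bus-reset bit in the bridge control register, hold it, clear it, then let the bus settle, with the AER surprise-down error masked around the sequence so the forced link-down is not reported. A mock-config-space sketch of the core toggle; the delay constants here are placeholders, not the kernel's EEH_PE_RST_* values:

    #include <stdint.h>
    #include <stdio.h>

    #define BRIDGE_CONTROL       0x3e  /* PCI_BRIDGE_CONTROL */
    #define BRIDGE_CTL_BUS_RESET 0x40  /* PCI_BRIDGE_CTL_BUS_RESET */
    #define HOLD_MS              250   /* placeholder hold time */
    #define SETTLE_MS            1800  /* placeholder settle time */

    static uint16_t cfg[128];                          /* mock config space */

    static uint16_t rd(int off)             { return cfg[off / 2]; }
    static void     wr(int off, uint16_t v) { cfg[off / 2] = v; }
    static void     wait_ms(int ms)         { (void)ms; /* msleep() stand-in */ }

    static void secondary_bus_reset(void)
    {
        uint16_t ctrl;

        ctrl = rd(BRIDGE_CONTROL);                     /* assert the reset bit */
        wr(BRIDGE_CONTROL, ctrl | BRIDGE_CTL_BUS_RESET);
        wait_ms(HOLD_MS);

        ctrl = rd(BRIDGE_CONTROL);                     /* deassert it again */
        wr(BRIDGE_CONTROL, ctrl & ~BRIDGE_CTL_BUS_RESET);
        wait_ms(SETTLE_MS);                            /* let the bus settle */
    }

    int main(void)
    {
        secondary_bus_reset();
        printf("bridge control = 0x%04x\n", rd(BRIDGE_CONTROL));
        return 0;
    }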
For now, we don't have OPAL APIs to do HARD - * reset yet, so all reset would be SOFT (HOT) reset. - */ -static int ioda_eeh_reset(struct eeh_pe *pe, int option) -{ - struct pci_controller *hose = pe->phb; - struct pci_bus *bus; - int ret; - - /* - * For PHB reset, we always have complete reset. For those PEs whose - * primary bus derived from root complex (root bus) or root port - * (usually bus#1), we apply hot or fundamental reset on the root port. - * For other PEs, we always have hot reset on the PE primary bus. - * - * Here, we have different design to pHyp, which always clear the - * frozen state during PE reset. However, the good idea here from - * benh is to keep frozen state before we get PE reset done completely - * (until BAR restore). With the frozen state, HW drops illegal IO - * or MMIO access, which can incur recrusive frozen PE during PE - * reset. The side effect is that EEH core has to clear the frozen - * state explicitly after BAR restore. - */ - if (pe->type & EEH_PE_PHB) { - ret = ioda_eeh_phb_reset(hose, option); - } else { - struct pnv_phb *phb; - s64 rc; - - /* - * The frozen PE might be caused by PAPR error injection - * registers, which are expected to be cleared after hitting - * frozen PE as stated in the hardware spec. Unfortunately, - * that's not true on P7IOC. So we have to clear it manually - * to avoid recursive EEH errors during recovery. - */ - phb = hose->private_data; - if (phb->model == PNV_PHB_MODEL_P7IOC && - (option == EEH_RESET_HOT || - option == EEH_RESET_FUNDAMENTAL)) { - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_ERROR, - OPAL_ASSERT_RESET); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld clearing " - "error injection registers\n", - __func__, rc); - return -EIO; - } - } - - bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || - pci_is_root_bus(bus->parent)) - ret = ioda_eeh_root_reset(hose, option); - else - ret = ioda_eeh_bridge_reset(bus->self, option); - } - - return ret; -} - -/** - * ioda_eeh_get_log - Retrieve error log - * @pe: frozen PE - * @severity: permanent or temporary error - * @drv_log: device driver log - * @len: length of device driver log - * - * Retrieve error log, which contains log from device driver - * and firmware. - */ -static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) -{ - if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - - return 0; -} - -/** - * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE - * @pe: EEH PE - * - * For particular PE, it might have included PCI bridges. In order - * to make the PE work properly, those PCI bridges should be configured - * correctly. However, we need do nothing on P7IOC since the reset - * function will do everything that should be covered by the function. 
- */ -static int ioda_eeh_configure_bridge(struct eeh_pe *pe) -{ - return 0; -} - -static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask) -{ - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - s64 ret; - - /* Sanity check on error type */ - if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && - type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { - pr_warn("%s: Invalid error type %d\n", - __func__, type); - return -ERANGE; - } - - if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || - func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { - pr_warn("%s: Invalid error function %d\n", - __func__, func); - return -ERANGE; - } - - /* Firmware supports error injection ? */ - if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { - pr_warn("%s: Firmware doesn't support error injection\n", - __func__); - return -ENXIO; - } - - /* Do error injection */ - ret = opal_pci_err_inject(phb->opal_id, pe->addr, - type, func, addr, mask); - if (ret != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld injecting error " - "%d-%d to PHB#%x-PE#%x\n", - __func__, ret, type, func, - hose->global_number, pe->addr); - return -EIO; - } - - return 0; -} - -static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) -{ - /* GEM */ - if (data->gemXfir || data->gemRfir || - data->gemRirqfir || data->gemMask || data->gemRwof) - pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->gemXfir), - be64_to_cpu(data->gemRfir), - be64_to_cpu(data->gemRirqfir), - be64_to_cpu(data->gemMask), - be64_to_cpu(data->gemRwof)); - - /* LEM */ - if (data->lemFir || data->lemErrMask || - data->lemAction0 || data->lemAction1 || data->lemWof) - pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->lemFir), - be64_to_cpu(data->lemErrMask), - be64_to_cpu(data->lemAction0), - be64_to_cpu(data->lemAction1), - be64_to_cpu(data->lemWof)); -} - -static void ioda_eeh_hub_diag(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; - long rc; - - rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", - __func__, phb->hub_id, rc); - return; - } - - switch (data->type) { - case OPAL_P7IOC_DIAG_TYPE_RGC: - pr_info("P7IOC diag-data for RGC\n\n"); - ioda_eeh_hub_diag_common(data); - if (data->rgc.rgcStatus || data->rgc.rgcLdcp) - pr_info(" RGC: %016llx %016llx\n", - be64_to_cpu(data->rgc.rgcStatus), - be64_to_cpu(data->rgc.rgcLdcp)); - break; - case OPAL_P7IOC_DIAG_TYPE_BI: - pr_info("P7IOC diag-data for BI %s\n\n", - data->bi.biDownbound ? 
"Downbound" : "Upbound"); - ioda_eeh_hub_diag_common(data); - if (data->bi.biLdcp0 || data->bi.biLdcp1 || - data->bi.biLdcp2 || data->bi.biFenceStatus) - pr_info(" BI: %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->bi.biLdcp0), - be64_to_cpu(data->bi.biLdcp1), - be64_to_cpu(data->bi.biLdcp2), - be64_to_cpu(data->bi.biFenceStatus)); - break; - case OPAL_P7IOC_DIAG_TYPE_CI: - pr_info("P7IOC diag-data for CI Port %d\n\n", - data->ci.ciPort); - ioda_eeh_hub_diag_common(data); - if (data->ci.ciPortStatus || data->ci.ciPortLdcp) - pr_info(" CI: %016llx %016llx\n", - be64_to_cpu(data->ci.ciPortStatus), - be64_to_cpu(data->ci.ciPortLdcp)); - break; - case OPAL_P7IOC_DIAG_TYPE_MISC: - pr_info("P7IOC diag-data for MISC\n\n"); - ioda_eeh_hub_diag_common(data); - break; - case OPAL_P7IOC_DIAG_TYPE_I2C: - pr_info("P7IOC diag-data for I2C\n\n"); - ioda_eeh_hub_diag_common(data); - break; - default: - pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", - __func__, phb->hub_id, data->type); - } -} - -static int ioda_eeh_get_pe(struct pci_controller *hose, - u16 pe_no, struct eeh_pe **pe) -{ - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pnv_pe; - struct eeh_pe *dev_pe; - struct eeh_dev edev; - - /* - * If PHB supports compound PE, to fetch - * the master PE because slave PE is invisible - * to EEH core. - */ - pnv_pe = &phb->ioda.pe_array[pe_no]; - if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { - pnv_pe = pnv_pe->master; - WARN_ON(!pnv_pe || - !(pnv_pe->flags & PNV_IODA_PE_MASTER)); - pe_no = pnv_pe->pe_number; - } - - /* Find the PE according to PE# */ - memset(&edev, 0, sizeof(struct eeh_dev)); - edev.phb = hose; - edev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&edev); - if (!dev_pe) - return -EEXIST; - - /* Freeze the (compound) PE */ - *pe = dev_pe; - if (!(dev_pe->state & EEH_PE_ISOLATED)) - phb->freeze_pe(phb, pe_no); - - /* - * At this point, we're sure the (compound) PE should - * have been frozen. However, we still need poke until - * hitting the frozen PE on top level. - */ - dev_pe = dev_pe->parent; - while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { - int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE); - - ret = eeh_ops->get_state(dev_pe, NULL); - if (ret <= 0 || (ret & active_flags) == active_flags) { - dev_pe = dev_pe->parent; - continue; - } - - /* Frozen parent PE */ - *pe = dev_pe; - if (!(dev_pe->state & EEH_PE_ISOLATED)) - phb->freeze_pe(phb, dev_pe->addr); - - /* Next one */ - dev_pe = dev_pe->parent; - } - - return 0; -} - -/** - * ioda_eeh_next_error - Retrieve next error for EEH core to handle - * @pe: The affected PE - * - * The function is expected to be called by EEH core while it gets - * special EEH event (without binding PE). The function calls to - * OPAL APIs for next error to handle. The informational error is - * handled internally by platform. However, the dead IOC, dead PHB, - * fenced PHB and frozen PE should be handled by EEH core eventually. - */ -static int ioda_eeh_next_error(struct eeh_pe **pe) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct eeh_pe *phb_pe, *parent_pe; - __be64 frozen_pe_no; - __be16 err_type, severity; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - long rc; - int state, ret = EEH_NEXT_ERR_NONE; - - /* - * While running here, it's safe to purge the event queue. - * And we should keep the cached OPAL notifier event sychronized - * between the kernel and firmware. 
- */ - eeh_remove_event(NULL, false); - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - - list_for_each_entry(hose, &hose_list, list_node) { - /* - * If the subordinate PCI buses of the PHB has been - * removed or is exactly under error recovery, we - * needn't take care of it any more. - */ - phb = hose->private_data; - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) - continue; - - rc = opal_pci_next_error(phb->opal_id, - &frozen_pe_no, &err_type, &severity); - - /* If OPAL API returns error, we needn't proceed */ - if (rc != OPAL_SUCCESS) { - pr_devel("%s: Invalid return value on " - "PHB#%x (0x%lx) from opal_pci_next_error", - __func__, hose->global_number, rc); - continue; - } - - /* If the PHB doesn't have error, stop processing */ - if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || - be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { - pr_devel("%s: No error found on PHB#%x\n", - __func__, hose->global_number); - continue; - } - - /* - * Processing the error. We're expecting the error with - * highest priority reported upon multiple errors on the - * specific PHB. - */ - pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", - __func__, be16_to_cpu(err_type), be16_to_cpu(severity), - be64_to_cpu(frozen_pe_no), hose->global_number); - switch (be16_to_cpu(err_type)) { - case OPAL_EEH_IOC_ERROR: - if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { - pr_err("EEH: dead IOC detected\n"); - ret = EEH_NEXT_ERR_DEAD_IOC; - } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { - pr_info("EEH: IOC informative error " - "detected\n"); - ioda_eeh_hub_diag(hose); - ret = EEH_NEXT_ERR_NONE; - } - - break; - case OPAL_EEH_PHB_ERROR: - if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { - *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected, " - "location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_DEAD_PHB; - } else if (be16_to_cpu(severity) == - OPAL_EEH_SEV_PHB_FENCED) { - *pe = phb_pe; - pr_err("EEH: Fenced PHB#%x detected, " - "location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_FENCED_PHB; - } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { - pr_info("EEH: PHB#%x informative error " - "detected, location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ioda_eeh_phb_diag(phb_pe); - pnv_pci_dump_phb_diag_data(hose, phb_pe->data); - ret = EEH_NEXT_ERR_NONE; - } - - break; - case OPAL_EEH_PE_ERROR: - /* - * If we can't find the corresponding PE, we - * just try to unfreeze. - */ - if (ioda_eeh_get_pe(hose, - be64_to_cpu(frozen_pe_no), pe)) { - /* Try best to clear it */ - pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", - hose->global_number, frozen_pe_no); - pr_info("EEH: PHB location: %s\n", - eeh_pe_loc_get(phb_pe)); - opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - ret = EEH_NEXT_ERR_NONE; - } else if ((*pe)->state & EEH_PE_ISOLATED || - eeh_pe_passed(*pe)) { - ret = EEH_NEXT_ERR_NONE; - } else { - pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", - (*pe)->addr, (*pe)->phb->global_number); - pr_err("EEH: PE location: %s, PHB location: %s\n", - eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_FROZEN_PE; - } - - break; - default: - pr_warn("%s: Unexpected error type %d\n", - __func__, be16_to_cpu(err_type)); - } - - /* - * EEH core will try recover from fenced PHB or - * frozen PE. In the time for frozen PE, EEH core - * enable IO path for that before collecting logs, - * but it ruins the site. 
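The next_error loop above polls each PHB for its highest-priority pending error and folds the (err_type, severity) pair into one of the EEH_NEXT_ERR_* verdicts, consuming informative errors on the spot. A reduced decision table with placeholder enum values; only the branching mirrors the patch:

    #include <stdio.h>

    enum { ERR_NONE_T, ERR_IOC, ERR_PHB, ERR_PE };
    enum { SEV_NONE, SEV_INF, SEV_IOC_DEAD, SEV_PHB_DEAD, SEV_PHB_FENCED };
    enum { NEXT_NONE, NEXT_INF, NEXT_FROZEN_PE, NEXT_FENCED_PHB,
           NEXT_DEAD_PHB, NEXT_DEAD_IOC };

    static int classify(int err_type, int severity)
    {
        switch (err_type) {
        case ERR_IOC:
            return severity == SEV_IOC_DEAD ? NEXT_DEAD_IOC : NEXT_NONE;
        case ERR_PHB:
            if (severity == SEV_PHB_DEAD)   return NEXT_DEAD_PHB;
            if (severity == SEV_PHB_FENCED) return NEXT_FENCED_PHB;
            return NEXT_NONE;          /* informative: dump diag, move on */
        case ERR_PE:
            return NEXT_FROZEN_PE;     /* unless the PE is missing or passed */
        default:
            return NEXT_NONE;
        }
    }

    int main(void)
    {
        printf("fenced PHB -> verdict %d\n", classify(ERR_PHB, SEV_PHB_FENCED));
        return 0;
    }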
So we have to dump the - * log in advance here. - */ - if ((ret == EEH_NEXT_ERR_FROZEN_PE || - ret == EEH_NEXT_ERR_FENCED_PHB) && - !((*pe)->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(*pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data((*pe)->phb, - (*pe)->data); - } - - /* - * We probably have the frozen parent PE out there and - * we need have to handle frozen parent PE firstly. - */ - if (ret == EEH_NEXT_ERR_FROZEN_PE) { - parent_pe = (*pe)->parent; - while (parent_pe) { - /* Hit the ceiling ? */ - if (parent_pe->type & EEH_PE_PHB) - break; - - /* Frozen parent PE ? */ - state = ioda_eeh_get_state(parent_pe); - if (state > 0 && - (state & active_flags) != active_flags) - *pe = parent_pe; - - /* Next parent level */ - parent_pe = parent_pe->parent; - } - - /* We possibly migrate to another PE */ - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - } - - /* - * If we have no errors on the specific PHB or only - * informative error there, we continue poking it. - * Otherwise, we need actions to be taken by upper - * layer. - */ - if (ret > EEH_NEXT_ERR_INF) - break; - } - - return ret; -} - -struct pnv_eeh_ops ioda_eeh_ops = { - .post_init = ioda_eeh_post_init, - .set_option = ioda_eeh_set_option, - .get_state = ioda_eeh_get_state, - .reset = ioda_eeh_reset, - .get_log = ioda_eeh_get_log, - .configure_bridge = ioda_eeh_configure_bridge, - .err_inject = ioda_eeh_err_inject, - .next_error = ioda_eeh_next_error -}; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index e261869adc86..ce738ab3d5a9 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -12,6 +12,7 @@ */ #include <linux/atomic.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> @@ -38,12 +39,14 @@ #include "powernv.h" #include "pci.h" +static bool pnv_eeh_nb_init = false; + /** - * powernv_eeh_init - EEH platform dependent initialization + * pnv_eeh_init - EEH platform dependent initialization * * EEH platform dependent initialization on powernv */ -static int powernv_eeh_init(void) +static int pnv_eeh_init(void) { struct pci_controller *hose; struct pnv_phb *phb; @@ -85,37 +88,280 @@ static int powernv_eeh_init(void) return 0; } +static int pnv_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* + * We simply send special EEH event if EEH has + * been enabled, or clear pending events in + * case that we enable EEH soon + */ + if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || + !(events & OPAL_EVENT_PCI_ERROR)) + return 0; + + if (eeh_enabled()) + eeh_send_failure_event(NULL); + else + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + return 0; +} + +static struct notifier_block pnv_eeh_nb = { + .notifier_call = pnv_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static ssize_t pnv_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!eeh_ops || !eeh_ops->err_inject) + return -ENXIO; + + /* Copy over argument buffer */ + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, 
"%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations pnv_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = pnv_eeh_ei_write, +}; + +static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + offset, val); + return 0; +} + +static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + offset); + return 0; +} + +static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD10, val); +} + +static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD10, val); +} + +static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD90, val); +} + +static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD90, val); +} + +static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xE10, val); +} + +static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xE10, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, + pnv_eeh_outb_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, + pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, + pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + /** - * powernv_eeh_post_init - EEH platform dependent post initialization + * pnv_eeh_post_init - EEH platform dependent post initialization * * EEH platform dependent post initialization on powernv. When * the function is called, the EEH PEs and devices should have * been built. If the I/O cache staff has been built, EEH is * ready to supply service. */ -static int powernv_eeh_post_init(void) +static int pnv_eeh_post_init(void) { struct pci_controller *hose; struct pnv_phb *phb; int ret = 0; + /* Register OPAL event notifier */ + if (!pnv_eeh_nb_init) { + ret = opal_notifier_register(&pnv_eeh_nb); + if (ret) { + pr_warn("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + pnv_eeh_nb_init = true; + } + list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; - if (phb->eeh_ops && phb->eeh_ops->post_init) { - ret = phb->eeh_ops->post_init(hose); - if (ret) - break; - } + /* + * If EEH is enabled, we're going to rely on that. + * Otherwise, we restore to conventional mechanism + * to clear frozen PE during PCI config access. 
+ */ + if (eeh_enabled()) + phb->flags |= PNV_PHB_FLAG_EEH; + else + phb->flags &= ~PNV_PHB_FLAG_EEH; + + /* Create debugfs entries */ +#ifdef CONFIG_DEBUG_FS + if (phb->has_dbgfs || !phb->dbgfs) + continue; + + phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &pnv_eeh_ei_fops); + + debugfs_create_file("err_injct_outbound", 0600, + phb->dbgfs, hose, + &pnv_eeh_outb_dbgfs_ops); + debugfs_create_file("err_injct_inboundA", 0600, + phb->dbgfs, hose, + &pnv_eeh_inbA_dbgfs_ops); + debugfs_create_file("err_injct_inboundB", 0600, + phb->dbgfs, hose, + &pnv_eeh_inbB_dbgfs_ops); +#endif /* CONFIG_DEBUG_FS */ } + return ret; } +static int pnv_eeh_cap_start(struct pci_dn *pdn) +{ + u32 status; + + if (!pdn) + return 0; + + pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + +static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) +{ + int pos = pnv_eeh_cap_start(pdn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + pnv_pci_cfg_read(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + + pos &= ~3; + pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + + /* Found */ + if (id == cap) + return pos; + + /* Next one */ + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + +static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 header; + int pos = 256, ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + /** - * powernv_eeh_dev_probe - Do probe on PCI device - * @dev: PCI device - * @flag: unused + * pnv_eeh_probe - Do probe on PCI device + * @pdn: PCI device node + * @data: unused * * When EEH module is installed during system boot, all PCI devices * are checked one by one to see if it supports EEH. The function @@ -129,12 +375,12 @@ static int powernv_eeh_post_init(void) * was possiblly triggered by EEH core, the binding between EEH device * and the PCI device isn't built yet. */ -static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) +static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; - struct device_node *dn = pci_device_to_OF_node(dev); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + uint32_t pcie_flags; int ret; /* @@ -143,40 +389,42 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * the root bridge. So it's not reasonable to continue * the probing. 
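pnv_eeh_find_cap() above re-implements the standard capability search using only raw config reads: confirm PCI_STATUS_CAP_LIST is set, then chase the linked list with a 48-iteration bound, stopping on a pointer below 0x40 or an ID of 0xff from an unreadable device. A user-space version over a mock config space; the example layout is invented:

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_STATUS          0x06
    #define PCI_STATUS_CAP_LIST 0x10
    #define PCI_CAPABILITY_LIST 0x34

    static uint8_t cfg[256];

    static uint8_t  rd8(int off)  { return cfg[off]; }
    static uint16_t rd16(int off) { return cfg[off] | (cfg[off + 1] << 8); }

    static int find_cap(int cap)
    {
        int cnt = 48;                    /* bound against looped lists */
        int pos;

        if (!(rd16(PCI_STATUS) & PCI_STATUS_CAP_LIST))
            return 0;

        pos = rd8(PCI_CAPABILITY_LIST);
        while (cnt--) {
            if (pos < 0x40)              /* pointer into the header: stop */
                break;
            pos &= ~3;
            if (rd8(pos) == 0xff)        /* device not responding: stop */
                break;
            if (rd8(pos) == cap)
                return pos;
            pos = rd8(pos + 1);          /* PCI_CAP_LIST_NEXT */
        }
        return 0;
    }

    int main(void)
    {
        cfg[PCI_STATUS] = PCI_STATUS_CAP_LIST;
        cfg[PCI_CAPABILITY_LIST] = 0x40;
        cfg[0x40] = 0x10;                /* e.g. the PCIe capability ID */
        cfg[0x41] = 0x00;                /* end of list */
        printf("capability found at 0x%x\n", find_cap(0x10));
        return 0;
    }

pnv_eeh_find_ecap() follows the same pattern for extended capabilities, starting at offset 256 with 4-byte headers.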
*/ - if (!dn || !edev || edev->pe) - return 0; + if (!edev || edev->pe) + return NULL; /* Skip for PCI-ISA bridge */ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) - return 0; + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; /* Initialize eeh device */ - edev->class_code = dev->class; + edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; - edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); - if (pci_is_pcie(dev)) { - edev->pcie_cap = pci_pcie_cap(dev); - - if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) - edev->mode |= EEH_DEV_ROOT_PORT; - else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) - edev->mode |= EEH_DEV_DS_PORT; - - edev->aer_cap = pci_find_ext_capability(dev, - PCI_EXT_CAP_ID_ERR); + if (edev->pcie_cap) { + pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } } - edev->config_addr = ((dev->bus->number << 8) | dev->devfn); - edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); + edev->config_addr = (pdn->busno << 8) | (pdn->devfn); + edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr]; /* Create PE */ ret = eeh_add_to_parent_pe(edev); if (ret) { - pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", - __func__, pci_name(dev), ret); - return ret; + pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", + __func__, hose->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); + return NULL; } /* @@ -195,8 +443,10 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * Broadcom Austin 4-ports NICs (14e4:1657) * Broadcom Shiner 2-ports 10G NICs (14e4:168e) */ - if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) || - (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e)) + if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1657) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168e)) edev->pe->state |= EEH_PE_CFG_RESTRICTED; /* @@ -206,7 +456,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * to PE reset. 
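Because pnv_eeh_probe() now works from a pci_dn rather than a pci_dev, the PCIe port type above is decoded by hand: mask PCI_EXP_FLAGS_TYPE out of the capability flags word and shift it down four bits. A worked two-line example using the same constants:

    #include <stdio.h>

    #define PCI_EXP_FLAGS_TYPE      0x00f0  /* device/port type field */
    #define PCI_EXP_TYPE_ROOT_PORT  0x4
    #define PCI_EXP_TYPE_DOWNSTREAM 0x6

    int main(void)
    {
        unsigned pcie_flags = 0x0042;       /* example flags word: root port */
        unsigned type = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;

        printf("port type %u (%s)\n", type,
               type == PCI_EXP_TYPE_ROOT_PORT  ? "root port" :
               type == PCI_EXP_TYPE_DOWNSTREAM ? "downstream" : "other");
        return 0;
    }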
*/ if (!edev->pe->bus) - edev->pe->bus = dev->bus; + edev->pe->bus = pci_find_bus(hose->global_number, + pdn->busno); /* * Enable EEH explicitly so that we will do EEH check @@ -217,11 +468,11 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) /* Save memory bars */ eeh_save_bars(edev); - return 0; + return NULL; } /** - * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable * @pe: EEH PE * @option: operation to be issued * @@ -229,36 +480,236 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * Currently, following options are support according to PAPR: * Enable EEH, Disable EEH, Enable MMIO and Enable DMA */ -static int powernv_eeh_set_option(struct eeh_pe *pe, int option) +static int pnv_eeh_set_option(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + bool freeze_pe = false; + int opt, ret = 0; + s64 rc; + + /* Sanity check on option */ + switch (option) { + case EEH_OPT_DISABLE: + return -EPERM; + case EEH_OPT_ENABLE: + return 0; + case EEH_OPT_THAW_MMIO: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; + break; + case EEH_OPT_THAW_DMA: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; + default: + pr_warn("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } - /* - * What we need do is pass it down for hardware - * implementation to handle it. - */ - if (phb->eeh_ops && phb->eeh_ops->set_option) - ret = phb->eeh_ops->set_option(pe, option); + /* If PHB supports compound PE, to handle it */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } else { + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, opt); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } return ret; } /** - * powernv_eeh_get_pe_addr - Retrieve PE address + * pnv_eeh_get_pe_addr - Retrieve PE address * @pe: EEH PE * * Retrieve the PE address according to the given tranditional * PCI BDF (Bus/Device/Function) address. */ -static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) +static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) { return pe->addr; } +static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + s64 rc; + + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, + PNV_PCI_DIAG_BUF_SIZE); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", + __func__, rc, pe->phb->global_number); +} + +static int pnv_eeh_get_phb_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* + * Check PHB state. 
If the PHB is frozen for the + * first time, to dump the PHB diag-data. + */ + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +static int pnv_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + + /* + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. + */ + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + return result; + } + + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return EEH_STATE_NOT_SUPPORT; + } + } + + /* Figure out state */ + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result = 0; + break; + case OPAL_EEH_STOPPED_RESET: + result = EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result = EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result = EEH_STATE_NOT_SUPPORT; + break; + default: + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); + } + + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && + !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + /** - * powernv_eeh_get_state - Retrieve PE state + * pnv_eeh_get_state - Retrieve PE state * @pe: EEH PE * @delay: delay while PE state is temporarily unavailable * @@ -267,64 +718,279 @@ static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) * we prefer passing down to hardware implementation to handle * it. 
*/ -static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) +static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + int ret; + + if (pe->type & EEH_PE_PHB) + ret = pnv_eeh_get_phb_state(pe); + else + ret = pnv_eeh_get_pe_state(pe); + + if (!delay) + return ret; + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + + return ret; +} + +static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + if (system_state < SYSTEM_RUNNING) + udelay(1000 * rc); + else + msleep(rc); + } + + return rc; +} + +int pnv_eeh_phb_reset(struct pci_controller *hose, int option) { - struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = EEH_STATE_NOT_SUPPORT; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; - if (phb->eeh_ops && phb->eeh_ops->get_state) { - ret = phb->eeh_ops->get_state(pe); + /* + * Poll state of the PHB until the request is done + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. + */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) { + if (system_state < SYSTEM_RUNNING) + udelay(1000 * EEH_PE_RST_SETTLE_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + } +out: + if (rc != OPAL_SUCCESS) + return -EIO; - /* - * If the PE state is temporarily unavailable, - * to inform the EEH core delay for default - * period (1 second) - */ - if (delay) { - *delay = 0; - if (ret & EEH_STATE_UNAVAILABLE) - *delay = 1000; + return 0; +} + +static int pnv_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_FUNDAMENTAL, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +{ + struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int aer = edev ? 
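pnv_eeh_phb_poll() above encodes the OPAL polling contract: opal_pci_poll() returns a positive number of milliseconds to wait before asking again, zero when the request is done, and a negative code on error; early in boot the wait must busy-spin with udelay() because the scheduler is not running yet. A standalone simulation with the firmware call mocked:

    #include <stdio.h>
    #include <unistd.h>

    /* Mock firmware: completes after three polls, each asking for a
     * 10ms back-off. Positive = retry delay in ms, 0 = done, <0 = error. */
    static long opal_poll_mock(void)
    {
        static int calls;
        return ++calls < 3 ? 10 : 0;
    }

    static long phb_poll(void)
    {
        long rc;

        while (1) {
            rc = opal_poll_mock();
            if (rc <= 0)
                break;               /* finished or failed */
            /* kernel: udelay(1000 * rc) before the scheduler is up,
             * msleep(rc) afterwards; a plain sleep stands in here. */
            usleep(rc * 1000);
        }
        return rc;
    }

    int main(void)
    {
        printf("poll finished with rc=%ld\n", phb_poll());
        return 0;
    }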
edev->aer_cap : 0; + u32 ctrl; + + pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(dev->bus), + dev->bus->number, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + /* Don't report linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); } + + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_SETTLE_TIME); + + /* Continue reporting linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + break; } - return ret; + return 0; +} + +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + pnv_eeh_root_reset(hose, EEH_RESET_HOT); + pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } } /** - * powernv_eeh_reset - Reset the specified PE + * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE * @option: reset option * - * Reset the specified PE + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. */ -static int powernv_eeh_reset(struct eeh_pe *pe, int option) +static int pnv_eeh_reset(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + struct pci_bus *bus; + int ret; + + /* + * For PHB reset, we always have complete reset. For those PEs whose + * primary bus derived from root complex (root bus) or root port + * (usually bus#1), we apply hot or fundamental reset on the root port. + * For other PEs, we always have hot reset on the PE primary bus. + * + * Here, we have different design to pHyp, which always clear the + * frozen state during PE reset. However, the good idea here from + * benh is to keep frozen state before we get PE reset done completely + * (until BAR restore). With the frozen state, HW drops illegal IO + * or MMIO access, which can incur recrusive frozen PE during PE + * reset. The side effect is that EEH core has to clear the frozen + * state explicitly after BAR restore. + */ + if (pe->type & EEH_PE_PHB) { + ret = pnv_eeh_phb_reset(hose, option); + } else { + struct pnv_phb *phb; + s64 rc; - if (phb->eeh_ops && phb->eeh_ops->reset) - ret = phb->eeh_ops->reset(pe, option); + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. 
+ */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + + bus = eeh_pe_bus_get(pe); + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) + ret = pnv_eeh_root_reset(hose, option); + else + ret = pnv_eeh_bridge_reset(bus->self, option); + } return ret; } /** - * powernv_eeh_wait_state - Wait for PE state + * pnv_eeh_wait_state - Wait for PE state * @pe: EEH PE * @max_wait: maximal period in microsecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. */ -static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) { int ret; int mwait; while (1) { - ret = powernv_eeh_get_state(pe, &mwait); + ret = pnv_eeh_get_state(pe, &mwait); /* * If the PE's state is temporarily unavailable, @@ -348,7 +1014,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) } /** - * powernv_eeh_get_log - Retrieve error log + * pnv_eeh_get_log - Retrieve error log * @pe: EEH PE * @severity: temporary or permanent error log * @drv_log: driver log to be combined with retrieved error log @@ -356,41 +1022,30 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) * * Retrieve the temporary or permanent error from the PE. */ -static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) +static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) { - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - if (phb->eeh_ops && phb->eeh_ops->get_log) - ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); - - return ret; + return 0; } /** - * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @pe: EEH PE * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered * again. */ -static int powernv_eeh_configure_bridge(struct eeh_pe *pe) +static int pnv_eeh_configure_bridge(struct eeh_pe *pe) { - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = 0; - - if (phb->eeh_ops && phb->eeh_ops->configure_bridge) - ret = phb->eeh_ops->configure_bridge(pe); - - return ret; + return 0; } /** - * powernv_pe_err_inject - Inject specified error to the indicated PE + * pnv_pe_err_inject - Inject specified error to the indicated PE * @pe: the indicated PE * @type: error type * @func: specific error type @@ -401,22 +1056,52 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) * determined by @type and @func, to the indicated PE for * testing purpose. 
*/ -static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask) +static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + s64 rc; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } - if (phb->eeh_ops && phb->eeh_ops->err_inject) - ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } - return ret; + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + rc = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, rc, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; } -static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) +static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) { - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev || !edev->pe) return false; @@ -427,51 +1112,377 @@ static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) return false; } -static int powernv_eeh_read_config(struct device_node *dn, - int where, int size, u32 *val) +static int pnv_eeh_read_config(struct pci_dn *pdn, + int where, int size, u32 *val) { - if (powernv_eeh_cfg_blocked(dn)) { + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) { *val = 0xFFFFFFFF; return PCIBIOS_SET_FAILED; } - return pnv_pci_cfg_read(dn, where, size, val); + return pnv_pci_cfg_read(pdn, where, size, val); } -static int powernv_eeh_write_config(struct device_node *dn, - int where, int size, u32 val) +static int pnv_eeh_write_config(struct pci_dn *pdn, + int where, int size, u32 val) { - if (powernv_eeh_cfg_blocked(dn)) + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) return PCIBIOS_SET_FAILED; - return pnv_pci_cfg_write(dn, where, size, val); + return pnv_pci_cfg_write(pdn, where, size, val); +} + +static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); + + /* LEM */ + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); +} + +static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; + long rc; + + rc = 
opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + pnv_eeh_dump_hub_diag_common(data); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\n\n", + data->ci.ciPort); + pnv_eeh_dump_hub_diag_common(data); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + default: + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static int pnv_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; + + /* + * If the PHB supports compound PEs, fetch the master PE, + * because slave PEs are invisible to the EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } + + /* Find the PE according to PE# */ + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + + /* Freeze the (compound) PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need to poke until + * hitting the frozen PE on the top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + + return 0; } /** - * powernv_eeh_next_error - Retrieve next EEH error to handle + * pnv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE * - * Using OPAL API, to retrieve next EEH error for EEH core to handle + * The function is expected to be called by the EEH core when it gets + * a special EEH event (without a binding PE). The function calls + * OPAL APIs for the next error to handle. 
The informational error is + handled internally by the platform. However, the dead IOC, dead PHB, + fenced PHB and frozen PE should be handled by the EEH core eventually. */ -static int powernv_eeh_next_error(struct eeh_pe **pe) +static int pnv_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; - struct pnv_phb *phb = NULL; + struct pnv_phb *phb; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + long rc; + int state, ret = EEH_NEXT_ERR_NONE; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event synchronized + * between the kernel and firmware. + */ + eeh_remove_event(NULL, false); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB have been + * removed or are under error recovery, we + * needn't take care of them any more. + */ phb = hose->private_data; - break; - } + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + if (rc != OPAL_SUCCESS) { + pr_devel("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have an error, stop processing */ + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { + pr_devel("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Process the error. We expect the error with the + * highest priority to be reported when multiple + * errors exist on the specific PHB. + */ + pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", + __func__, be16_to_cpu(err_type), + be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), + hose->global_number); switch (be16_to_cpu(err_type)) { + case OPAL_EEH_IOC_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { + pr_err("EEH: dead IOC detected\n"); + ret = EEH_NEXT_ERR_DEAD_IOC; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + pnv_eeh_get_and_dump_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PHB_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { + *pe = phb_pe; + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_DEAD_PHB; + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { + *pe = phb_pe; + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + pnv_eeh_get_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PE_ERROR: + /* + * If we can't find the corresponding PE, we + * just try to unfreeze. 
+ */ + if (pnv_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, + frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { + ret = EEH_NEXT_ERR_NONE; + } else { + pr_err("EEH: Frozen PE#%x " + "on PHB#%x detected\n", + (*pe)->addr, + (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, " + "PHB location: %s\n", + eeh_pe_loc_get(*pe), + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FROZEN_PE; + } + + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, be16_to_cpu(err_type)); + } - if (phb && phb->eeh_ops->next_error) - return phb->eeh_ops->next_error(pe); + /* + * The EEH core will try to recover from a fenced PHB or + * a frozen PE. For a frozen PE, the EEH core enables the + * IO path before collecting logs, but that ruins the + * error site. So we have to dump the log in advance here. + */ + if ((ret == EEH_NEXT_ERR_FROZEN_PE || + ret == EEH_NEXT_ERR_FENCED_PHB) && + !((*pe)->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); + } - return -EEXIST; + /* + * We probably have the frozen parent PE out there, and + * we need to handle that frozen parent PE first. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = eeh_ops->get_state(parent_pe, NULL); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* + * If we have no errors on the specific PHB or only an + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by the upper + * layer. 
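+	 *
+	 * Note the break just below relies on the EEH_NEXT_ERR_*
+	 * codes being ordered by severity, informational first; a
+	 * sketch of the assumed ordering (not the authoritative
+	 * definition) is:
+	 *
+	 *	enum { EEH_NEXT_ERR_NONE, EEH_NEXT_ERR_INF,
+	 *	       EEH_NEXT_ERR_FROZEN_PE, EEH_NEXT_ERR_FENCED_PHB,
+	 *	       EEH_NEXT_ERR_DEAD_PHB, EEH_NEXT_ERR_DEAD_IOC };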
+ */ + if (ret > EEH_NEXT_ERR_INF) + break; + } + + return ret; } -static int powernv_eeh_restore_config(struct device_node *dn) +static int pnv_eeh_restore_config(struct pci_dn *pdn) { - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret; @@ -490,24 +1501,23 @@ static int powernv_eeh_restore_config(struct device_node *dn) return 0; } -static struct eeh_ops powernv_eeh_ops = { +static struct eeh_ops pnv_eeh_ops = { .name = "powernv", - .init = powernv_eeh_init, - .post_init = powernv_eeh_post_init, - .of_probe = NULL, - .dev_probe = powernv_eeh_dev_probe, - .set_option = powernv_eeh_set_option, - .get_pe_addr = powernv_eeh_get_pe_addr, - .get_state = powernv_eeh_get_state, - .reset = powernv_eeh_reset, - .wait_state = powernv_eeh_wait_state, - .get_log = powernv_eeh_get_log, - .configure_bridge = powernv_eeh_configure_bridge, - .err_inject = powernv_eeh_err_inject, - .read_config = powernv_eeh_read_config, - .write_config = powernv_eeh_write_config, - .next_error = powernv_eeh_next_error, - .restore_config = powernv_eeh_restore_config + .init = pnv_eeh_init, + .post_init = pnv_eeh_post_init, + .probe = pnv_eeh_probe, + .set_option = pnv_eeh_set_option, + .get_pe_addr = pnv_eeh_get_pe_addr, + .get_state = pnv_eeh_get_state, + .reset = pnv_eeh_reset, + .wait_state = pnv_eeh_wait_state, + .get_log = pnv_eeh_get_log, + .configure_bridge = pnv_eeh_configure_bridge, + .err_inject = pnv_eeh_err_inject, + .read_config = pnv_eeh_read_config, + .write_config = pnv_eeh_write_config, + .next_error = pnv_eeh_next_error, + .restore_config = pnv_eeh_restore_config }; /** @@ -521,7 +1531,7 @@ static int __init eeh_powernv_init(void) int ret = -EINVAL; eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); - ret = eeh_ops_register(&powernv_eeh_ops); + ret = eeh_ops_register(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); else diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 23260f7dfa7a..5aa9c1ce4de3 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -452,5 +452,6 @@ void __init opal_platform_dump_init(void) return; } - opal_dump_resend_notification(); + if (opal_check_token(OPAL_DUMP_RESEND)) + opal_dump_resend_notification(); } diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 518fe95dbf24..38ce757e5e2a 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -313,7 +313,8 @@ int __init opal_elog_init(void) } /* We are now ready to pull error logs from opal. 
*/ - opal_resend_pending_logs(); + if (opal_check_token(OPAL_ELOG_RESEND)) + opal_resend_pending_logs(); return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 5c21d9c07f45..4ec6219287fc 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -120,7 +120,11 @@ static struct image_header_t image_header; static struct image_data_t image_data; static struct validate_flash_t validate_flash_data; static struct manage_flash_t manage_flash_data; -static struct update_flash_t update_flash_data; + +/* Initialize update_flash_data status to No Operation */ +static struct update_flash_t update_flash_data = { + .status = FLASH_NO_OP, +}; static DEFINE_MUTEX(image_data_mutex); @@ -542,7 +546,7 @@ static struct attribute_group image_op_attr_group = { .attrs = image_op_attrs, }; -void __init opal_flash_init(void) +void __init opal_flash_update_init(void) { int ret; diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index f9896fd5d04a..9db4398ded5d 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -16,6 +16,7 @@ #include <linux/of.h> #include <asm/opal.h> +#include <asm/nvram.h> #include <asm/machdep.h> static unsigned int nvram_size; @@ -62,6 +63,15 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) return count; } +static int __init opal_nvram_init_log_partitions(void) +{ + /* Scan nvram for partitions */ + nvram_scan_partitions(); + nvram_init_oops_partition(0); + return 0; +} +machine_arch_initcall(powernv, opal_nvram_init_log_partitions); + void __init opal_nvram_init(void) { struct device_node *np; diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 4ab67ef7abc9..655250499d18 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -46,18 +46,28 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) mutex_lock(&opal_sensor_mutex); ret = opal_sensor_read(sensor_hndl, token, &data); - if (ret != OPAL_ASYNC_COMPLETION) - goto out_token; + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } - ret = opal_async_wait_response(token, &msg); - if (ret) { - pr_err("%s: Failed to wait for the async response, %d\n", - __func__, ret); - goto out_token; - } + ret = opal_error_code(be64_to_cpu(msg.params[1])); + *sensor_data = be32_to_cpu(data); + break; + + case OPAL_SUCCESS: + ret = 0; + *sensor_data = be32_to_cpu(data); + break; - *sensor_data = be32_to_cpu(data); - ret = be64_to_cpu(msg.params[1]); + default: + ret = opal_error_code(ret); + break; + } out_token: mutex_unlock(&opal_sensor_mutex); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index fcbe899fe299..a7ade94cdf87 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -286,9 +286,12 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); -OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_pci_set_phb_cxl_mode, 
OPAL_PCI_SET_PHB_CAPI_MODE); OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); +OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); +OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); +OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 18fd4e71c9c1..2241565b0739 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -23,6 +23,8 @@ #include <linux/kobject.h> #include <linux/delay.h> #include <linux/memblock.h> +#include <linux/kthread.h> +#include <linux/freezer.h> #include <asm/machdep.h> #include <asm/opal.h> @@ -58,6 +60,7 @@ static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static DEFINE_SPINLOCK(opal_notifier_lock); static uint64_t last_notified_mask = 0x0ul; static atomic_t opal_notifier_hold = ATOMIC_INIT(0); +static uint32_t opal_heartbeat; static void opal_reinit_cores(void) { @@ -302,23 +305,26 @@ void opal_notifier_disable(void) * Opal message notifier based on message type. Allow subscribers to get * notified for a specific message type. */ -int opal_message_notifier_register(enum OpalMessageType msg_type, +int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb) { - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - if (msg_type > OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Invalid message type argument (%d)\n", + if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { + pr_warning("%s: Invalid arguments, msg_type:%d\n", __func__, msg_type); return -EINVAL; } + return atomic_notifier_chain_register( &opal_msg_notifier_head[msg_type], nb); } +int opal_message_notifier_unregister(enum opal_msg_type msg_type, + struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister( + &opal_msg_notifier_head[msg_type], nb); +} + static void opal_message_do_notify(uint32_t msg_type, void *msg) { /* notify subscribers */ @@ -351,7 +357,7 @@ static void opal_handle_message(void) type = be32_to_cpu(msg.msg_type); /* Sanity check */ - if (type > OPAL_MSG_TYPE_MAX) { + if (type >= OPAL_MSG_TYPE_MAX) { pr_warning("%s: Unknown message type: %u\n", __func__, type); return; } @@ -665,6 +671,9 @@ static void __init opal_dump_region_init(void) uint64_t size; int rc; + if (!opal_check_token(OPAL_REGISTER_DUMP_REGION)) + return; + /* Register kernel log buffer */ addr = log_buf_addr_get(); if (addr == NULL) @@ -684,6 +693,15 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } +static void opal_flash_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-flash")) + of_platform_device_create(np, NULL, NULL); +} + static void opal_ipmi_init(struct device_node *opal_node) { struct device_node *np; @@ -741,6 +759,29 @@ static void __init opal_irq_init(struct device_node *dn) } } +static int kopald(void *unused) +{ + set_freezable(); + do { + try_to_freeze(); + opal_poll_events(NULL); + msleep_interruptible(opal_heartbeat); + } while (!kthread_should_stop()); + + return 0; +} + +static void opal_init_heartbeat(void) +{ + /* Old firmware, we assume the HVC heartbeat is sufficient */ + if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", + &opal_heartbeat) != 0) + opal_heartbeat = 0; + + if (opal_heartbeat) + 
kthread_run(kopald, NULL, "kopald"); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -769,6 +810,9 @@ static int __init opal_init(void) /* Create i2c platform devices */ opal_i2c_create_devs(); + /* Setup a heartbeat thread if requested by OPAL */ + opal_init_heartbeat(); + /* Find all OPAL interrupts and request them */ opal_irq_init(opal_node); @@ -782,7 +826,7 @@ static int __init opal_init(void) /* Setup error log interface */ rc = opal_elog_init(); /* Setup code update interface */ - opal_flash_init(); + opal_flash_update_init(); /* Setup platform dump extract interface */ opal_platform_dump_init(); /* Setup system parameters interface */ @@ -791,8 +835,11 @@ static int __init opal_init(void) opal_msglog_init(); } + /* Initialize OPAL IPMI backend */ opal_ipmi_init(opal_node); + opal_flash_init(opal_node); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -823,13 +870,17 @@ void opal_shutdown(void) } /* Unregister memory dump region */ - opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); + if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION)) + opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); } /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); EXPORT_SYMBOL_GPL(opal_ipmi_send); EXPORT_SYMBOL_GPL(opal_ipmi_recv); +EXPORT_SYMBOL_GPL(opal_flash_read); +EXPORT_SYMBOL_GPL(opal_flash_write); +EXPORT_SYMBOL_GPL(opal_flash_erase); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, @@ -894,6 +945,25 @@ void opal_free_sg_list(struct opal_sg_list *sg) } } +int opal_error_code(int rc) +{ + switch (rc) { + case OPAL_SUCCESS: return 0; + + case OPAL_PARAMETER: return -EINVAL; + case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; + case OPAL_BUSY_EVENT: return -EBUSY; + case OPAL_NO_MEM: return -ENOMEM; + + case OPAL_UNSUPPORTED: return -EIO; + case OPAL_HARDWARE: return -EIO; + case OPAL_INTERNAL_ERROR: return -EIO; + default: + pr_err("%s: unexpected OPAL error %d\n", __func__, rc); + return -EIO; + } +} + EXPORT_SYMBOL_GPL(opal_poll_events); EXPORT_SYMBOL_GPL(opal_rtc_read); EXPORT_SYMBOL_GPL(opal_rtc_write); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6c9ff2b95119..920c252d1f49 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -44,6 +44,9 @@ #include "powernv.h" #include "pci.h" +/* 256M DMA window, 4K TCE pages, 8 bytes TCE */ +#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) + static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) 
{ @@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, vaf.fmt = fmt; vaf.va = &args; - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); - else + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) sprintf(pfix, "%04x:%02x ", pci_domain_nr(pe->pbus), pe->pbus->number); +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + sprintf(pfix, "%04x:%02x:%2x.%d", + pci_domain_nr(pe->parent_dev->bus), + (pe->rid & 0xff00) >> 8, + PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); +#endif /* CONFIG_PCI_IOV*/ printk("%spci %s: [PE# %.3d] %pV", level, pfix, pe->pe_number, &vaf); @@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, bool is_add) { struct pnv_ioda_pe *slave; - struct pci_dev *pdev; + struct pci_dev *pdev = NULL; int ret; /* @@ -630,8 +640,12 @@ if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) pdev = pe->pbus->self; - else + else if (pe->flags & PNV_IODA_PE_DEV) pdev = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + pdev = pe->parent_dev->bus->self; +#endif /* CONFIG_PCI_IOV */ while (pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *parent; @@ -649,6 +663,87 @@ return 0; } +#ifdef CONFIG_PCI_IOV +static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + struct pci_dev *parent; + uint8_t bcomp, dcomp, fcomp; + int64_t rc; + long rid_end, rid; + + /* Currently, we just deconfigure VF PE. Bus PE will always be there. */ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + parent = pe->pbus->self; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else + parent = pe->pdev->bus->self; + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* Clear the reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = 0; + + /* Release from all parents' PELT-V */ + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? 
*/ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_UNMAP_PE); + if (rc) + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + + pe->pbus = NULL; + pe->pdev = NULL; + pe->parent_dev = NULL; + + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) case 16: bcomp = OpalPciBus4Bits; break; case 32: bcomp = OpalPciBus3Bits; break; default: - pr_err("%s: Number of subordinate busses %d" - " unsupported\n", - pci_name(pe->pbus->self), count); + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); /* Do an exact match only */ bcomp = OpalPciBusAll; } rid_end = pe->rid + (count << 8); } else { - parent = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else +#endif /* CONFIG_PCI_IOV */ + parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; @@ -774,6 +873,78 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) return 10; } +#ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct pci_dn *pdn = pci_get_pdn(dev); + int i; + struct resource *res, res2; + resource_size_t size; + u16 num_vfs; + + if (!dev->is_physfn) + return -EINVAL; + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = pdn->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * After doing so, there would be a "hole" in the /proc/iomem when + * offset is a positive value. It looks like the device return some + * mmio back to the system, which actually no one could use it. 
+ */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #if 0 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { @@ -857,7 +1028,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->dma_weight += pnv_ioda_dma_weight(dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) @@ -916,6 +1086,10 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) return; } + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), + GFP_KERNEL, hose->node); + pe->tce32_table->data = pe; + /* Associate it with all child devices */ pnv_ioda_setup_same_PE(bus, pe); @@ -974,6 +1148,441 @@ static void pnv_pci_ioda_setup_PEs(void) } } +#ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int i, j; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + for (j = 0; j < M64_PER_IOV; j++) { + if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + continue; + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); + clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); + pdn->m64_wins[i][j] = IODA_INVALID_M64; + } + + return 0; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + unsigned int win; + struct resource *res; + int i, j; + int64_t rc; + int total_vfs; + resource_size_t size, start; + int pe_num; + int vf_groups; + int vf_per_group; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + total_vfs = pci_sriov_get_totalvfs(pdev); + + /* Initialize the m64_wins to IODA_INVALID_M64 */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + for (j = 0; j < M64_PER_IOV; j++) + pdn->m64_wins[i][j] = IODA_INVALID_M64; + + if (pdn->m64_per_iov == M64_PER_IOV) { + vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; + vf_per_group = (num_vfs <= M64_PER_IOV)? 
1: + roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + } else { + vf_groups = 1; + vf_per_group = 1; + } + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + for (j = 0; j < vf_groups; j++) { + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + goto m64_failed; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + pdn->m64_wins[i][j] = win; + + if (pdn->m64_per_iov == M64_PER_IOV) { + size = pci_iov_resource_size(pdev, + PCI_IOV_RESOURCES + i); + size = size * vf_per_group; + start = res->start + size * j; + } else { + size = resource_size(res); + start = res->start; + } + + /* Map the M64 here */ + if (pdn->m64_per_iov == M64_PER_IOV) { + pe_num = pdn->offset + j; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, OPAL_M64_WINDOW_TYPE, + pdn->m64_wins[i][j], 0); + } + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + pdn->m64_wins[i][j], + start, + 0, /* unused */ + size); + + + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", + win, rc); + goto m64_failed; + } + + if (pdn->m64_per_iov == M64_PER_IOV) + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + else + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", + win, rc); + goto m64_failed; + } + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev); + return -EBUSY; +} + +static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct iommu_table *tbl; + unsigned long addr; + int64_t rc; + + bus = dev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + tbl = pe->tce32_table; + addr = tbl->it_base; + + opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + pe->pe_number << 1, 1, __pa(addr), + 0, 0x1000); + + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + (pe->pe_number << 1) + 1, + pe->tce_bypass_base, + 0); + if (rc) + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + + iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); + free_pages(addr, get_order(TCE32_TABLE_SIZE)); + pe->tce32_table = NULL; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + struct pci_dn *pdn; + u16 vf_index; + int64_t rc; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (!pdev->is_physfn) + return; + + if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { + int vf_group; + int vf_per_group; + int vf_index1; + + vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + + for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) + for (vf_index = vf_group * vf_per_group; + vf_index < (vf_group + 1) * vf_per_group && + vf_index < num_vfs; + vf_index++) + for (vf_index1 = vf_group * vf_per_group; + vf_index1 < (vf_group + 1) * vf_per_group && + vf_index1 < num_vfs; + vf_index1++){ + + rc = opal_pci_set_peltv(phb->opal_id, + pdn->offset + vf_index, + pdn->offset + vf_index1, + 
OPAL_REMOVE_PE_FROM_DOMAIN); + + if (rc) + dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", + __func__, + pdn->offset + vf_index1, rc); + } + } + + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_dma_pe(pdev, pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(phb, pe->pe_number); + } +} + +void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + struct pci_sriov *iov; + u16 num_vfs; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + iov = pdev->sriov; + num_vfs = pdn->num_vfs; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev, num_vfs); + + if (phb->type == PNV_PHB_IODA2) { + if (pdn->m64_per_iov == 1) + pnv_pci_vf_resource_shift(pdev, -pdn->offset); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev); + + /* Release PE numbers */ + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + } +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe); +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pci_dn *pdn; + int64_t rc; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (!pdev->is_physfn) + return; + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pe_num = pdn->offset + vf_index; + + pe = &phb->ioda.pe_array[pe_num]; + pe->pe_number = pe_num; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->tce32_seg = -1; + pe->mve_number = -1; + pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | + pci_iov_virtfn_devfn(pdev, vf_index); + + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", + hose->global_number, pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? 
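+			 * For now we just free the PE number and skip
+			 * this VF, as the code below does; the PE has
+			 * not been added to pe_list at this point.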
*/ + if (pe_num) + pnv_ioda_free_pe(phb, pe_num); + pe->pdev = NULL; + continue; + } + + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), + GFP_KERNEL, hose->node); + pe->tce32_table->data = pe; + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } + + if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { + int vf_group; + int vf_per_group; + int vf_index1; + + vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + + for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { + for (vf_index = vf_group * vf_per_group; + vf_index < (vf_group + 1) * vf_per_group && + vf_index < num_vfs; + vf_index++) { + for (vf_index1 = vf_group * vf_per_group; + vf_index1 < (vf_group + 1) * vf_per_group && + vf_index1 < num_vfs; + vf_index1++) { + + rc = opal_pci_set_peltv(phb->opal_id, + pdn->offset + vf_index, + pdn->offset + vf_index1, + OPAL_ADD_PE_TO_DOMAIN); + + if (rc) + dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", + __func__, + pdn->offset + vf_index1, rc); + } + } + } + } +} + +int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int ret; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (phb->type == PNV_PHB_IODA2) { + /* Calculate available PE for required VFs */ + mutex_lock(&phb->ioda.pe_alloc_mutex); + pdn->offset = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (pdn->offset >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + pdn->offset = 0; + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->num_vfs = num_vfs; + mutex_unlock(&phb->ioda.pe_alloc_mutex); + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. 
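+	 *
+	 * Illustration (numbers hypothetical): with a 4MB M64 segment
+	 * size and the VFs' PE block starting at PE#16, shifting the
+	 * IOV BAR by 16 segments puts VF0's BAR in the segment whose
+	 * high-order address bits decode to PE#16, VF1's to PE#17,
+	 * and so on.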
+ */ + if (pdn->m64_per_iov == 1) { + ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + if (ret) + goto m64_failed; + } + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +m64_failed: + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + + return ret; +} + +int pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_dev_pci_data(pdev); + return 0; +} + +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_dev_pci_data(pdev); + + pnv_pci_sriov_enable(pdev, num_vfs); + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); @@ -989,7 +1598,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); } static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, @@ -1016,7 +1625,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, &pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->tce32_table); } *pdev->dev.dma_mask = dma_mask; return 0; @@ -1053,9 +1662,9 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, list_for_each_entry(dev, &bus->devices, bus_list) { if (add_to_iommu_group) set_iommu_table_base_and_group(&dev->dev, - &pe->tce32_table); + pe->tce32_table); else - set_iommu_table_base(&dev->dev, &pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->tce32_table); if (dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate, @@ -1145,8 +1754,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, - tce32_table); + struct pnv_ioda_pe *pe = tbl->data; struct pnv_phb *phb = pe->phb; if (phb->type == PNV_PHB_IODA1) @@ -1167,9 +1775,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, int64_t rc; void *addr; - /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ -#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) - /* XXX FIXME: Handle 64-bit only DMA devices */ /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. 
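 * (For scale: the 32-bit window used below works out as 256MB / 4KB
 * = 64K TCEs at 8 bytes each, i.e. TCE32_TABLE_SIZE = 512KB per
 * 256MB segment, matching the define near the top of this file.)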
*/ /* XXX FIXME: Allocate multi-level tables on PHB3 */ @@ -1212,7 +1817,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = &pe->tce32_table; + tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, base << 28, IOMMU_PAGE_SHIFT_4K); @@ -1232,12 +1837,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - else + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } return; fail: @@ -1250,8 +1862,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, - tce32_table); + struct pnv_ioda_pe *pe = tbl->data; uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1296,10 +1907,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, pe->tce_bypass_base = 1ull << 59; /* Install set_bypass callback for VFIO */ - pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; + pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass; /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); + pnv_pci_ioda2_set_bypass(pe->tce32_table, true); } static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, @@ -1347,7 +1958,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = &pe->tce32_table; + tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, IOMMU_PAGE_SHIFT_4K); @@ -1365,12 +1976,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - else + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) @@ -1731,6 +2349,73 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } #endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_PCI_IOV +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct resource *res; + int i; + resource_size_t size; + struct pci_dn *pdn; + int mul, total_vfs; + + if (!pdev->is_physfn || pdev->is_added) + return; + + hose = pci_bus_to_host(pdev->bus); + phb = hose->private_data; + + pdn = pci_get_pdn(pdev); + pdn->vfs_expanded = 0; + + total_vfs = pci_sriov_get_totalvfs(pdev); + 
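+
+	/*
+	 * Sketch of the policy implemented below: by default each IOV
+	 * BAR is expanded to total_pe segments, so a single M64 window
+	 * can be segmented with one VF per PE; the M64_PER_IOV grouped
+	 * layout is used only when a single VF BAR exceeds 64MB.
+	 */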
pdn->m64_per_iov = 1; + mul = phb->ioda.total_pe; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_mem_pref_64(res->flags)) { + dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n", + i, res); + continue; + } + + size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* bigger than 64M */ + if (size > (1 << 26)) { + dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", + i, res); + pdn->m64_per_iov = M64_PER_IOV; + mul = roundup_pow_of_two(total_vfs); + break; + } + } + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_mem_pref_64(res->flags)) { + dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n", + i, res); + continue; + } + + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); + size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + res->end = res->start + size * mul - 1; + dev_dbg(&pdev->dev, " %pR\n", res); + dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + } + pdn->vfs_expanded = mul; +} +#endif /* CONFIG_PCI_IOV */ + /* * This function is supposed to be called on basis of PE from top * to bottom style. So the I/O or MMIO segment assigned to * @@ -1777,7 +2462,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, region.start += phb->ioda.io_segsize; index++; } - } else if (res->flags & IORESOURCE_MEM) { + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_mem_pref_64(res->flags)) { region.start = res->start - hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -1907,10 +2593,29 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, return phb->ioda.io_segsize; } +#ifdef CONFIG_PCI_IOV +static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + resource_size_t align, iov_align; + + iov_align = resource_size(&pdev->resource[resno]); + if (iov_align) + return iov_align; + + align = pci_iov_resource_size(pdev, resno); + if (pdn->vfs_expanded) + return pdn->vfs_expanded * align; + + return align; +} +#endif /* CONFIG_PCI_IOV */ + /* Prevent enabling devices for which we couldn't properly * assign a PE */ -static int pnv_pci_enable_device_hook(struct pci_dev *dev) +static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -1922,13 +2627,13 @@ static int pnv_pci_enable_device_hook(struct pci_dev *dev) * PEs aren't ready. 
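 * (With the bool conversion in this hunk, returning true means
 * "allow the device"; only devices without a valid PE# are
 * refused below.)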
*/ if (!phb->initialized) - return 0; + return true; pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) - return -EINVAL; + return false; - return 0; + return true; } static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, @@ -1988,9 +2693,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, hose->last_busno = 0xff; } hose->private_data = phb; + hose->controller_ops = pnv_pci_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; + mutex_init(&phb->ioda.pe_alloc_mutex); /* Detect specific models for error handling */ if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) @@ -2050,6 +2757,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); + mutex_init(&phb->ioda.pe_list_mutex); /* Calculate how many 32-bit TCE segments we have */ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; @@ -2078,9 +2786,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->get_pe_state = pnv_ioda_get_pe_state; phb->freeze_pe = pnv_ioda_freeze_pe; phb->unfreeze_pe = pnv_ioda_unfreeze_pe; -#ifdef CONFIG_EEH - phb->eeh_ops = &ioda_eeh_ops; -#endif /* Setup RID -> PE mapping function */ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; @@ -2104,9 +2809,15 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * the child P2P bridges) can form individual PE. */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; - ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; - ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; + pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook; + pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment; + pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus; + +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; + ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; +#endif + pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ @@ -2121,8 +2832,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ if (is_kdump_kernel()) { pr_info(" Issue PHB reset ...\n"); - ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); - ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); + pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); + pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); } /* Remove M64 resource if we can't configure it successfully */ diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 6ef6d4d8e7e2..4729ca793813 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -133,6 +133,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; + phb->hose->controller_ops = pnv_pci_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 54323d6b5166..bca2aeb6e4b6 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -366,9 +366,9 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_unlock_irqrestore(&phb->lock, flags); } -static void pnv_pci_config_check_eeh(struct pnv_phb *phb, - 
struct device_node *dn) +static void pnv_pci_config_check_eeh(struct pci_dn *pdn) { + struct pnv_phb *phb = pdn->phb->private_data; u8 fstate; __be16 pcierr; int pe_no; @@ -379,7 +379,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, * setup that yet. So all ER errors should be mapped to * reserved PE. */ - pe_no = PCI_DN(dn)->pe_number; + pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { if (phb->type == PNV_PHB_P5IOC2) pe_no = 0; @@ -407,8 +407,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, } cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", - (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), - pe_no, fstate); + (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || @@ -425,10 +424,9 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, } } -int pnv_pci_cfg_read(struct device_node *dn, +int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val) { - struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; s64 rc; @@ -462,10 +460,9 @@ int pnv_pci_cfg_read(struct device_node *dn, return PCIBIOS_SUCCESSFUL; } -int pnv_pci_cfg_write(struct device_node *dn, +int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val) { - struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; @@ -489,18 +486,17 @@ int pnv_pci_cfg_write(struct device_node *dn, } #if CONFIG_EEH -static bool pnv_pci_cfg_check(struct pci_controller *hose, - struct device_node *dn) +static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pdn->phb->private_data; /* EEH not enabled ? */ if (!(phb->flags & PNV_PHB_FLAG_EEH)) return true; /* PE reset or device removed ? 
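 * (i.e. config accesses are blocked while the owning PE has
 * EEH_PE_CFG_BLOCKED set, which is exactly what the check
 * below tests for)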
*/ - edev = of_node_to_eeh_dev(dn); + edev = pdn->edev; if (edev) { if (edev->pe && (edev->pe->state & EEH_PE_CFG_BLOCKED)) @@ -513,8 +509,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, return true; } #else -static inline pnv_pci_cfg_check(struct pci_controller *hose, - struct device_node *dn) +static inline pnv_pci_cfg_check(struct pci_dn *pdn) { return true; } @@ -524,32 +519,26 @@ static int pnv_pci_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; struct pnv_phb *phb; - bool found = false; int ret; *val = 0xFFFFFFFF; - for (dn = busdn->child; dn; dn = dn->sibling) { - pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) { - phb = pdn->phb->private_data; - found = true; - break; - } - } + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; - if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + if (!pnv_pci_cfg_check(pdn)) return PCIBIOS_DEVICE_NOT_FOUND; - ret = pnv_pci_cfg_read(dn, where, size, val); - if (phb->flags & PNV_PHB_FLAG_EEH) { + ret = pnv_pci_cfg_read(pdn, where, size, val); + phb = pdn->phb->private_data; + if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) { if (*val == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + eeh_dev_check_failure(pdn->edev)) return PCIBIOS_DEVICE_NOT_FOUND; } else { - pnv_pci_config_check_eeh(phb, dn); + pnv_pci_config_check_eeh(pdn); } return ret; @@ -559,27 +548,21 @@ static int pnv_pci_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; struct pnv_phb *phb; - bool found = false; int ret; - for (dn = busdn->child; dn; dn = dn->sibling) { - pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) { - phb = pdn->phb->private_data; - found = true; - break; - } - } + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; - if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + if (!pnv_pci_cfg_check(pdn)) return PCIBIOS_DEVICE_NOT_FOUND; - ret = pnv_pci_cfg_write(dn, where, size, val); + ret = pnv_pci_cfg_write(pdn, where, size, val); + phb = pdn->phb->private_data; if (!(phb->flags & PNV_PHB_FLAG_EEH)) - pnv_pci_config_check_eeh(phb, dn); + pnv_pci_config_check_eeh(pdn); return ret; } @@ -679,66 +662,31 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } -static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) -{ - struct iommu_table *tbl; - const __be64 *basep, *swinvp; - const __be32 *sizep; - - basep = of_get_property(hose->dn, "linux,tce-base", NULL); - sizep = of_get_property(hose->dn, "linux,tce-size", NULL); - if (basep == NULL || sizep == NULL) { - pr_err("PCI: %s has missing tce entries !\n", - hose->dn->full_name); - return NULL; - } - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node); - if (WARN_ON(!tbl)) - return NULL; - pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), - be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K); - iommu_init_table(tbl, hose->node); - iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); - - /* Deal with SW invalidated TCEs when needed (BML way) */ - swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", - NULL); - if (swinvp) { - tbl->it_busno = be64_to_cpu(swinvp[1]); - tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); - tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; 
- } - return tbl; -} - -static void pnv_pci_dma_fallback_setup(struct pci_controller *hose, - struct pci_dev *pdev) -{ - struct device_node *np = pci_bus_to_OF_node(hose->bus); - struct pci_dn *pdn; - - if (np == NULL) - return; - pdn = PCI_DN(np); - if (!pdn->iommu_table) - pdn->iommu_table = pnv_pci_setup_bml_iommu(hose); - if (!pdn->iommu_table) - return; - set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table); -} - static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; +#ifdef CONFIG_PCI_IOV + struct pnv_ioda_pe *pe; + struct pci_dn *pdn; + + /* Fix the VF pdn PE number */ + if (pdev->is_virtfn) { + pdn = pci_get_pdn(pdev); + WARN_ON(pdn->pe_number != IODA_INVALID_PE); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (pe->rid == ((pdev->bus->number << 8) | + (pdev->devfn & 0xff))) { + pdn->pe_number = pe->pe_number; + pe->pdev = pdev; + break; + } + } + } +#endif /* CONFIG_PCI_IOV */ - /* If we have no phb structure, try to setup a fallback based on - * the device-tree (RTAS PCI for example) - */ if (phb && phb->dma_dev_setup) phb->dma_dev_setup(phb, pdev); - else - pnv_pci_dma_fallback_setup(hose, pdev); } int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -784,44 +732,36 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); void __init pnv_pci_init(void) { struct device_node *np; + bool found_ioda = false; pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); - /* OPAL absent, try POPAL first then RTAS detection of PHBs */ - if (!firmware_has_feature(FW_FEATURE_OPAL)) { -#ifdef CONFIG_PPC_POWERNV_RTAS - init_pci_config_tokens(); - find_and_init_phbs(); -#endif /* CONFIG_PPC_POWERNV_RTAS */ - } - /* OPAL is here, do our normal stuff */ - else { - int found_ioda = 0; + /* If we don't have OPAL, eg. in sim, just skip PCI probe */ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return; - /* Look for IODA IO-Hubs. We don't support mixing IODA - * and p5ioc2 due to the need to change some global - * probing flags - */ - for_each_compatible_node(np, NULL, "ibm,ioda-hub") { - pnv_pci_init_ioda_hub(np); - found_ioda = 1; - } + /* Look for IODA IO-Hubs. 
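
The CONFIG_PCI_IOV fix-up above matches a VF to its PE by PCI requester ID. A worked sketch of the key being compared (bus/device/function values hypothetical):

    /* RID layout: 8-bit bus | 5-bit device | 3-bit function */
    static u16 pci_rid_sketch(struct pci_dev *pdev)
    {
            return (pdev->bus->number << 8) | (pdev->devfn & 0xff);
    }

    /* e.g. a VF at 21:04.2: (0x21 << 8) | PCI_DEVFN(4, 2) == 0x2122 */
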
We don't support mixing IODA + * and p5ioc2 due to the need to change some global + * probing flags + */ + for_each_compatible_node(np, NULL, "ibm,ioda-hub") { + pnv_pci_init_ioda_hub(np); + found_ioda = true; + } - /* Look for p5ioc2 IO-Hubs */ - if (!found_ioda) - for_each_compatible_node(np, NULL, "ibm,p5ioc2") - pnv_pci_init_p5ioc2_hub(np); + /* Look for p5ioc2 IO-Hubs */ + if (!found_ioda) + for_each_compatible_node(np, NULL, "ibm,p5ioc2") + pnv_pci_init_p5ioc2_hub(np); - /* Look for ioda2 built-in PHB3's */ - for_each_compatible_node(np, NULL, "ibm,ioda2-phb") - pnv_pci_init_ioda2_phb(np); - } + /* Look for ioda2 built-in PHB3's */ + for_each_compatible_node(np, NULL, "ibm,ioda2-phb") + pnv_pci_init_ioda2_phb(np); /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); /* Configure IOMMU DMA hooks */ - ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup; ppc_md.tce_build = pnv_tce_build_vm; ppc_md.tce_free = pnv_tce_free_vm; ppc_md.tce_build_rm = pnv_tce_build_rm; @@ -837,3 +777,7 @@ void __init pnv_pci_init(void) } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); + +struct pci_controller_ops pnv_pci_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +}; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 6c02ff8dd69f..070ee888fc95 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -23,6 +23,7 @@ enum pnv_phb_model { #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ +#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -34,6 +35,9 @@ struct pnv_ioda_pe { * entire bus (& children). In the former case, pdev * is populated, in the latter case, pbus is.
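
pnv_pci_controller_ops above is the powernv half of a new per-controller hook table that replaces global ppc_md DMA hooks; the pseries hunks later in this section install their own copy. A minimal dispatch sketch (the actual call site is not shown in these hunks, so its shape here is an assumption):

    static void dma_dev_setup_dispatch_sketch(struct pci_dev *pdev)
    {
            struct pci_controller *hose = pci_bus_to_host(pdev->bus);

            if (hose && hose->controller_ops.dma_dev_setup)
                    hose->controller_ops.dma_dev_setup(pdev);
    }
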
*/ +#ifdef CONFIG_PCI_IOV + struct pci_dev *parent_dev; +#endif struct pci_dev *pdev; struct pci_bus *pbus; @@ -53,7 +57,7 @@ struct pnv_ioda_pe { /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ int tce32_seg; int tce32_segcount; - struct iommu_table tce32_table; + struct iommu_table *tce32_table; phys_addr_t tce_inval_reg_phys; /* 64-bit TCE bypass region */ @@ -75,22 +79,6 @@ struct pnv_ioda_pe { struct list_head list; }; -/* IOC dependent EEH operations */ -#ifdef CONFIG_EEH -struct pnv_eeh_ops { - int (*post_init)(struct pci_controller *hose); - int (*set_option)(struct eeh_pe *pe, int option); - int (*get_state)(struct eeh_pe *pe); - int (*reset)(struct eeh_pe *pe, int option); - int (*get_log)(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len); - int (*configure_bridge)(struct eeh_pe *pe); - int (*err_inject)(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask); - int (*next_error)(struct eeh_pe **pe); -}; -#endif /* CONFIG_EEH */ - #define PNV_PHB_FLAG_EEH (1 << 0) struct pnv_phb { @@ -104,10 +92,6 @@ struct pnv_phb { int initialized; spinlock_t lock; -#ifdef CONFIG_EEH - struct pnv_eeh_ops *eeh_ops; -#endif - #ifdef CONFIG_DEBUG_FS int has_dbgfs; struct dentry *dbgfs; @@ -165,6 +149,8 @@ struct pnv_phb { /* PE allocation bitmap */ unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; /* M32 & IO segment maps */ unsigned int *m32_segmap; @@ -179,6 +165,7 @@ struct pnv_phb { * on the sequence of creation */ struct list_head pe_list; + struct mutex pe_list_mutex; /* Reverse map of PEs, will have to extend if * we are to support more than 256 PEs, indexed @@ -213,15 +200,12 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; -#ifdef CONFIG_EEH -extern struct pnv_eeh_ops ioda_eeh_ops; -#endif void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); -int pnv_pci_cfg_read(struct device_node *dn, +int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); -int pnv_pci_cfg_write(struct device_node *dn, +int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, @@ -232,6 +216,6 @@ extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); -extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option); +extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 604c48e7879a..826d2c9bea56 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif +extern struct pci_controller_ops pnv_pci_controller_ops; + extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d2de7d5d7574..16fdcb23f4c3 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -32,7 +32,6 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/xics.h> -#include <asm/rtas.h> #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> @@ -278,20 +277,6 @@ static 
void __init pnv_setup_machdep_opal(void) ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } -#ifdef CONFIG_PPC_POWERNV_RTAS -static void __init pnv_setup_machdep_rtas(void) -{ - if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { - ppc_md.get_boot_time = rtas_get_boot_time; - ppc_md.get_rtc_time = rtas_get_rtc_time; - ppc_md.set_rtc_time = rtas_set_rtc_time; - } - ppc_md.restart = rtas_restart; - pm_power_off = rtas_power_off; - ppc_md.halt = rtas_halt; -} -#endif /* CONFIG_PPC_POWERNV_RTAS */ - static u32 supported_cpuidle_states; int pnv_save_sprs_for_winkle(void) @@ -409,37 +394,39 @@ static int __init pnv_init_idle_states(void) { struct device_node *power_mgt; int dt_idle_states; - const __be32 *idle_state_flags; - u32 len_flags, flags; + u32 *flags; int i; supported_cpuidle_states = 0; if (cpuidle_disable != IDLE_NO_OVERRIDE) - return 0; + goto out; if (!firmware_has_feature(FW_FEATURE_OPALv3)) - return 0; + goto out; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); if (!power_mgt) { pr_warn("opal: PowerMgmt Node not found\n"); - return 0; + goto out; + } + dt_idle_states = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-flags"); + if (dt_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + goto out; } - idle_state_flags = of_get_property(power_mgt, - "ibm,cpu-idle-state-flags", &len_flags); - if (!idle_state_flags) { - pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); - return 0; + flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + goto out_free; } - dt_idle_states = len_flags / sizeof(u32); + for (i = 0; i < dt_idle_states; i++) + supported_cpuidle_states |= flags[i]; - for (i = 0; i < dt_idle_states; i++) { - flags = be32_to_cpu(idle_state_flags[i]); - supported_cpuidle_states |= flags; - } if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { patch_instruction( (unsigned int *)pnv_fastsleep_workaround_at_entry, @@ -449,6 +436,9 @@ static int __init pnv_init_idle_states(void) PPC_INST_NOP); } pnv_alloc_idle_core_states(); +out_free: + kfree(flags); +out: return 0; } @@ -465,10 +455,6 @@ static int __init pnv_probe(void) if (firmware_has_feature(FW_FEATURE_OPAL)) pnv_setup_machdep_opal(); -#ifdef CONFIG_PPC_POWERNV_RTAS - else if (rtas.base) - pnv_setup_machdep_rtas(); -#endif /* CONFIG_PPC_POWERNV_RTAS */ pr_debug("PowerNV detected !\n"); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 38a45088f633..8f70ba681a78 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -25,7 +25,6 @@ #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/firmware.h> -#include <asm/rtas.h> #include <asm/vdso_datapage.h> #include <asm/cputhreads.h> #include <asm/xics.h> @@ -251,18 +250,6 @@ void __init pnv_smp_init(void) { smp_ops = &pnv_smp_ops; - /* XXX We don't yet have a proper entry point from HAL, for - * now we rely on kexec-style entry from BML - */ - -#ifdef CONFIG_PPC_RTAS - /* Non-lpar has additional take/give timebase */ - if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = rtas_give_timebase; - smp_ops->take_timebase = rtas_take_timebase; - } -#endif /* CONFIG_PPC_RTAS */ - #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pnv_smp_cpu_kill_self; #endif diff --git 
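
The pnv_init_idle_states() rewrite above counts the u32 cells in "ibm,cpu-idle-state-flags" and reads them in one call instead of open-coding be32 walks. The same pattern, isolated as a sketch (note the sketch also checks the allocation, which the hunk above omits):

    #include <linux/of.h>
    #include <linux/slab.h>

    static u32 idle_flags_union_sketch(struct device_node *power_mgt)
    {
            const char *prop = "ibm,cpu-idle-state-flags";
            u32 *flags, mask = 0;
            int i, n;

            n = of_property_count_u32_elems(power_mgt, prop);
            if (n <= 0)
                    return 0;

            flags = kcalloc(n, sizeof(*flags), GFP_KERNEL);
            if (!flags)
                    return 0;

            if (!of_property_read_u32_array(power_mgt, prop, flags, n))
                    for (i = 0; i < n; i++)
                            mask |= flags[i];

            kfree(flags);
            return mask;
    }
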
a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index b358bec6c8cb..3c7707af3384 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -57,7 +57,7 @@ static void ps3_smp_message_pass(int cpu, int msg) " (%d)\n", __func__, __LINE__, cpu, msg, result); } -static int __init ps3_smp_probe(void) +static void __init ps3_smp_probe(void) { int cpu; @@ -100,8 +100,6 @@ static int __init ps3_smp_probe(void) DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); } - - return 2; } void ps3_smp_cleanup_cpu(int cpu) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a758a9c3bbba..54c87d5d349d 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -16,7 +16,6 @@ config PPC_PSERIES select PPC_UDBG_16550 select PPC_NATIVE select PPC_PCI_CHOICE if EXPERT - select ZLIB_DEFLATE select PPC_DOORBELL select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index c22bb1b4beb8..b4b11096ea8b 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -10,6 +10,8 @@ * 2 as published by the Free Software Foundation. */ +#define pr_fmt(fmt) "dlpar: " fmt + #include <linux/kernel.h> #include <linux/notifier.h> #include <linux/spinlock.h> @@ -535,13 +537,125 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return count; } +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + +static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) +{ + int rc; + + /* pseries error logs are in BE format, convert to cpu type */ + switch (hp_elog->id_type) { + case PSERIES_HP_ELOG_ID_DRC_COUNT: + hp_elog->_drc_u.drc_count = + be32_to_cpu(hp_elog->_drc_u.drc_count); + break; + case PSERIES_HP_ELOG_ID_DRC_INDEX: + hp_elog->_drc_u.drc_index = + be32_to_cpu(hp_elog->_drc_u.drc_index); + } + + switch (hp_elog->resource) { + case PSERIES_HP_ELOG_RESOURCE_MEM: + rc = dlpar_memory(hp_elog); + break; + default: + pr_warn_ratelimited("Invalid resource (%d) specified\n", + hp_elog->resource); + rc = -EINVAL; + } + + return rc; +} + +static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct pseries_hp_errorlog *hp_elog; + const char *arg; + int rc; + + hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); + if (!hp_elog) { + rc = -ENOMEM; + goto dlpar_store_out; + } + + /* Parse out the request from the user, this will be in the form + * <resource> <action> <id_type> <id> + */ + arg = buf; + if (!strncmp(arg, "memory", 6)) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; + arg += strlen("memory "); + } else { + pr_err("Invalid resource specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + if (!strncmp(arg, "add", 3)) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD; + arg += strlen("add "); + } else if (!strncmp(arg, "remove", 6)) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE; + arg += strlen("remove "); + } else { + pr_err("Invalid action specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + if (!strncmp(arg, "index", 5)) { + u32 index; + + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; + arg += strlen("index "); + if (kstrtou32(arg, 0, &index)) { + rc = -EINVAL; + pr_err("Invalid drc_index specified: \"%s\"\n", buf); + goto dlpar_store_out; + } + + hp_elog->_drc_u.drc_index = cpu_to_be32(index); + } else if (!strncmp(arg, "count", 5)) { + 
u32 count; + + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT; + arg += strlen("count "); + if (kstrtou32(arg, 0, &count)) { + rc = -EINVAL; + pr_err("Invalid count specified: \"%s\"\n", buf); + goto dlpar_store_out; + } + + hp_elog->_drc_u.drc_count = cpu_to_be32(count); + } else { + pr_err("Invalid id_type specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + rc = handle_dlpar_errorlog(hp_elog); + +dlpar_store_out: + kfree(hp_elog); + return rc ? rc : count; +} + +static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); + static int __init pseries_dlpar_init(void) { + int rc; + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ppc_md.cpu_probe = dlpar_cpu_probe; ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ - return 0; + rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); + + return rc; } machine_device_initcall(pseries, pseries_dlpar_init); -#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a6c7e19f5eb3..2039397cc75d 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -118,9 +118,8 @@ static int pseries_eeh_init(void) return 0; } -static int pseries_eeh_cap_start(struct device_node *dn) +static int pseries_eeh_cap_start(struct pci_dn *pdn) { - struct pci_dn *pdn = PCI_DN(dn); u32 status; if (!pdn) @@ -134,10 +133,9 @@ static int pseries_eeh_cap_start(struct device_node *dn) } -static int pseries_eeh_find_cap(struct device_node *dn, int cap) +static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap) { - struct pci_dn *pdn = PCI_DN(dn); - int pos = pseries_eeh_cap_start(dn); + int pos = pseries_eeh_cap_start(pdn); int cnt = 48; /* Maximal number of capabilities */ u32 id; @@ -160,10 +158,9 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap) return 0; } -static int pseries_eeh_find_ecap(struct device_node *dn, int cap) +static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) { - struct pci_dn *pdn = PCI_DN(dn); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); u32 header; int pos = 256; int ttl = (4096 - 256) / 8; @@ -191,53 +188,44 @@ static int pseries_eeh_find_ecap(struct device_node *dn, int cap) } /** - * pseries_eeh_of_probe - EEH probe on the given device - * @dn: OF node - * @flag: Unused + * pseries_eeh_probe - EEH probe on the given device + * @pdn: PCI device node + * @data: Unused * * When EEH module is installed during system boot, all PCI devices * are checked one by one to see if it supports EEH. The function * is introduced for the purpose. 
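
The dlpar_store() parser above expects "<resource> <action> <id_type> <id>", and the attribute is created on kernel_kobj, so the file lands at /sys/kernel/dlpar (root-writable only, given S_IWUSR). Hypothetical usage, with a made-up drc index:

    echo "memory add count 1" > /sys/kernel/dlpar
    echo "memory remove index 0x80000010" > /sys/kernel/dlpar

Because the id is parsed with kstrtou32(..., 0, ...), both decimal and 0x-prefixed hex are accepted.
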
*/ -static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) +static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) { struct eeh_dev *edev; struct eeh_pe pe; - struct pci_dn *pdn = PCI_DN(dn); - const __be32 *classp, *vendorp, *devicep; - u32 class_code; - const __be32 *regs; u32 pcie_flags; int enable = 0; int ret; /* Retrieve OF node and eeh device */ - edev = of_node_to_eeh_dev(dn); - if (edev->pe || !of_device_is_available(dn)) + edev = pdn_to_eeh_dev(pdn); + if (!edev || edev->pe) return NULL; - /* Retrieve class/vendor/device IDs */ - classp = of_get_property(dn, "class-code", NULL); - vendorp = of_get_property(dn, "vendor-id", NULL); - devicep = of_get_property(dn, "device-id", NULL); - - /* Skip for bad OF node or PCI-ISA bridge */ - if (!classp || !vendorp || !devicep) - return NULL; - if (dn->type && !strcmp(dn->type, "isa")) + /* Check class/vendor/device IDs */ + if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code) return NULL; - class_code = of_read_number(classp, 1); + /* Skip for PCI-ISA bridge */ + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; /* * Update class code and mode of eeh device. We need * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = class_code; - edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX); - edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); - edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR); + edev->class_code = pdn->class_code; + edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -252,24 +240,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) } } - /* Retrieve the device address */ - regs = of_get_property(dn, "reg", NULL); - if (!regs) { - pr_warn("%s: OF node property %s::reg not found\n", - __func__, dn->full_name); - return NULL; - } - /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = edev->phb; - pe.config_addr = of_read_number(regs, 1); + pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); /* Enable EEH on the device */ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { - edev->config_addr = of_read_number(regs, 1); /* Retrieve PE address */ + edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8); edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; @@ -285,16 +265,17 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); - pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", - __func__, dn->full_name, pe.phb->global_number, - pe.addr, pe.config_addr); - } else if (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent))->pe) { + pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n", + __func__, pdn->busno, PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), pe.phb->global_number, + pe.addr); + } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) && + (pdn_to_eeh_dev(pdn->parent))->pe) { /* This device doesn't support EEH, but it may have an * EEH parent, in which case we mark it as supported. 
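
The fake-PE address built above packs bus and devfn into RTAS config-address form. A worked sketch of that packing (device numbers hypothetical):

    static u32 eeh_config_addr_sketch(u16 busno, u8 devfn)
    {
            /* same (busno << 16) | (devfn << 8) packing as pe.config_addr */
            return ((u32)busno << 16) | ((u32)devfn << 8);
    }

    /* e.g. device 02:03.1: devfn = PCI_DEVFN(3, 1) = 0x19, giving
     * (0x02 << 16) | (0x19 << 8) = 0x00021900 */
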
*/ - edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; - edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; + edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr; + edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; eeh_add_to_parent_pe(edev); } } @@ -670,45 +651,36 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) /** * pseries_eeh_read_config - Read PCI config space - * @dn: device node + * @pdn: PCI device node * @where: PCI address * @size: size to read * @val: return value * * Read config space from the specified device */ -static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val) +static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { - struct pci_dn *pdn; - - pdn = PCI_DN(dn); - return rtas_read_config(pdn, where, size, val); } /** * pseries_eeh_write_config - Write PCI config space - * @dn: device node + * @pdn: PCI device node * @where: PCI address * @size: size to write * @val: value to be written * * Write config space to the specified device */ -static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val) +static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val) { - struct pci_dn *pdn; - - pdn = PCI_DN(dn); - return rtas_write_config(pdn, where, size, val); } static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, - .of_probe = pseries_eeh_of_probe, - .dev_probe = NULL, + .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index fa41f0da5b6f..0ced387e1463 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -9,11 +9,14 @@ * 2 of the License, or (at your option) any later version.
*/ +#define pr_fmt(fmt) "pseries-hotplug-mem: " fmt + #include <linux/of.h> #include <linux/of_address.h> #include <linux/memblock.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/slab.h> #include <asm/firmware.h> #include <asm/machdep.h> @@ -21,6 +24,8 @@ #include <asm/sparsemem.h> #include "pseries.h" +static bool rtas_hp_event; + unsigned long pseries_memory_block_size(void) { struct device_node *np; @@ -64,6 +69,67 @@ unsigned long pseries_memory_block_size(void) return memblock_size; } +static void dlpar_free_drconf_property(struct property *prop) +{ + kfree(prop->name); + kfree(prop->value); + kfree(prop); +} + +static struct property *dlpar_clone_drconf_property(struct device_node *dn) +{ + struct property *prop, *new_prop; + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i; + + prop = of_find_property(dn, "ibm,dynamic-memory", NULL); + if (!prop) + return NULL; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kmalloc(prop->length, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + dlpar_free_drconf_property(new_prop); + return NULL; + } + + memcpy(new_prop->value, prop->value, prop->length); + new_prop->length = prop->length; + + /* Convert the property to cpu endian-ness */ + p = new_prop->value; + *p = be32_to_cpu(*p); + + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); + lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); + lmbs[i].flags = be32_to_cpu(lmbs[i].flags); + } + + return new_prop; +} + +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { @@ -122,6 +188,173 @@ static int pseries_remove_mem_node(struct device_node *np) pseries_remove_memblock(base, lmb_size); return 0; } + +static bool lmb_is_removable(struct of_drconf_cell *lmb) +{ + int i, scns_per_block; + int rc = 1; + unsigned long pfn, block_sz; + u64 phys_addr; + + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + return false; + + block_sz = memory_block_size_bytes(); + scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + phys_addr = lmb->base_addr; + + for (i = 0; i < scns_per_block; i++) { + pfn = PFN_DOWN(phys_addr); + if (!pfn_present(pfn)) + continue; + + rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + phys_addr += MIN_MEMORY_BLOCK_SIZE; + } + + return rc ? 
true : false; +} + +static int dlpar_add_lmb(struct of_drconf_cell *); + +static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + unsigned long block_sz; + int nid, rc; + + if (!lmb_is_removable(lmb)) + return -EINVAL; + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) + return -EINVAL; + + rc = device_offline(&mem_block->dev); + put_device(&mem_block->dev); + if (rc) + return rc; + + block_sz = pseries_memory_block_size(); + nid = memory_add_physaddr_to_nid(lmb->base_addr); + + remove_memory(nid, lmb->base_addr, block_sz); + + /* Update memory regions for memory remove */ + memblock_remove(lmb->base_addr, block_sz); + + dlpar_release_drc(lmb->drc_index); + + lmb->flags &= ~DRCONF_MEM_ASSIGNED; + return 0; +} + +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + int lmbs_removed = 0; + int lmbs_available = 0; + u32 num_lmbs, *p; + int i, rc; + + pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); + + if (lmbs_to_remove == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + /* Validate that there are enough LMBs to satisfy the request */ + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + lmbs_available++; + } + + if (lmbs_available < lmbs_to_remove) + return -EINVAL; + + for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) { + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + continue; + + lmbs_removed++; + + /* Mark this lmb so we can add it later if all of the + * requested LMBs cannot be removed. + */ + lmbs[i].reserved = 1; + } + + if (lmbs_removed != lmbs_to_remove) { + pr_err("Memory hot-remove failed, adding LMB's back\n"); + + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to add LMB back, drc index %x\n", + lmbs[i].drc_index); + + lmbs[i].reserved = 0; + } + + rc = -EINVAL; + } else { + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + pr_info("Memory at %llx was hot-removed\n", + lmbs[i].base_addr); + + lmbs[i].reserved = 0; + } + rc = 0; + } + + return rc; +} + +static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int lmb_found; + int i, rc; + + pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + lmb_found = 0; + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_remove_lmb(&lmbs[i]); + break; + } + } + + if (!lmb_found) + rc = -EINVAL; + + if (rc) + pr_info("Failed to hot-remove memory at %llx\n", + lmbs[i].base_addr); + else + pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr); + + return rc; +} + #else static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) @@ -132,8 +365,261 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } +static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +{ + return -EOPNOTSUPP; +} +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, + struct property *prop) +{ + return -EOPNOTSUPP; +} +static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +{ + return -EOPNOTSUPP; +} + #endif /* 
CONFIG_MEMORY_HOTREMOVE */ +static int dlpar_add_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + unsigned long block_sz; + int nid, rc; + + if (lmb->flags & DRCONF_MEM_ASSIGNED) + return -EINVAL; + + block_sz = memory_block_size_bytes(); + + rc = dlpar_acquire_drc(lmb->drc_index); + if (rc) + return rc; + + /* Find the node id for this address */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + + /* Add the memory */ + rc = add_memory(nid, lmb->base_addr, block_sz); + if (rc) { + dlpar_release_drc(lmb->drc_index); + return rc; + } + + /* Register this block of memory */ + rc = memblock_add(lmb->base_addr, block_sz); + if (rc) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return rc; + } + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return -EINVAL; + } + + rc = device_online(&mem_block->dev); + put_device(&mem_block->dev); + if (rc) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return rc; + } + + lmb->flags |= DRCONF_MEM_ASSIGNED; + return 0; +} + +static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int lmbs_available = 0; + int lmbs_added = 0; + int i, rc; + + pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); + + if (lmbs_to_add == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + /* Validate that there are enough LMBs to satisfy the request */ + for (i = 0; i < num_lmbs; i++) { + if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + lmbs_available++; + } + + if (lmbs_available < lmbs_to_add) + return -EINVAL; + + for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + continue; + + lmbs_added++; + + /* Mark this lmb so we can remove it later if all of the + * requested LMBs cannot be added. 
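
dlpar_add_lmb() above repeats the same unwind (remove_memory() plus dlpar_release_drc()) in each error leg. The logic rewritten with a goto unwind, purely as a stylistic sketch of the same sequence (lmb_to_memblock() is the helper defined earlier in this file):

    static int dlpar_add_lmb_sketch(struct of_drconf_cell *lmb)
    {
            unsigned long block_sz = memory_block_size_bytes();
            struct memory_block *mem_block;
            int nid, rc;

            if (lmb->flags & DRCONF_MEM_ASSIGNED)
                    return -EINVAL;

            rc = dlpar_acquire_drc(lmb->drc_index);
            if (rc)
                    return rc;

            nid = memory_add_physaddr_to_nid(lmb->base_addr);

            rc = add_memory(nid, lmb->base_addr, block_sz);
            if (rc)
                    goto out_release;

            rc = memblock_add(lmb->base_addr, block_sz);
            if (rc)
                    goto out_remove;

            mem_block = lmb_to_memblock(lmb);
            if (!mem_block) {
                    rc = -EINVAL;
                    goto out_remove;
            }

            rc = device_online(&mem_block->dev);
            put_device(&mem_block->dev);
            if (rc)
                    goto out_remove;

            lmb->flags |= DRCONF_MEM_ASSIGNED;
            return 0;

    out_remove:
            remove_memory(nid, lmb->base_addr, block_sz);
    out_release:
            dlpar_release_drc(lmb->drc_index);
            return rc;
    }
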
+ */ + lmbs[i].reserved = 1; + } + + if (lmbs_added != lmbs_to_add) { + pr_err("Memory hot-add failed, removing any added LMBs\n"); + + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to remove LMB, drc index %x\n", + be32_to_cpu(lmbs[i].drc_index)); + } + rc = -EINVAL; + } else { + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmbs[i].base_addr, lmbs[i].drc_index); + lmbs[i].reserved = 0; + } + } + + return rc; +} + +static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i, lmb_found; + int rc; + + pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index); + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + lmb_found = 0; + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_add_lmb(&lmbs[i]); + break; + } + } + + if (!lmb_found) + rc = -EINVAL; + + if (rc) + pr_info("Failed to hot-add memory, drc index %x\n", drc_index); + else + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmbs[i].base_addr, drc_index); + + return rc; +} + +static void dlpar_update_drconf_property(struct device_node *dn, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i; + + /* Convert the property back to BE */ + p = prop->value; + num_lmbs = *p; + *p = cpu_to_be32(*p); + p++; + + lmbs = (struct of_drconf_cell *)p; + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); + lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); + lmbs[i].flags = cpu_to_be32(lmbs[i].flags); + } + + rtas_hp_event = true; + of_update_property(dn, prop); + rtas_hp_event = false; +} + +int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + struct device_node *dn; + struct property *prop; + u32 count, drc_index; + int rc; + + count = hp_elog->_drc_u.drc_count; + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) { + rc = -EINVAL; + goto dlpar_memory_out; + } + + prop = dlpar_clone_drconf_property(dn); + if (!prop) { + rc = -EINVAL; + goto dlpar_memory_out; + } + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_ADD: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_memory_add_by_count(count, prop); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_memory_add_by_index(drc_index, prop); + else + rc = -EINVAL; + break; + case PSERIES_HP_ELOG_ACTION_REMOVE: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_memory_remove_by_count(count, prop); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_memory_remove_by_index(drc_index, prop); + else + rc = -EINVAL; + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + if (rc) + dlpar_free_drconf_property(prop); + else + dlpar_update_drconf_property(dn, prop); + +dlpar_memory_out: + of_node_put(dn); + unlock_device_hotplug(); + return rc; +} + static int pseries_add_mem_node(struct device_node *np) { const char *type; @@ -174,6 +660,9 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr) __be32 *p; int i, rc = -EINVAL; + if (rtas_hp_event) + return 0; + memblock_size = pseries_memory_block_size(); if (!memblock_size) return 
-EINVAL; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 7803a19adb31..61d5a17f45c0 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -49,6 +49,7 @@ #include <asm/mmzone.h> #include <asm/plpar_wrappers.h> +#include "pseries.h" static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, __be64 *startp, __be64 *endp) @@ -1307,16 +1308,16 @@ void iommu_init_early_pSeries(void) ppc_md.tce_free = tce_free_pSeriesLP; } ppc_md.tce_get = tce_get_pSeriesLP; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; } else { ppc_md.tce_build = tce_build_pSeries; ppc_md.tce_free = tce_free_pSeries; ppc_md.tce_get = tce_get_pseries; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeries; - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeries; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; } diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 8f35d525cede..ceb18d349459 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -320,28 +320,34 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr, { u64 streamid; int rc; - int vasi_rc = 0; rc = kstrtou64(buf, 0, &streamid); if (rc) return rc; do { - rc = rtas_ibm_suspend_me(streamid, &vasi_rc); - if (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE) + rc = rtas_ibm_suspend_me(streamid); + if (rc == -EAGAIN) ssleep(1); - } while (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE); + } while (rc == -EAGAIN); if (rc) return rc; - if (vasi_rc) - return vasi_rc; post_mobility_fixup(); return count; } +/* + * Used by drmgr to determine the kernel behavior of the migration interface. + * + * Version 1: Performs all PAPR requirements for migration including + * firmware activation and device tree update. + */ +#define MIGRATION_API_VERSION 1 + static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store); +static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION)); static int __init mobility_sysfs_init(void) { @@ -352,7 +358,13 @@ static int __init mobility_sysfs_init(void) return -ENOMEM; rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); + if (rc) + pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); - return rc; + rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); + if (rc) + pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + + return 0; } machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 691a154c286d..c8d24f9a6948 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -195,6 +195,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; + struct pci_dn *pdn; struct eeh_dev *edev; /* Found our PE and assume 8 at that point. 
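
The api_version attribute above gives drmgr a stable way to probe the kernel's migration behaviour. Assuming mobility_kobj sits under /sys/kernel (its creation is not shown in this section), usage looks like:

    cat /sys/kernel/mobility/api_version              # prints 1
    echo 0x12345678 > /sys/kernel/mobility/migration  # stream id hypothetical

Note that migrate_store() retries rtas_ibm_suspend_me() once a second while it returns -EAGAIN, so the write blocks until the partition becomes suspendable or a hard error is returned.
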
*/ @@ -204,10 +205,11 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) return NULL; /* Get the top level device in the PE */ - edev = of_node_to_eeh_dev(dn); + edev = pdn_to_eeh_dev(PCI_DN(dn)); if (edev->pe) edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); - dn = eeh_dev_to_of_node(edev); + pdn = eeh_dev_to_pdn(edev); + dn = pdn ? pdn->node : NULL; if (!dn) return NULL; diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 054a0ed5c7ee..9f8184175c86 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -20,7 +20,6 @@ #include <linux/kmsg_dump.h> #include <linux/pstore.h> #include <linux/ctype.h> -#include <linux/zlib.h> #include <asm/uaccess.h> #include <asm/nvram.h> #include <asm/rtas.h> @@ -30,129 +29,17 @@ /* Max bytes to read/write in one go */ #define NVRW_CNT 0x20 -/* - * Set oops header version to distinguish between old and new format header. - * lnx,oops-log partition max size is 4000, header version > 4000 will - * help in identifying new header. - */ -#define OOPS_HDR_VERSION 5000 - static unsigned int nvram_size; static int nvram_fetch, nvram_store; static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ static DEFINE_SPINLOCK(nvram_lock); -struct err_log_info { - __be32 error_type; - __be32 seq_num; -}; - -struct nvram_os_partition { - const char *name; - int req_size; /* desired size, in bytes */ - int min_size; /* minimum acceptable size (0 means req_size) */ - long size; /* size of data portion (excluding err_log_info) */ - long index; /* offset of data portion of partition */ - bool os_partition; /* partition initialized by OS, not FW */ -}; - -static struct nvram_os_partition rtas_log_partition = { - .name = "ibm,rtas-log", - .req_size = 2079, - .min_size = 1055, - .index = -1, - .os_partition = true -}; - -static struct nvram_os_partition oops_log_partition = { - .name = "lnx,oops-log", - .req_size = 4000, - .min_size = 2000, - .index = -1, - .os_partition = true -}; - -static const char *pseries_nvram_os_partitions[] = { - "ibm,rtas-log", - "lnx,oops-log", - NULL -}; - -struct oops_log_info { - __be16 version; - __be16 report_length; - __be64 timestamp; -} __attribute__((packed)); - -static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason); - -static struct kmsg_dumper nvram_kmsg_dumper = { - .dump = oops_to_nvram -}; - /* See clobbering_unread_rtas_event() */ #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ -static unsigned long last_unread_rtas_event; /* timestamp */ - -/* - * For capturing and compressing an oops or panic report... - - * big_oops_buf[] holds the uncompressed text we're capturing. - * - * oops_buf[] holds the compressed text, preceded by a oops header. - * oops header has u16 holding the version of oops header (to differentiate - * between old and new format header) followed by u16 holding the length of - * the compressed* text (*Or uncompressed, if compression fails.) and u64 - * holding the timestamp. oops_buf[] gets written to NVRAM. - * - * oops_log_info points to the header. oops_data points to the compressed text. 
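
The msi.c hunk above recovers an OF node from an eeh_dev in two hops, now that EEH code is keyed on pci_dn instead of device_node. The mapping, isolated as a tiny sketch (helper name hypothetical):

    static struct device_node *edev_to_node_sketch(struct eeh_dev *edev)
    {
            struct pci_dn *pdn = eeh_dev_to_pdn(edev);

            return pdn ? pdn->node : NULL;
    }
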
- * - * +- oops_buf - * | +- oops_data - * v v - * +-----------+-----------+-----------+------------------------+ - * | version | length | timestamp | text | - * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | - * +-----------+-----------+-----------+------------------------+ - * ^ - * +- oops_log_info - * - * We preallocate these buffers during init to avoid kmalloc during oops/panic. - */ -static size_t big_oops_buf_sz; -static char *big_oops_buf, *oops_buf; -static char *oops_data; -static size_t oops_data_sz; - -/* Compression parameters */ -#define COMPR_LEVEL 6 -#define WINDOW_BITS 12 -#define MEM_LEVEL 4 -static struct z_stream_s stream; +static time64_t last_unread_rtas_event; /* timestamp */ #ifdef CONFIG_PSTORE -static struct nvram_os_partition of_config_partition = { - .name = "of-config", - .index = -1, - .os_partition = false -}; - -static struct nvram_os_partition common_partition = { - .name = "common", - .index = -1, - .os_partition = false -}; - -static enum pstore_type_id nvram_type_ids[] = { - PSTORE_TYPE_DMESG, - PSTORE_TYPE_PPC_RTAS, - PSTORE_TYPE_PPC_OF, - PSTORE_TYPE_PPC_COMMON, - -1 -}; -static int read_type; -static unsigned long last_rtas_event; +time64_t last_rtas_event; #endif static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) @@ -246,132 +133,26 @@ static ssize_t pSeries_nvram_get_size(void) return nvram_size ? nvram_size : -ENODEV; } - -/* nvram_write_os_partition, nvram_write_error_log +/* nvram_write_error_log * * We need to buffer the error logs into nvram to ensure that we have - * the failure information to decode. If we have a severe error there - * is no way to guarantee that the OS or the machine is in a state to - * get back to user land and write the error to disk. For example if - * the SCSI device driver causes a Machine Check by writing to a bad - * IO address, there is no way of guaranteeing that the device driver - * is in any state that is would also be able to write the error data - * captured to disk, thus we buffer it in NVRAM for analysis on the - * next boot. - * - * In NVRAM the partition containing the error log buffer will looks like: - * Header (in bytes): - * +-----------+----------+--------+------------+------------------+ - * | signature | checksum | length | name | data | - * |0 |1 |2 3|4 15|16 length-1| - * +-----------+----------+--------+------------+------------------+ - * - * The 'data' section would look like (in bytes): - * +--------------+------------+-----------------------------------+ - * | event_logged | sequence # | error log | - * |0 3|4 7|8 error_log_size-1| - * +--------------+------------+-----------------------------------+ - * - * event_logged: 0 if event has not been logged to syslog, 1 if it has - * sequence #: The unique sequence # for each event. (until it wraps) - * error log: The error log from event_scan + * the failure information to decode. 
*/ -static int nvram_write_os_partition(struct nvram_os_partition *part, - char *buff, int length, - unsigned int err_type, - unsigned int error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (part->index == -1) { - return -ESPIPE; - } - - if (length > part->size) { - length = part->size; - } - - info.error_type = cpu_to_be32(err_type); - info.seq_num = cpu_to_be32(error_log_cnt); - - tmp_index = part->index; - - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); - return rc; - } - - rc = ppc_md.nvram_write(buff, length, &tmp_index); - if (rc <= 0) { - pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); - return rc; - } - - return 0; -} - int nvram_write_error_log(char * buff, int length, unsigned int err_type, unsigned int error_log_cnt) { int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, err_type, error_log_cnt); if (!rc) { - last_unread_rtas_event = get_seconds(); + last_unread_rtas_event = ktime_get_real_seconds(); #ifdef CONFIG_PSTORE - last_rtas_event = get_seconds(); + last_rtas_event = ktime_get_real_seconds(); #endif } return rc; } -/* nvram_read_partition - * - * Reads nvram partition for at most 'length' - */ -static int nvram_read_partition(struct nvram_os_partition *part, char *buff, - int length, unsigned int *err_type, - unsigned int *error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (part->index == -1) - return -1; - - if (length > part->size) - length = part->size; - - tmp_index = part->index; - - if (part->os_partition) { - rc = ppc_md.nvram_read((char *)&info, - sizeof(struct err_log_info), - &tmp_index); - if (rc <= 0) { - pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); - return rc; - } - } - - rc = ppc_md.nvram_read(buff, length, &tmp_index); - if (rc <= 0) { - pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); - return rc; - } - - if (part->os_partition) { - *error_log_cnt = be32_to_cpu(info.seq_num); - *err_type = be32_to_cpu(info.error_type); - } - - return 0; -} - /* nvram_read_error_log * * Reads nvram for error log for at most 'length' @@ -407,67 +188,6 @@ int nvram_clear_error_log(void) return 0; } -/* pseries_nvram_init_os_partition - * - * This sets up a partition with an "OS" signature. - * - * The general strategy is the following: - * 1.) If a partition with the indicated name already exists... - * - If it's large enough, use it. - * - Otherwise, recycle it and keep going. - * 2.) Search for a free partition that is large enough. - * 3.) If there's not a free partition large enough, recycle any obsolete - * OS partitions and try again. - * 4.) Will first try getting a chunk that will satisfy the requested size. - * 5.) If a chunk of the requested size cannot be allocated, then try finding - * a chunk that will satisfy the minum needed. - * - * Returns 0 on success, else -1. 
- */ -static int __init pseries_nvram_init_os_partition(struct nvram_os_partition - *part) -{ - loff_t p; - int size; - - /* Look for ours */ - p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); - - /* Found one but too small, remove it */ - if (p && size < part->min_size) { - pr_info("nvram: Found too small %s partition," - " removing it...\n", part->name); - nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL); - p = 0; - } - - /* Create one if we didn't find */ - if (!p) { - p = nvram_create_partition(part->name, NVRAM_SIG_OS, - part->req_size, part->min_size); - if (p == -ENOSPC) { - pr_info("nvram: No room to create %s partition, " - "deleting any obsolete OS partitions...\n", - part->name); - nvram_remove_partition(NULL, NVRAM_SIG_OS, - pseries_nvram_os_partitions); - p = nvram_create_partition(part->name, NVRAM_SIG_OS, - part->req_size, part->min_size); - } - } - - if (p <= 0) { - pr_err("nvram: Failed to find or create %s" - " partition, err %d\n", part->name, (int)p); - return -1; - } - - part->index = p; - part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info); - - return 0; -} - /* * Are we using the ibm,rtas-log for oops/panic reports? And if so, * would logging this oops/panic overwrite an RTAS event that rtas_errd @@ -476,321 +196,14 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition * We assume that if rtas_errd hasn't read the RTAS event in * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. */ -static int clobbering_unread_rtas_event(void) +int clobbering_unread_rtas_event(void) { return (oops_log_partition.index == rtas_log_partition.index && last_unread_rtas_event - && get_seconds() - last_unread_rtas_event <= + && ktime_get_real_seconds() - last_unread_rtas_event <= NVRAM_RTAS_READ_TIMEOUT); } -/* Derived from logfs_compress() */ -static int nvram_compress(const void *in, void *out, size_t inlen, - size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, - MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_deflateEnd(&stream); - if (err != Z_OK) - goto error; - - if (stream.total_out >= stream.total_in) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -/* Compress the text from big_oops_buf into oops_buf. 
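
clobbering_unread_rtas_event() above now compares time64_t stamps from ktime_get_real_seconds(), the y2038-safe replacement for get_seconds(). The timestamp half of that check, isolated as a sketch:

    static bool rtas_event_unread_sketch(time64_t stamp)
    {
            return stamp &&
                   ktime_get_real_seconds() - stamp <= NVRAM_RTAS_READ_TIMEOUT;
    }
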
*/ -static int zip_oops(size_t text_len) -{ - struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; - int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, - oops_data_sz); - if (zipped_len < 0) { - pr_err("nvram: compression failed; returned %d\n", zipped_len); - pr_err("nvram: logging uncompressed oops/panic report\n"); - return -1; - } - oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); - oops_hdr->report_length = cpu_to_be16(zipped_len); - oops_hdr->timestamp = cpu_to_be64(get_seconds()); - return 0; -} - -#ifdef CONFIG_PSTORE -static int nvram_pstore_open(struct pstore_info *psi) -{ - /* Reset the iterator to start reading partitions again */ - read_type = -1; - return 0; -} - -/** - * nvram_pstore_write - pstore write callback for nvram - * @type: Type of message logged - * @reason: reason behind dump (oops/panic) - * @id: identifier to indicate the write performed - * @part: pstore writes data to registered buffer in parts, - * part number will indicate the same. - * @count: Indicates oops count - * @compressed: Flag to indicate the log is compressed - * @size: number of bytes written to the registered buffer - * @psi: registered pstore_info structure - * - * Called by pstore_dump() when an oops or panic report is logged in the - * printk buffer. - * Returns 0 on successful write. - */ -static int nvram_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, - bool compressed, size_t size, - struct pstore_info *psi) -{ - int rc; - unsigned int err_type = ERR_TYPE_KERNEL_PANIC; - struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; - - /* part 1 has the recent messages from printk buffer */ - if (part > 1 || type != PSTORE_TYPE_DMESG || - clobbering_unread_rtas_event()) - return -1; - - oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); - oops_hdr->report_length = cpu_to_be16(size); - oops_hdr->timestamp = cpu_to_be64(get_seconds()); - - if (compressed) - err_type = ERR_TYPE_KERNEL_PANIC_GZ; - - rc = nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_hdr) + size), err_type, count); - - if (rc != 0) - return rc; - - *id = part; - return 0; -} - -/* - * Reads the oops/panic report, rtas, of-config and common partition. - * Returns the length of the data we read from each partition. - * Returns 0 if we've been called before. 
- */ -static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi) -{ - struct oops_log_info *oops_hdr; - unsigned int err_type, id_no, size = 0; - struct nvram_os_partition *part = NULL; - char *buff = NULL; - int sig = 0; - loff_t p; - - read_type++; - - switch (nvram_type_ids[read_type]) { - case PSTORE_TYPE_DMESG: - part = &oops_log_partition; - *type = PSTORE_TYPE_DMESG; - break; - case PSTORE_TYPE_PPC_RTAS: - part = &rtas_log_partition; - *type = PSTORE_TYPE_PPC_RTAS; - time->tv_sec = last_rtas_event; - time->tv_nsec = 0; - break; - case PSTORE_TYPE_PPC_OF: - sig = NVRAM_SIG_OF; - part = &of_config_partition; - *type = PSTORE_TYPE_PPC_OF; - *id = PSTORE_TYPE_PPC_OF; - time->tv_sec = 0; - time->tv_nsec = 0; - break; - case PSTORE_TYPE_PPC_COMMON: - sig = NVRAM_SIG_SYS; - part = &common_partition; - *type = PSTORE_TYPE_PPC_COMMON; - *id = PSTORE_TYPE_PPC_COMMON; - time->tv_sec = 0; - time->tv_nsec = 0; - break; - default: - return 0; - } - - if (!part->os_partition) { - p = nvram_find_partition(part->name, sig, &size); - if (p <= 0) { - pr_err("nvram: Failed to find partition %s, " - "err %d\n", part->name, (int)p); - return 0; - } - part->index = p; - part->size = size; - } - - buff = kmalloc(part->size, GFP_KERNEL); - - if (!buff) - return -ENOMEM; - - if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { - kfree(buff); - return 0; - } - - *count = 0; - - if (part->os_partition) - *id = id_no; - - if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { - size_t length, hdr_size; - - oops_hdr = (struct oops_log_info *)buff; - if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) { - /* Old format oops header had 2-byte record size */ - hdr_size = sizeof(u16); - length = be16_to_cpu(oops_hdr->version); - time->tv_sec = 0; - time->tv_nsec = 0; - } else { - hdr_size = sizeof(*oops_hdr); - length = be16_to_cpu(oops_hdr->report_length); - time->tv_sec = be64_to_cpu(oops_hdr->timestamp); - time->tv_nsec = 0; - } - *buf = kmalloc(length, GFP_KERNEL); - if (*buf == NULL) - return -ENOMEM; - memcpy(*buf, buff + hdr_size, length); - kfree(buff); - - if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) - *compressed = true; - else - *compressed = false; - return length; - } - - *buf = buff; - return part->size; -} - -static struct pstore_info nvram_pstore_info = { - .owner = THIS_MODULE, - .name = "nvram", - .open = nvram_pstore_open, - .read = nvram_pstore_read, - .write = nvram_pstore_write, -}; - -static int nvram_pstore_init(void) -{ - int rc = 0; - - nvram_pstore_info.buf = oops_data; - nvram_pstore_info.bufsize = oops_data_sz; - - spin_lock_init(&nvram_pstore_info.buf_lock); - - rc = pstore_register(&nvram_pstore_info); - if (rc != 0) - pr_err("nvram: pstore_register() failed, defaults to " - "kmsg_dump; returned %d\n", rc); - - return rc; -} -#else -static int nvram_pstore_init(void) -{ - return -1; -} -#endif - -static void __init nvram_init_oops_partition(int rtas_partition_exists) -{ - int rc; - - rc = pseries_nvram_init_os_partition(&oops_log_partition); - if (rc != 0) { - if (!rtas_partition_exists) - return; - pr_notice("nvram: Using %s partition to log both" - " RTAS errors and oops/panic reports\n", - rtas_log_partition.name); - memcpy(&oops_log_partition, &rtas_log_partition, - sizeof(rtas_log_partition)); - } - oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL); - if (!oops_buf) { - pr_err("nvram: No memory for %s partition\n", - oops_log_partition.name); - return; - } 
- oops_data = oops_buf + sizeof(struct oops_log_info); - oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); - - rc = nvram_pstore_init(); - - if (!rc) - return; - - /* - * Figure compression (preceded by elimination of each line's <n> - * severity prefix) will reduce the oops/panic report to at most - * 45% of its original size. - */ - big_oops_buf_sz = (oops_data_sz * 100) / 45; - big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (big_oops_buf) { - stream.workspace = kmalloc(zlib_deflate_workspacesize( - WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); - if (!stream.workspace) { - pr_err("nvram: No memory for compression workspace; " - "skipping compression of %s partition data\n", - oops_log_partition.name); - kfree(big_oops_buf); - big_oops_buf = NULL; - } - } else { - pr_err("No memory for uncompressed %s data; " - "skipping compression\n", oops_log_partition.name); - stream.workspace = NULL; - } - - rc = kmsg_dump_register(&nvram_kmsg_dumper); - if (rc != 0) { - pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); - kfree(oops_buf); - kfree(big_oops_buf); - kfree(stream.workspace); - } -} - static int __init pseries_nvram_init_log_partitions(void) { int rc; @@ -798,7 +211,7 @@ static int __init pseries_nvram_init_log_partitions(void) /* Scan nvram for partitions */ nvram_scan_partitions(); - rc = pseries_nvram_init_os_partition(&rtas_log_partition); + rc = nvram_init_os_partition(&rtas_log_partition); nvram_init_oops_partition(rc == 0); return 0; } @@ -834,72 +247,3 @@ int __init pSeries_nvram_init(void) return 0; } - -/* - * This is our kmsg_dump callback, called after an oops or panic report - * has been written to the printk buffer. We want to capture as much - * of the printk buffer as possible. First, capture as much as we can - * that we think will compress sufficiently to fit in the lnx,oops-log - * partition. If that's too much, go back and capture uncompressed text. - */ -static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) -{ - struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; - static unsigned int oops_count = 0; - static bool panicking = false; - static DEFINE_SPINLOCK(lock); - unsigned long flags; - size_t text_len; - unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; - int rc = -1; - - switch (reason) { - case KMSG_DUMP_RESTART: - case KMSG_DUMP_HALT: - case KMSG_DUMP_POWEROFF: - /* These are almost always orderly shutdowns. */ - return; - case KMSG_DUMP_OOPS: - break; - case KMSG_DUMP_PANIC: - panicking = true; - break; - case KMSG_DUMP_EMERG: - if (panicking) - /* Panic report already captured. 
*/ - return; - break; - default: - pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __func__, (int) reason); - return; - } - - if (clobbering_unread_rtas_event()) - return; - - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (big_oops_buf) { - kmsg_dump_get_buffer(dumper, false, - big_oops_buf, big_oops_buf_sz, &text_len); - rc = zip_oops(text_len); - } - if (rc != 0) { - kmsg_dump_rewind(dumper); - kmsg_dump_get_buffer(dumper, false, - oops_data, oops_data_sz, &text_len); - err_type = ERR_TYPE_KERNEL_PANIC; - oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); - oops_hdr->report_length = cpu_to_be16(text_len); - oops_hdr->timestamp = cpu_to_be64(get_seconds()); - } - - (void) nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_hdr) + text_len), err_type, - ++oops_count); - - spin_unlock_irqrestore(&lock, flags); -} diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 89e23811199c..5d4a3df59d0c 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -32,6 +32,8 @@ #include <asm/firmware.h> #include <asm/eeh.h> +#include "pseries.h" + static struct pci_bus * find_bus_among_children(struct pci_bus *bus, struct device_node *dn) @@ -75,6 +77,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) return NULL; rtas_setup_phb(phb); pci_process_bridge_OF_ranges(phb, dn, 0); + phb->controller_ops = pseries_pci_controller_ops; pci_devs_phb_init_dynamic(phb); @@ -82,7 +85,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) eeh_dev_phb_init_dynamic(phb); if (dn->child) - eeh_add_device_tree_early(dn); + eeh_add_device_tree_early(PCI_DN(dn)); pcibios_scan_phb(phb); pcibios_finish_adding_to_bus(phb->bus); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1796c5438cc6..8411c27293e4 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -11,6 +11,7 @@ #define _PSERIES_PSERIES_H #include <linux/interrupt.h> +#include <asm/rtas.h> struct device_node; @@ -60,11 +61,24 @@ extern struct device_node *dlpar_configure_connector(__be32, struct device_node *); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); +extern int dlpar_acquire_drc(u32 drc_index); +extern int dlpar_release_drc(u32 drc_index); + +#ifdef CONFIG_MEMORY_HOTPLUG +int dlpar_memory(struct pseries_hp_errorlog *hp_elog); +#else +static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +#endif /* PCI root bridge prepare function override for pseries */ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); +extern struct pci_controller_ops pseries_pci_controller_ops; + unsigned long pseries_memory_block_size(void); #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e445b6701f50..df6a7041922b 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -265,7 +265,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act update_dn_pci_info(np, pci->phb); /* Create EEH device for the OF node */ - eeh_dev_init(np, pci->phb); + eeh_dev_init(PCI_DN(np), pci->phb); } break; default: @@ -461,6 +461,47 @@ static long pseries_little_endian_exceptions(void) } #endif +static void __init find_and_init_phbs(void) 
+{ + struct device_node *node; + struct pci_controller *phb; + struct device_node *root = of_find_node_by_path("/"); + + for_each_child_of_node(root, node) { + if (node->type == NULL || (strcmp(node->type, "pci") != 0 && + strcmp(node->type, "pciex") != 0)) + continue; + + phb = pcibios_alloc_controller(node); + if (!phb) + continue; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, node, 0); + isa_bridge_find_early(phb); + phb->controller_ops = pseries_pci_controller_ops; + } + + of_node_put(root); + pci_devs_phb_init(); + + /* + * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties + * in chosen. + */ + if (of_chosen) { + const int *prop; + + prop = of_get_property(of_chosen, + "linux,pci-probe-only", NULL); + if (prop) { + if (*prop) + pci_add_flags(PCI_PROBE_ONLY); + else + pci_clear_flags(PCI_PROBE_ONLY); + } + } +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -793,6 +834,10 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) void pSeries_final_fixup(void) { } #endif +struct pci_controller_ops pseries_pci_controller_ops = { + .probe_mode = pSeries_pci_probe_mode, +}; + define_machine(pseries) { .name = "pSeries", .probe = pSeries_probe, @@ -801,7 +846,6 @@ define_machine(pseries) { .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, .pcibios_fixup = pSeries_final_fixup, - .pci_probe_mode = pSeries_pci_probe_mode, .restart = rtas_restart, .halt = rtas_halt, .panic = rtas_os_term, diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index a3555b10c1a5..6932ea803e33 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -197,16 +197,14 @@ static void pSeries_cause_ipi_mux(int cpu, unsigned long data) xics_cause_ipi(cpu, data); } -static __init int pSeries_smp_probe(void) +static __init void pSeries_smp_probe(void) { - int ret = xics_smp_probe(); + xics_smp_probe(); if (cpu_has_feature(CPU_FTR_DBELL)) { xics_cause_ipi = smp_ops->cause_ipi; smp_ops->cause_ipi = pSeries_cause_ipi_mux; } - - return ret; } static struct smp_ops_t pSeries_mpic_smp_ops = { diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl deleted file mode 100755 index 3f46e8b9c56d..000000000000 --- a/arch/powerpc/relocs_check.pl +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/perl - -# Copyright © 2009 IBM Corporation - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version -# 2 of the License, or (at your option) any later version. - -# This script checks the relocations of a vmlinux for "suspicious" -# relocations. - -use strict; -use warnings; - -if ($#ARGV != 1) { - die "$0 [path to objdump] [path to vmlinux]\n"; -} - -# Have Kbuild supply the path to objdump so we handle cross compilation. -my $objdump = shift; -my $vmlinux = shift; -my $bad_relocs_count = 0; -my $bad_relocs = ""; -my $old_binutils = 0; - -open(FD, "$objdump -R $vmlinux|") or die; -while (<FD>) { - study $_; - - # Only look at relocation lines. 
- next if (!/\s+R_/); - - # These relocations are okay - # On PPC64: - # R_PPC64_RELATIVE, R_PPC64_NONE, R_PPC64_ADDR64 - # On PPC: - # R_PPC_RELATIVE, R_PPC_ADDR16_HI, - # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, - # R_PPC_NONE - - next if (/\bR_PPC64_RELATIVE\b/ or /\bR_PPC64_NONE\b/ or - /\bR_PPC64_ADDR64\s+mach_/); - next if (/\bR_PPC_ADDR16_LO\b/ or /\bR_PPC_ADDR16_HI\b/ or - /\bR_PPC_ADDR16_HA\b/ or /\bR_PPC_RELATIVE\b/ or - /\bR_PPC_NONE\b/); - - # If we see this type of relocation it's an idication that - # we /may/ be using an old version of binutils. - if (/R_PPC64_UADDR64/) { - $old_binutils++; - } - - $bad_relocs_count++; - $bad_relocs .= $_; -} - -if ($bad_relocs_count) { - print "WARNING: $bad_relocs_count bad relocations\n"; - print $bad_relocs; -} - -if ($old_binutils) { - print "WARNING: You need at least binutils >= 2.19 to build a ". - "CONFIG_RELOCATABLE kernel\n"; -} diff --git a/arch/powerpc/relocs_check.sh b/arch/powerpc/relocs_check.sh new file mode 100755 index 000000000000..2e4ebd0e25b3 --- /dev/null +++ b/arch/powerpc/relocs_check.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +# Copyright © 2015 IBM Corporation + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks the relocations of a vmlinux for "suspicious" +# relocations. + +# based on relocs_check.pl +# Copyright © 2009 IBM Corporation + +if [ $# -lt 2 ]; then + echo "$0 [path to objdump] [path to vmlinux]" 1>&2 + exit 1 +fi + +# Have Kbuild supply the path to objdump so we handle cross compilation. +objdump="$1" +vmlinux="$2" + +bad_relocs=$( +"$objdump" -R "$vmlinux" | + # Only look at relocation lines. + grep -E '\<R_' | + # These relocations are okay + # On PPC64: + # R_PPC64_RELATIVE, R_PPC64_NONE + # R_PPC64_ADDR64 mach_<name> + # On PPC: + # R_PPC_RELATIVE, R_PPC_ADDR16_HI, + # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, + # R_PPC_NONE + grep -F -w -v 'R_PPC64_RELATIVE +R_PPC64_NONE +R_PPC_ADDR16_LO +R_PPC_ADDR16_HI +R_PPC_ADDR16_HA +R_PPC_RELATIVE +R_PPC_NONE' | + grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_' +) + +if [ -z "$bad_relocs" ]; then + exit 0 +fi + +num_bad=$(echo "$bad_relocs" | wc -l) +echo "WARNING: $num_bad bad relocations" +echo "$bad_relocs" + +# If we see this type of relocation it's an idication that +# we /may/ be using an old version of binutils. 
+if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then + echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" +fi diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 9e5353ff6d1b..d00a5663e312 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -369,7 +369,7 @@ static int dart_dma_set_mask(struct device *dev, u64 dma_mask) return 0; } -void __init iommu_init_early_dart(void) +void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) { struct device_node *dn; @@ -395,8 +395,8 @@ void __init iommu_init_early_dart(void) if (dart_is_u4) ppc_md.dma_set_mask = dart_dma_set_mask; - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + controller_ops->dma_dev_setup = pci_dma_dev_setup_dart; + controller_ops->dma_bus_setup = pci_dma_bus_setup_dart; /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); @@ -404,8 +404,8 @@ void __init iommu_init_early_dart(void) bail: /* If init failed, use direct iommu and null setup functions */ - ppc_md.pci_dma_dev_setup = NULL; - ppc_md.pci_dma_bus_setup = NULL; + controller_ops->dma_dev_setup = NULL; + controller_ops->dma_bus_setup = NULL; /* Setup pci_dma ops */ set_pci_dma_ops(&dma_direct_ops); diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index 2d8a101b6b9e..121e26fffd50 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -54,7 +54,7 @@ bool dcr_map_ok_generic(dcr_host_t host) else if (host.type == DCR_HOST_MMIO) return dcr_map_ok_mmio(host.host.mmio); else - return 0; + return false; } EXPORT_SYMBOL_GPL(dcr_map_ok_generic); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 4bbb4b8dfd09..f086c6f22dc9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -162,7 +162,17 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq, msg->address_lo = lower_32_bits(address); msg->address_hi = upper_32_bits(address); - msg->data = hwirq; + /* + * MPIC version 2.0 has erratum PIC1. It causes + * that neither MSI nor MSI-X can work fine. + * This is a workaround to allow MSI-X to function + * properly. It only works for MSI-X, we prevent + * MSI on buggy chips in fsl_setup_msi_irqs(). + */ + if (msi_data->feature & MSI_HW_ERRATA_ENDIAN) + msg->data = __swab32(hwirq); + else + msg->data = hwirq; pr_debug("%s: allocated srs: %d, ibs: %d\n", __func__, (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK, @@ -180,8 +190,16 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; struct fsl_msi *msi_data; - if (type == PCI_CAP_ID_MSIX) - pr_debug("fslmsi: MSI-X untested, trying anyway.\n"); + if (type == PCI_CAP_ID_MSI) { + /* + * MPIC version 2.0 has erratum PIC1. For now MSI + * could not work. So check to prevent MSI from + * being used on the board with this erratum. 
+ */ + list_for_each_entry(msi_data, &msi_head, list) + if (msi_data->feature & MSI_HW_ERRATA_ENDIAN) + return -EINVAL; + } /* * If the PCI node has an fsl,msi property, then we need to use it @@ -446,6 +464,11 @@ static int fsl_of_msi_probe(struct platform_device *dev) msi->feature = features->fsl_pic_ip; + /* For erratum PIC1 on MPIC version 2.0*/ + if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) == FSL_PIC_IP_MPIC + && (fsl_mpic_primary_get_version() == 0x0200)) + msi->feature |= MSI_HW_ERRATA_ENDIAN; + /* * Remember the phandle, so that we can match with any PCI nodes * that have an "fsl,msi" property. diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index 420cfcbdac01..a67359d993e5 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -27,6 +27,8 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +#define MSI_HW_ERRATA_ENDIAN 0x00000010 + struct fsl_msi_cascade_data; struct fsl_msi { diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4b74c276e427..9a8fcf0d79d7 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -111,6 +111,18 @@ static struct pci_ops fsl_indirect_pcie_ops = #define MAX_PHYS_ADDR_BITS 40 static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS; +#ifdef CONFIG_SWIOTLB +static void setup_swiotlb_ops(struct pci_controller *hose) +{ + if (ppc_swiotlb_enable) { + hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb; + set_pci_dma_ops(&swiotlb_dma_ops); + } +} +#else +static inline void setup_swiotlb_ops(struct pci_controller *hose) {} +#endif + static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) @@ -548,6 +560,9 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) /* Setup PEX window registers */ setup_pci_atmu(hose); + /* Set up controller operations */ + setup_swiotlb_ops(hose); + return 0; no_bridge: diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index bbfbbf2025fd..b2b8447a227a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -655,7 +655,6 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d) static inline void mpic_eoi(struct mpic *mpic) { mpic_cpu_write(MPIC_INFO(CPU_EOI), 0); - (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); } /* @@ -1676,31 +1675,6 @@ void __init mpic_init(struct mpic *mpic) mpic_err_int_init(mpic, MPIC_FSL_ERR_INT); } -void __init mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio) -{ - u32 v; - - v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1); - v &= ~MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK; - v |= MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO(clock_ratio); - mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v); -} - -void __init mpic_set_serial_int(struct mpic *mpic, int enable) -{ - unsigned long flags; - u32 v; - - raw_spin_lock_irqsave(&mpic_lock, flags); - v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1); - if (enable) - v |= MPIC_GREG_GLOBAL_CONF_1_SIE; - else - v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE; - mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v); - raw_spin_unlock_irqrestore(&mpic_lock, flags); -} - void mpic_irq_set_priority(unsigned int irq, unsigned int pri) { struct mpic *mpic = mpic_find(irq); @@ -1923,7 +1897,7 @@ void smp_mpic_message_pass(int cpu, int msg) msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask); } -int __init smp_mpic_probe(void) +void __init smp_mpic_probe(void) { int nr_cpus; @@ -1935,8 +1909,6 @@ int __init smp_mpic_probe(void) if 
(nr_cpus > 1) mpic_request_ipis(); - - return nr_cpus; } void smp_mpic_setup_cpu(int cpu) diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c index d09994164daf..7ea0174f6d3d 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_io.c +++ b/arch/powerpc/sysdev/qe_lib/qe_io.c @@ -190,28 +190,3 @@ int par_io_of_config(struct device_node *np) return 0; } EXPORT_SYMBOL(par_io_of_config); - -#ifdef DEBUG -static void dump_par_io(void) -{ - unsigned int i; - - printk(KERN_INFO "%s: par_io=%p\n", __func__, par_io); - for (i = 0; i < num_par_io_ports; i++) { - printk(KERN_INFO " cpodr[%u]=%08x\n", i, - in_be32(&par_io[i].cpodr)); - printk(KERN_INFO " cpdata[%u]=%08x\n", i, - in_be32(&par_io[i].cpdata)); - printk(KERN_INFO " cpdir1[%u]=%08x\n", i, - in_be32(&par_io[i].cpdir1)); - printk(KERN_INFO " cpdir2[%u]=%08x\n", i, - in_be32(&par_io[i].cpdir2)); - printk(KERN_INFO " cppar1[%u]=%08x\n", i, - in_be32(&par_io[i].cppar1)); - printk(KERN_INFO " cppar2[%u]=%08x\n", i, - in_be32(&par_io[i].cppar2)); - } - -} -EXPORT_SYMBOL(dump_par_io); -#endif /* DEBUG */ diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c index befaf1123f7f..5f91628209eb 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c @@ -43,11 +43,6 @@ u32 ucc_slow_get_qe_cr_subblock(int uccs_num) } EXPORT_SYMBOL(ucc_slow_get_qe_cr_subblock); -void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs) -{ - out_be16(&uccs->us_regs->utodr, UCC_SLOW_TOD); -} - void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs) { struct ucc_slow_info *us_info = uccs->us_info; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 125743b58c70..878a54036a25 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -140,15 +140,13 @@ static void xics_request_ipi(void) IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); } -int __init xics_smp_probe(void) +void __init xics_smp_probe(void) { /* Setup cause_ipi callback based on which ICP is used */ smp_ops->cause_ipi = icp_ops->cause_ipi; /* Register all the IPIs */ xics_request_ipi(); - - return num_possible_cpus(); } #endif /* CONFIG_SMP */ diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 3323c2745248..a55abb9f6c5e 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile @@ -19,6 +19,9 @@ quiet_cmd_syshdr = SYSHDR $@ quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ +quiet_cmd_hypercalls = HYPERCALLS $@ + cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^) + syshdr_abi_unistd_32 := i386 $(uapi)/unistd_32.h: $(syscall32) $(syshdr) $(call if_changed,syshdr) @@ -47,10 +50,16 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl) $(out)/syscalls_64.h: $(syscall64) $(systbl) $(call if_changed,systbl) +$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh + $(call if_changed,hypercalls) + +$(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h + uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h +syshdr-$(CONFIG_XEN) += xen-hypercalls.h targets += $(uapisyshdr-y) $(syshdr-y) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 7005ced5d1ad..70e060ad879a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -7,6 +7,7 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> #include "xen-ops.h" 
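The xen-hypercalls.h generated by scripts/xen-hypercalls.sh is consumed with the classic X-macro trick: each include site defines HYPERCALL() so the same generated list expands differently, which is exactly what the trace.c and xen-head.S hunks later in this patch do. A minimal sketch of the technique, with a hypothetical my-hypercalls.h standing in for the generated header:

    /*
     * my-hypercalls.h (stand-in for the generated header) would contain
     * one line per hypercall, e.g.:
     *     HYPERCALL(set_trap_table)
     *     HYPERCALL(mmu_update)
     */

    #define HYPERCALL(x) [__HYPERVISOR_##x] = "(" #x ")",
    static const char *hypercall_names[] = {
    #include "my-hypercalls.h"
    };
    #undef HYPERCALL

The same list can then be re-expanded elsewhere with a different HYPERCALL() definition (for instance, emitting one symbol per 32-byte hypercall-page slot, as xen-head.S does), so the table and the entry points can never drift apart.
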
+#include "smp.h" static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { @@ -28,7 +29,186 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } +static unsigned long xen_set_apic_id(unsigned int x) +{ + WARN_ON(1); + return x; +} + +static unsigned int xen_get_apic_id(unsigned long x) +{ + return ((x)>>24) & 0xFFu; +} + +static u32 xen_apic_read(u32 reg) +{ + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = 0, + }; + int ret = 0; + + /* Shouldn't need this as APIC is turned off for PV, and we only + * get called on the bootup processor. But just in case. */ + if (!xen_initial_domain() || smp_processor_id()) + return 0; + + if (reg == APIC_LVR) + return 0x10; +#ifdef CONFIG_X86_32 + if (reg == APIC_LDR) + return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); +#endif + if (reg != APIC_ID) + return 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return 0; + + return op.u.pcpu_info.apic_id << 24; +} + +static void xen_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN(1,"register: %x, value: %x\n", reg, val); +} + +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static int xen_apic_probe_pv(void) +{ + if (xen_pv_domain()) + return 1; + + return 0; +} + +static int xen_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return xen_pv_domain(); +} + +static int xen_id_always_valid(int apicid) +{ + return 1; +} + +static int xen_id_always_registered(void) +{ + return 1; +} + +static int xen_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return initial_apic_id >> index_msb; +} + +#ifdef CONFIG_X86_32 +static int xen_x86_32_early_logical_apicid(int cpu) +{ + /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ + return 1 << cpu; +} +#endif + +static void xen_noop(void) +{ +} + +static void xen_silent_inquire(int apicid) +{ +} + +static struct apic xen_pv_apic = { + .name = "Xen PV", + .probe = xen_apic_probe_pv, + .acpi_madt_oem_check = xen_madt_oem_check, + .apic_id_valid = xen_id_always_valid, + .apic_id_registered = xen_id_always_registered, + + /* .irq_delivery_mode - used in native_compose_msi_msg only */ + /* .irq_dest_mode - used in native_compose_msi_msg only */ + + .target_cpus = default_target_cpus, + .disable_esr = 0, + /* .dest_logical - default_send_IPI_ use it but we use our own. */ + .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ + + .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ + .setup_apic_routing = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, /* Used on 32-bit */ + .check_phys_apicid_present = default_check_phys_apicid_present, /* smp_sanity_check needs it */ + .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ + + .get_apic_id = xen_get_apic_id, + .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ + .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. 
*/ + + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + +#ifdef CONFIG_SMP + .send_IPI_mask = xen_send_IPI_mask, + .send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself, + .send_IPI_allbutself = xen_send_IPI_allbutself, + .send_IPI_all = xen_send_IPI_all, + .send_IPI_self = xen_send_IPI_self, +#endif + /* .wait_for_init_deassert- used by AP bootup - smp_callin which we don't use */ + .inquire_remote_apic = xen_silent_inquire, + + .read = xen_apic_read, + .write = xen_apic_write, + .eoi_write = xen_apic_write, + + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_noop, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, + +#ifdef CONFIG_X86_32 + /* generic_processor_info and setup_local_APIC. */ + .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, +#endif +}; + +static void __init xen_apic_check(void) +{ + if (apic == &xen_pv_apic) + return; + + pr_info("Switched APIC routing from %s to %s.\n", apic->name, + xen_pv_apic.name); + apic = &xen_pv_apic; +} void __init xen_init_apic(void) { x86_io_apic_ops.read = xen_io_apic_read; + /* On PV guests the APIC CPUID bit is disabled so none of the + * routines end up executing. */ + if (!xen_initial_domain()) + apic = &xen_pv_apic; + + x86_platform.apic_post_init = xen_apic_check; } +apic_driver(xen_pv_apic); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..94578efd3067 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -928,92 +928,6 @@ static void xen_io_delay(void) { } -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_set_apic_id(unsigned int x) -{ - WARN_ON(1); - return x; -} -static unsigned int xen_get_apic_id(unsigned long x) -{ - return ((x)>>24) & 0xFFu; -} -static u32 xen_apic_read(u32 reg) -{ - struct xen_platform_op op = { - .cmd = XENPF_get_cpuinfo, - .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, - }; - int ret = 0; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. 
*/ - if (!xen_initial_domain() || smp_processor_id()) - return 0; - - if (reg == APIC_LVR) - return 0x10; - - if (reg != APIC_ID) - return 0; - - ret = HYPERVISOR_dom0_op(&op); - if (ret) - return 0; - - return op.u.pcpu_info.apic_id << 24; -} - -static void xen_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static void set_xen_basic_apic_ops(void) -{ - apic->read = xen_apic_read; - apic->write = xen_apic_write; - apic->icr_read = xen_apic_icr_read; - apic->icr_write = xen_apic_icr_write; - apic->wait_icr_idle = xen_apic_wait_icr_idle; - apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; - apic->set_apic_id = xen_set_apic_id; - apic->get_apic_id = xen_get_apic_id; - -#ifdef CONFIG_SMP - apic->send_IPI_allbutself = xen_send_IPI_allbutself; - apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; - apic->send_IPI_mask = xen_send_IPI_mask; - apic->send_IPI_all = xen_send_IPI_all; - apic->send_IPI_self = xen_send_IPI_self; -#endif -} - -#endif - static void xen_clts(void) { struct multicall_space mcs; @@ -1619,7 +1533,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* * set up the basic apic ops. */ - set_xen_basic_apic_ops(); + xen_init_apic(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { @@ -1732,8 +1646,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (HYPERVISOR_dom0_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; - xen_init_apic(); - /* Make sure ACS will be enabled */ pci_request_acs(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 65083ad63b6f..dd151b2045b0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2436,99 +2436,11 @@ void __init xen_hvm_init_mmu_ops(void) } #endif -#ifdef CONFIG_XEN_PVH -/* - * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user - * space creating new guest on pvh dom0 and needing to map domU pages. 
- */ -static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, - unsigned int domid) -{ - int rc, err = 0; - xen_pfn_t gpfn = lpfn; - xen_ulong_t idx = fgfn; - - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); - - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - if (rc < 0) - return rc; - return err; -} - -static int xlate_remove_from_p2m(unsigned long spfn, int count) -{ - struct xen_remove_from_physmap xrp; - int i, rc; - - for (i = 0; i < count; i++) { - xrp.domid = DOMID_SELF; - xrp.gpfn = spfn+i; - rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - if (rc) - break; - } - return rc; -} - -struct xlate_remap_data { - unsigned long fgfn; /* foreign domain's gfn */ - pgprot_t prot; - domid_t domid; - int index; - struct page **pages; -}; - -static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) -{ - int rc; - struct xlate_remap_data *remap = data; - unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); - pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); - - rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); - if (rc) - return rc; - native_set_pte(ptep, pteval); - - return 0; -} - -static int xlate_remap_gfn_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long mfn, - int nr, pgprot_t prot, unsigned domid, - struct page **pages) -{ - int err; - struct xlate_remap_data pvhdata; - - BUG_ON(!pages); - - pvhdata.fgfn = mfn; - pvhdata.prot = prot; - pvhdata.domid = domid; - pvhdata.index = 0; - pvhdata.pages = pages; - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, - xlate_map_pte_fn, &pvhdata); - flush_tlb_all(); - return err; -} -#endif - #define REMAP_BATCH_SIZE 16 struct remap_data { - unsigned long mfn; + xen_pfn_t *mfn; + bool contiguous; pgprot_t prot; struct mmu_update *mmu_update; }; @@ -2537,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + + /* If we have a contigious range, just update the mfn itself, + else update pointer to be "next mfn". 
*/ + if (rmd->contiguous) + (*rmd->mfn)++; + else + rmd->mfn++; rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->val = pte_val_ma(pte); @@ -2546,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, return 0; } -int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t mfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) - +static int do_remap_mfn(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) { + int err = 0; struct remap_data rmd; struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; unsigned long range; - int err = 0; + int mapped = 0; BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); if (xen_feature(XENFEAT_auto_translated_physmap)) { #ifdef CONFIG_XEN_PVH /* We need to update the local page tables and the xen HAP */ - return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, - domid, pages); + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, + prot, domid, pages); #else return -EINVAL; #endif @@ -2573,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, rmd.mfn = mfn; rmd.prot = prot; + /* We use the err_ptr to indicate if there we are doing a contigious + * mapping or a discontigious mapping. */ + rmd.contiguous = !err_ptr; while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); + int index = 0; + int done = 0; + int batch = min(REMAP_BATCH_SIZE, nr); + int batch_left = batch; range = (unsigned long)batch << PAGE_SHIFT; rmd.mmu_update = mmu_update; @@ -2584,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); - if (err < 0) - goto out; + /* We record the error for each page that gives an error, but + * continue mapping until the whole set is done */ + do { + int i; + + err = HYPERVISOR_mmu_update(&mmu_update[index], + batch_left, &done, domid); + + /* + * @err_ptr may be the same buffer as @mfn, so + * only clear it after each chunk of @mfn is + * used. + */ + if (err_ptr) { + for (i = index; i < index + done; i++) + err_ptr[i] = 0; + } + if (err < 0) { + if (!err_ptr) + goto out; + err_ptr[i] = err; + done++; /* Skip failed frame. */ + } else + mapped += done; + batch_left -= done; + index += done; + } while (batch_left); nr -= batch; addr += range; + if (err_ptr) + err_ptr += batch; } - - err = 0; out: xen_flush_tlb_all(); - return err; + return err < 0 ? err : mapped; +} + +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) +{ + return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, struct page **pages) +{ + /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, + * and the consequences later is quite hard to detect what the actual + * cause of "wrong memory was mapped in". 
+ */ + BUG_ON(err_ptr == NULL); + return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + + /* Returns: 0 success */ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages) @@ -2609,22 +2583,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, return 0; #ifdef CONFIG_XEN_PVH - while (numpgs--) { - /* - * The mmu has already cleaned up the process mmu - * resources at this point (lookup_address will return - * NULL). - */ - unsigned long pfn = page_to_pfn(pages[numpgs]); - - xlate_remove_from_p2m(pfn, 1); - } - /* - * We don't need to flush tlbs because as part of - * xlate_remove_from_p2m, the hypervisor will do tlb flushes - * after removing the p2m entries from the EPT/NPT - */ - return 0; + return xen_xlate_unmap_gfn_range(vma, numpgs, pages); #else return -EINVAL; #endif diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 520022d1a181..a702ec2f5931 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c @@ -1,54 +1,12 @@ #include <linux/ftrace.h> #include <xen/interface/xen.h> +#include <xen/interface/xen-mca.h> -#define N(x) [__HYPERVISOR_##x] = "("#x")" +#define HYPERCALL(x) [__HYPERVISOR_##x] = "("#x")", static const char *xen_hypercall_names[] = { - N(set_trap_table), - N(mmu_update), - N(set_gdt), - N(stack_switch), - N(set_callbacks), - N(fpu_taskswitch), - N(sched_op_compat), - N(dom0_op), - N(set_debugreg), - N(get_debugreg), - N(update_descriptor), - N(memory_op), - N(multicall), - N(update_va_mapping), - N(set_timer_op), - N(event_channel_op_compat), - N(xen_version), - N(console_io), - N(physdev_op_compat), - N(grant_table_op), - N(vm_assist), - N(update_va_mapping_otherdomain), - N(iret), - N(vcpu_op), - N(set_segment_base), - N(mmuext_op), - N(acm_op), - N(nmi_op), - N(sched_op), - N(callback_op), - N(xenoprof_op), - N(event_channel_op), - N(physdev_op), - N(hvm_op), - -/* Architecture-specific hypercall definitions. 
*/ - N(arch_0), - N(arch_1), - N(arch_2), - N(arch_3), - N(arch_4), - N(arch_5), - N(arch_6), - N(arch_7), +#include <asm/xen-hypercalls.h> }; -#undef N +#undef HYPERCALL static const char *xen_hypercall_name(unsigned op) { diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 674b222544b7..8afdfccf6086 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -12,6 +12,8 @@ #include <xen/interface/elfnote.h> #include <xen/interface/features.h> +#include <xen/interface/xen.h> +#include <xen/interface/xen-mca.h> #include <asm/xen/interface.h> #ifdef CONFIG_XEN_PVH @@ -85,59 +87,14 @@ ENTRY(xen_pvh_early_cpu_init) .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) -#define NEXT_HYPERCALL(x) \ - ENTRY(xen_hypercall_##x) \ - .skip 32 - -NEXT_HYPERCALL(set_trap_table) -NEXT_HYPERCALL(mmu_update) -NEXT_HYPERCALL(set_gdt) -NEXT_HYPERCALL(stack_switch) -NEXT_HYPERCALL(set_callbacks) -NEXT_HYPERCALL(fpu_taskswitch) -NEXT_HYPERCALL(sched_op_compat) -NEXT_HYPERCALL(platform_op) -NEXT_HYPERCALL(set_debugreg) -NEXT_HYPERCALL(get_debugreg) -NEXT_HYPERCALL(update_descriptor) -NEXT_HYPERCALL(ni) -NEXT_HYPERCALL(memory_op) -NEXT_HYPERCALL(multicall) -NEXT_HYPERCALL(update_va_mapping) -NEXT_HYPERCALL(set_timer_op) -NEXT_HYPERCALL(event_channel_op_compat) -NEXT_HYPERCALL(xen_version) -NEXT_HYPERCALL(console_io) -NEXT_HYPERCALL(physdev_op_compat) -NEXT_HYPERCALL(grant_table_op) -NEXT_HYPERCALL(vm_assist) -NEXT_HYPERCALL(update_va_mapping_otherdomain) -NEXT_HYPERCALL(iret) -NEXT_HYPERCALL(vcpu_op) -NEXT_HYPERCALL(set_segment_base) -NEXT_HYPERCALL(mmuext_op) -NEXT_HYPERCALL(xsm_op) -NEXT_HYPERCALL(nmi_op) -NEXT_HYPERCALL(sched_op) -NEXT_HYPERCALL(callback_op) -NEXT_HYPERCALL(xenoprof_op) -NEXT_HYPERCALL(event_channel_op) -NEXT_HYPERCALL(physdev_op) -NEXT_HYPERCALL(hvm_op) -NEXT_HYPERCALL(sysctl) -NEXT_HYPERCALL(domctl) -NEXT_HYPERCALL(kexec_op) -NEXT_HYPERCALL(tmem_op) /* 38 */ -ENTRY(xen_hypercall_rsvr) - .skip 320 -NEXT_HYPERCALL(mca) /* 48 */ -NEXT_HYPERCALL(arch_1) -NEXT_HYPERCALL(arch_2) -NEXT_HYPERCALL(arch_3) -NEXT_HYPERCALL(arch_4) -NEXT_HYPERCALL(arch_5) -NEXT_HYPERCALL(arch_6) - .balign PAGE_SIZE + .skip PAGE_SIZE + +#define HYPERCALL(n) \ + .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ + .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 +#include <asm/xen-hypercalls.h> +#undef HYPERCALL + .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/block/blk-core.c b/block/blk-core.c index 794c3e7f01cf..fd154b94447a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -557,6 +557,18 @@ void blk_cleanup_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_cleanup_queue); +/* Allocate memory local to the request queue */ +static void *alloc_request_struct(gfp_t gfp_mask, void *data) +{ + int nid = (int)(long)data; + return kmem_cache_alloc_node(request_cachep, gfp_mask, nid); +} + +static void free_request_struct(void *element, void *unused) +{ + kmem_cache_free(request_cachep, element); +} + int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask) { @@ -569,9 +581,10 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, request_cachep, - gfp_mask, q->node); + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct, + free_request_struct, + (void *)(long)q->node, gfp_mask, + q->node); 
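The alloc_request_struct()/free_request_struct() pair above exists so the request mempool allocates its elements on the queue's home NUMA node; the node id is smuggled through the opaque pool_data pointer instead of a separately allocated context. The same pattern in isolation (my_cache, MIN_NR and nid are assumed, not from the patch):

    static void *my_alloc(gfp_t gfp_mask, void *data)
    {
            /* pool_data carries the NUMA node id, cast to a pointer */
            return kmem_cache_alloc_node(my_cache, gfp_mask,
                                         (int)(long)data);
    }

    static void my_free(void *element, void *unused)
    {
            kmem_cache_free(my_cache, element);
    }

    /* at init time: */
    pool = mempool_create_node(MIN_NR, my_alloc, my_free,
                               (void *)(long)nid, GFP_KERNEL, nid);
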
if (!rl->rq_pool) return -ENOMEM; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 1630a20d5dcf..b79685e06b70 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -436,6 +436,7 @@ int blk_mq_register_disk(struct gendisk *disk) return 0; } +EXPORT_SYMBOL_GPL(blk_mq_register_disk); void blk_mq_sysfs_unregister(struct request_queue *q) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 33c428530193..c82de08f3721 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -33,7 +33,6 @@ static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); -static void blk_mq_run_queues(struct request_queue *q); /* * Check if any of the ctx's have pending work in this hardware queue @@ -78,7 +77,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); } -static int blk_mq_queue_enter(struct request_queue *q) +static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) { while (true) { int ret; @@ -86,6 +85,9 @@ static int blk_mq_queue_enter(struct request_queue *q) if (percpu_ref_tryget_live(&q->mq_usage_counter)) return 0; + if (!(gfp & __GFP_WAIT)) + return -EBUSY; + ret = wait_event_interruptible(q->mq_freeze_wq, !q->mq_freeze_depth || blk_queue_dying(q)); if (blk_queue_dying(q)) @@ -118,7 +120,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q) if (freeze) { percpu_ref_kill(&q->mq_usage_counter); - blk_mq_run_queues(q); + blk_mq_run_hw_queues(q, false); } } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); @@ -257,7 +259,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_alloc_data alloc_data; int ret; - ret = blk_mq_queue_enter(q); + ret = blk_mq_queue_enter(q, gfp); if (ret) return ERR_PTR(ret); @@ -904,7 +906,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) &hctx->run_work, 0); } -static void blk_mq_run_queues(struct request_queue *q) +void blk_mq_run_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; int i; @@ -915,9 +917,10 @@ static void blk_mq_run_queues(struct request_queue *q) test_bit(BLK_MQ_S_STOPPED, &hctx->state)) continue; - blk_mq_run_hw_queue(hctx, false); + blk_mq_run_hw_queue(hctx, async); } } +EXPORT_SYMBOL(blk_mq_run_hw_queues); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { @@ -1186,7 +1189,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, int rw = bio_data_dir(bio); struct blk_mq_alloc_data alloc_data; - if (unlikely(blk_mq_queue_enter(q))) { + if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) { bio_endio(bio, -EIO); return NULL; } @@ -1517,8 +1520,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) if (!bitmap->map) return -ENOMEM; - bitmap->map_size = num_maps; - total = nr_cpu_ids; for (i = 0; i < num_maps; i++) { bitmap->map[i].depth = min(total, bitmap->bits_per_word); @@ -1759,8 +1760,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, continue; hctx = q->mq_ops->map_queue(q, i); - cpumask_set_cpu(i, hctx->cpumask); - hctx->nr_ctx++; /* * Set local node, IFF we have more than one hw queue. If @@ -1797,6 +1796,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) } queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_ctxmap *map = &hctx->ctx_map; + /* * If no software queues are mapped to this hardware queue, * disable it and free the request entries. 
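The reworked blk_mq_queue_enter() in the hunks above takes the caller's allocation flags so atomic contexts fail fast with -EBUSY instead of sleeping on a frozen queue. Condensed from the hunk (a sketch, not a drop-in copy):

    static int my_queue_enter(struct request_queue *q, gfp_t gfp)
    {
            while (true) {
                    int ret;

                    if (percpu_ref_tryget_live(&q->mq_usage_counter))
                            return 0;

                    if (!(gfp & __GFP_WAIT))
                            return -EBUSY;  /* atomic caller: don't block */

                    /* otherwise wait out the freeze and retry */
                    ret = wait_event_interruptible(q->mq_freeze_wq,
                                    !q->mq_freeze_depth ||
                                    blk_queue_dying(q));
                    if (blk_queue_dying(q))
                            return -ENODEV;
                    if (ret)
                            return ret;
            }
    }
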
@@ -1813,6 +1814,13 @@ static void blk_mq_map_swqueue(struct request_queue *q) } /* + * Set the map size to the number of mapped software queues. + * This is more accurate and more efficient than looping + * over all possibly mapped software queues. + */ + map->map_size = hctx->nr_ctx / map->bits_per_word; + + /* * Initialize batch roundrobin counts */ hctx->next_cpu = cpumask_first(hctx->cpumask); @@ -1889,9 +1897,25 @@ void blk_mq_release(struct request_queue *q) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { + struct request_queue *uninit_q, *q; + + uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); + if (!uninit_q) + return ERR_PTR(-ENOMEM); + + q = blk_mq_init_allocated_queue(set, uninit_q); + if (IS_ERR(q)) + blk_cleanup_queue(uninit_q); + + return q; +} +EXPORT_SYMBOL(blk_mq_init_queue); + +struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q) +{ struct blk_mq_hw_ctx **hctxs; struct blk_mq_ctx __percpu *ctx; - struct request_queue *q; unsigned int *map; int i; @@ -1926,20 +1950,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) hctxs[i]->queue_num = i; } - q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); - if (!q) - goto err_hctxs; - /* * Init percpu_ref in atomic mode so that it's faster to shutdown. * See blk_register_queue() for details. */ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) - goto err_mq_usage; + goto err_hctxs; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); - blk_queue_rq_timeout(q, 30000); + blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; @@ -1965,9 +1985,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) else blk_queue_make_request(q, blk_sq_make_request); - if (set->timeout) - blk_queue_rq_timeout(q, set->timeout); - /* * Do this after blk_queue_make_request() overrides it... */ @@ -1979,7 +1996,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) blk_mq_init_cpu_queues(q, set->nr_hw_queues); if (blk_mq_init_hw_queues(q, set)) - goto err_mq_usage; + goto err_hctxs; mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); @@ -1991,8 +2008,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; -err_mq_usage: - blk_cleanup_queue(q); err_hctxs: kfree(map); for (i = 0; i < set->nr_hw_queues; i++) { @@ -2007,7 +2022,7 @@ err_percpu: free_percpu(ctx); return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(blk_mq_init_queue); +EXPORT_SYMBOL(blk_mq_init_allocated_queue); void blk_mq_free_queue(struct request_queue *q) { @@ -2159,7 +2174,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) return -EINVAL; - if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) + if (!set->ops->queue_rq || !set->ops->map_queue) return -EINVAL; if (set->queue_depth > BLK_MQ_MAX_DEPTH) { diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index a02f76fdcfcd..b0028588ff1c 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -540,9 +540,9 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc) BUG_ON (pi++ >= MAX_DCMDS); len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; - st_le16(&table->command, write ? OUTPUT_MORE: INPUT_MORE); - st_le16(&table->req_count, len); - st_le32(&table->phy_addr, addr); + table->command = cpu_to_le16(write ? 
OUTPUT_MORE: INPUT_MORE); + table->req_count = cpu_to_le16(len); + table->phy_addr = cpu_to_le32(addr); table->cmd_dep = 0; table->xfer_status = 0; table->res_count = 0; @@ -557,12 +557,12 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc) /* Convert the last command to an input/output */ table--; - st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST); + table->command = cpu_to_le16(write ? OUTPUT_LAST: INPUT_LAST); table++; /* Add the stop command to the end of the list */ memset(table, 0, sizeof(struct dbdma_cmd)); - st_le16(&table->command, DBDMA_STOP); + table->command = cpu_to_le16(DBDMA_STOP); dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi); } diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index ff8307b30ff0..ff614be55d0f 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -47,11 +47,7 @@ #include <scsi/scsi_cmnd.h> #include <scsi/scsi.h> #include <linux/libata.h> - -#ifdef CONFIG_PPC_OF -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#endif /* CONFIG_PPC_OF */ +#include <linux/of.h> #define DRV_NAME "sata_svw" #define DRV_VERSION "2.3" @@ -320,7 +316,6 @@ static u8 k2_stat_check_status(struct ata_port *ap) return readl(ap->ioaddr.status_addr); } -#ifdef CONFIG_PPC_OF static int k2_sata_show_info(struct seq_file *m, struct Scsi_Host *shost) { struct ata_port *ap; @@ -350,14 +345,10 @@ static int k2_sata_show_info(struct seq_file *m, struct Scsi_Host *shost) } return 0; } -#endif /* CONFIG_PPC_OF */ - static struct scsi_host_template k2_sata_sht = { ATA_BMDMA_SHT(DRV_NAME), -#ifdef CONFIG_PPC_OF .show_info = k2_sata_show_info, -#endif }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1fc83427199c..81fde9ef7f8e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2107,13 +2107,12 @@ static int drbd_create_mempools(void) if (drbd_md_io_page_pool == NULL) goto Enomem; - drbd_request_mempool = mempool_create(number, - mempool_alloc_slab, mempool_free_slab, drbd_request_cache); + drbd_request_mempool = mempool_create_slab_pool(number, + drbd_request_cache); if (drbd_request_mempool == NULL) goto Enomem; - drbd_ee_mempool = mempool_create(number, - mempool_alloc_slab, mempool_free_slab, drbd_ee_cache); + drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache); if (drbd_ee_mempool == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 34f2f0ba409b..3907202fb9d9 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -52,9 +52,10 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, { struct drbd_request *req; - req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO); + req = mempool_alloc(drbd_request_mempool, GFP_NOIO); if (!req) return NULL; + memset(req, 0, sizeof(*req)); drbd_req_make_private_bio(req, bio_src); req->rq_state = bio_data_dir(bio_src) == WRITE ? 
RQ_WRITE : 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c4fd1e45ce1e..ae3fcb4199e9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,28 +88,6 @@ static int part_shift; static struct workqueue_struct *loop_wq; -/* - * Transfer functions - */ -static int transfer_none(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block) -{ - char *raw_buf = kmap_atomic(raw_page) + raw_off; - char *loop_buf = kmap_atomic(loop_page) + loop_off; - - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); - - kunmap_atomic(loop_buf); - kunmap_atomic(raw_buf); - cond_resched(); - return 0; -} - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -148,14 +126,13 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info) static struct loop_func_table none_funcs = { .number = LO_CRYPT_NONE, - .transfer = transfer_none, -}; +}; static struct loop_func_table xor_funcs = { .number = LO_CRYPT_XOR, .transfer = transfer_xor, .init = xor_init -}; +}; /* xfer_funcs[0] is special - its release function is never called */ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { @@ -215,207 +192,169 @@ lo_do_transfer(struct loop_device *lo, int cmd, struct page *lpage, unsigned loffs, int size, sector_t rblock) { - if (unlikely(!lo->transfer)) + int ret; + + ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + if (likely(!ret)) return 0; - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + printk_ratelimited(KERN_ERR + "loop: Transfer error at byte offset %llu, length %i.\n", + (unsigned long long)rblock << 9, size); + return ret; } -/** - * __do_lo_send_write - helper for writing data to a loop device - * - * This helper just factors out common code between do_lo_send_direct_write() - * and do_lo_send_write(). - */ -static int __do_lo_send_write(struct file *file, - u8 *buf, const int len, loff_t pos) +static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { - struct kvec kvec = {.iov_base = buf, .iov_len = len}; - struct iov_iter from; + struct iov_iter i; ssize_t bw; - iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len); + iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len); file_start_write(file); - bw = vfs_iter_write(file, &from, &pos); + bw = vfs_iter_write(file, &i, ppos); file_end_write(file); - if (likely(bw == len)) + + if (likely(bw == bvec->bv_len)) return 0; - printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)pos, len); + + printk_ratelimited(KERN_ERR + "loop: Write error at byte offset %llu, length %i.\n", + (unsigned long long)*ppos, bvec->bv_len); if (bw >= 0) bw = -EIO; return bw; } -/** - * do_lo_send_direct_write - helper for writing data to a loop device - * - * This is the fast, non-transforming version that does not need double - * buffering. 
- */ -static int do_lo_send_direct_write(struct loop_device *lo, - struct bio_vec *bvec, loff_t pos, struct page *page) +static int lo_write_simple(struct loop_device *lo, struct request *rq, + loff_t pos) { - ssize_t bw = __do_lo_send_write(lo->lo_backing_file, - kmap(bvec->bv_page) + bvec->bv_offset, - bvec->bv_len, pos); - kunmap(bvec->bv_page); - cond_resched(); - return bw; + struct bio_vec bvec; + struct req_iterator iter; + int ret = 0; + + rq_for_each_segment(bvec, rq, iter) { + ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); + if (ret < 0) + break; + cond_resched(); + } + + return ret; } -/** - * do_lo_send_write - helper for writing data to a loop device - * +/* * This is the slow, transforming version that needs to double buffer the * data as it cannot do the transformations in place without having direct * access to the destination pages of the backing file. */ -static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, - loff_t pos, struct page *page) +static int lo_write_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) { - int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, pos >> 9); - if (likely(!ret)) - return __do_lo_send_write(lo->lo_backing_file, - page_address(page), bvec->bv_len, - pos); - printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " - "length %i.\n", (unsigned long long)pos, bvec->bv_len); - if (ret > 0) - ret = -EIO; - return ret; -} - -static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos) -{ - int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, - struct page *page); - struct bio_vec bvec; + struct bio_vec bvec, b; struct req_iterator iter; - struct page *page = NULL; + struct page *page; int ret = 0; - if (lo->transfer != transfer_none) { - page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page)) - goto fail; - kmap(page); - do_lo_send = do_lo_send_write; - } else { - do_lo_send = do_lo_send_direct_write; - } + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; rq_for_each_segment(bvec, rq, iter) { - ret = do_lo_send(lo, &bvec, pos, page); + ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, + bvec.bv_offset, bvec.bv_len, pos >> 9); + if (unlikely(ret)) + break; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); if (ret < 0) break; - pos += bvec.bv_len; } - if (page) { - kunmap(page); - __free_page(page); - } -out: + + __free_page(page); return ret; -fail: - printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); - ret = -ENOMEM; - goto out; } -struct lo_read_data { - struct loop_device *lo; - struct page *page; - unsigned offset; - int bsize; -}; +static int lo_read_simple(struct loop_device *lo, struct request *rq, + loff_t pos) +{ + struct bio_vec bvec; + struct req_iterator iter; + struct iov_iter i; + ssize_t len; -static int -lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - struct lo_read_data *p = sd->u.data; - struct loop_device *lo = p->lo; - struct page *page = buf->page; - sector_t IV; - int size; - - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + - (buf->offset >> 9); - size = sd->len; - if (size > p->bsize) - size = p->bsize; - - if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { - printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", - page->index); - size = -EINVAL; - } + 
rq_for_each_segment(bvec, rq, iter) { + iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) + return len; - flush_dcache_page(p->page); + flush_dcache_page(bvec.bv_page); - if (size > 0) - p->offset += size; + if (len != bvec.bv_len) { + struct bio *bio; - return size; -} + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + break; + } + cond_resched(); + } -static int -lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) -{ - return __splice_from_pipe(pipe, sd, lo_splice_actor); + return 0; } -static ssize_t -do_lo_receive(struct loop_device *lo, - struct bio_vec *bvec, int bsize, loff_t pos) +static int lo_read_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) { - struct lo_read_data cookie; - struct splice_desc sd; - struct file *file; - ssize_t retval; + struct bio_vec bvec, b; + struct req_iterator iter; + struct iov_iter i; + struct page *page; + ssize_t len; + int ret = 0; - cookie.lo = lo; - cookie.page = bvec->bv_page; - cookie.offset = bvec->bv_offset; - cookie.bsize = bsize; + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; - sd.len = 0; - sd.total_len = bvec->bv_len; - sd.flags = 0; - sd.pos = pos; - sd.u.data = &cookie; + rq_for_each_segment(bvec, rq, iter) { + loff_t offset = pos; - file = lo->lo_backing_file; - retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; - return retval; -} + iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) { + ret = len; + goto out_free_page; + } -static int -lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - ssize_t s; + ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, + bvec.bv_offset, len, offset >> 9); + if (ret) + goto out_free_page; - rq_for_each_segment(bvec, rq, iter) { - s = do_lo_receive(lo, &bvec, bsize, pos); - if (s < 0) - return s; + flush_dcache_page(bvec.bv_page); - if (s != bvec.bv_len) { + if (len != bvec.bv_len) { struct bio *bio; __rq_for_each_bio(bio, rq) zero_fill_bio(bio); break; } - pos += bvec.bv_len; } - return 0; + + ret = 0; +out_free_page: + __free_page(page); + return ret; } static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) @@ -464,10 +403,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) ret = lo_req_flush(lo, rq); else if (rq->cmd_flags & REQ_DISCARD) ret = lo_discard(lo, rq, pos); + else if (lo->transfer) + ret = lo_write_transfer(lo, rq, pos); else - ret = lo_send(lo, rq, pos); - } else - ret = lo_receive(lo, rq, lo->lo_blocksize, pos); + ret = lo_write_simple(lo, rq, pos); + + } else { + if (lo->transfer) + ret = lo_read_transfer(lo, rq, pos); + else + ret = lo_read_simple(lo, rq, pos); + } return ret; } @@ -788,7 +734,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->transfer = transfer_none; + lo->transfer = NULL; lo->ioctl = NULL; lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); @@ -1007,7 +953,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, info->lo_encrypt_key_size); lo->lo_key_owner = uid; - } + } return 0; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a98c41f72c63..39e5f7fae3ef 100644 
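The loop.c rework above replaces the kmap- and splice-based paths with iov_iter file I/O: each bio_vec is wrapped in an ITER_BVEC iterator and handed to vfs_iter_read()/vfs_iter_write(), which also advance the file position. The read side of that pattern in isolation (my_read_bvec is illustrative):

    static ssize_t my_read_bvec(struct file *file, struct bio_vec *bvec,
                                loff_t *pos)
    {
            struct iov_iter i;

            /* one-segment bvec iterator covering bvec->bv_len bytes */
            iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);
            return vfs_iter_read(file, &i, pos);  /* advances *pos */
    }

Because the iterator targets the bio_vec's page directly, no bounce page or kmap() is needed on the non-transforming path; only the crypto transfer paths still double-buffer.
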
--- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -32,28 +32,36 @@ #include <net/sock.h> #include <linux/net.h> #include <linux/kthread.h> +#include <linux/types.h> #include <asm/uaccess.h> #include <asm/types.h> #include <linux/nbd.h> -#define NBD_MAGIC 0x68797548 +struct nbd_device { + int flags; + int harderror; /* Code of hard error */ + struct socket * sock; /* If == NULL, device is not ready, yet */ + int magic; + + spinlock_t queue_lock; + struct list_head queue_head; /* Requests waiting result */ + struct request *active_req; + wait_queue_head_t active_wq; + struct list_head waiting_queue; /* Requests to be sent */ + wait_queue_head_t waiting_wq; + + struct mutex tx_lock; + struct gendisk *disk; + int blksize; + loff_t bytesize; + pid_t pid; /* pid of nbd-client, if attached */ + int xmit_timeout; + int disconnect; /* a disconnect has been requested by user */ +}; -#ifdef NDEBUG -#define dprintk(flags, fmt...) -#else /* NDEBUG */ -#define dprintk(flags, fmt...) do { \ - if (debugflags & (flags)) printk(KERN_DEBUG fmt); \ -} while (0) -#define DBG_IOCTL 0x0004 -#define DBG_INIT 0x0010 -#define DBG_EXIT 0x0020 -#define DBG_BLKDEV 0x0100 -#define DBG_RX 0x0200 -#define DBG_TX 0x0400 -static unsigned int debugflags; -#endif /* NDEBUG */ +#define NBD_MAGIC 0x68797548 static unsigned int nbds_max = 16; static struct nbd_device *nbd_dev; @@ -71,25 +79,9 @@ static int max_part; */ static DEFINE_SPINLOCK(nbd_lock); -#ifndef NDEBUG -static const char *ioctl_cmd_to_ascii(int cmd) +static inline struct device *nbd_to_dev(struct nbd_device *nbd) { - switch (cmd) { - case NBD_SET_SOCK: return "set-sock"; - case NBD_SET_BLKSIZE: return "set-blksize"; - case NBD_SET_SIZE: return "set-size"; - case NBD_SET_TIMEOUT: return "set-timeout"; - case NBD_SET_FLAGS: return "set-flags"; - case NBD_DO_IT: return "do-it"; - case NBD_CLEAR_SOCK: return "clear-sock"; - case NBD_CLEAR_QUE: return "clear-que"; - case NBD_PRINT_DEBUG: return "print-debug"; - case NBD_SET_SIZE_BLOCKS: return "set-size-blocks"; - case NBD_DISCONNECT: return "disconnect"; - case BLKROSET: return "set-read-only"; - case BLKFLSBUF: return "flush-buffer-cache"; - } - return "unknown"; + return disk_to_dev(nbd->disk); } static const char *nbdcmd_to_ascii(int cmd) @@ -103,30 +95,26 @@ static const char *nbdcmd_to_ascii(int cmd) } return "invalid"; } -#endif /* NDEBUG */ -static void nbd_end_request(struct request *req) +static void nbd_end_request(struct nbd_device *nbd, struct request *req) { int error = req->errors ? -EIO : 0; struct request_queue *q = req->q; unsigned long flags; - dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name, - req, error ? "failed" : "done"); + dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req, + error ? 
"failed" : "done"); spin_lock_irqsave(q->queue_lock, flags); __blk_end_request_all(req, error); spin_unlock_irqrestore(q->queue_lock, flags); } +/* + * Forcibly shutdown the socket causing all listeners to error + */ static void sock_shutdown(struct nbd_device *nbd, int lock) { - /* Forcibly shutdown the socket causing all listeners - * to error - * - * FIXME: This code is duplicated from sys_shutdown, but - * there should be a more generic interface rather than - * calling socket ops directly here */ if (lock) mutex_lock(&nbd->tx_lock); if (nbd->sock) { @@ -253,17 +241,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) } memcpy(request.handle, &req, sizeof(req)); - dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", - nbd->disk->disk_name, req, - nbdcmd_to_ascii(nbd_cmd(req)), - (unsigned long long)blk_rq_pos(req) << 9, - blk_rq_bytes(req)); + dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", + req, nbdcmd_to_ascii(nbd_cmd(req)), + (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, 1, &request, sizeof(request), (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); - goto error_out; + return -EIO; } if (nbd_cmd(req) == NBD_CMD_WRITE) { @@ -277,21 +263,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) flags = 0; if (!rq_iter_last(bvec, iter)) flags = MSG_MORE; - dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", - nbd->disk->disk_name, req, bvec.bv_len); + dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", + req, bvec.bv_len); result = sock_send_bvec(nbd, &bvec, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); - goto error_out; + return -EIO; } } } return 0; - -error_out: - return -EIO; } static struct request *nbd_find_request(struct nbd_device *nbd, @@ -302,7 +285,7 @@ static struct request *nbd_find_request(struct nbd_device *nbd, err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq); if (unlikely(err)) - goto out; + return ERR_PTR(err); spin_lock(&nbd->queue_lock); list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) { @@ -314,10 +297,7 @@ static struct request *nbd_find_request(struct nbd_device *nbd, } spin_unlock(&nbd->queue_lock); - err = -ENOENT; - -out: - return ERR_PTR(err); + return ERR_PTR(-ENOENT); } static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) @@ -371,8 +351,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) return req; } - dprintk(DBG_RX, "%s: request %p: got reply\n", - nbd->disk->disk_name, req); + dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); if (nbd_cmd(req) == NBD_CMD_READ) { struct req_iterator iter; struct bio_vec bvec; @@ -385,8 +364,8 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) req->errors++; return req; } - dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", - nbd->disk->disk_name, req, bvec.bv_len); + dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", + req, bvec.bv_len); } } return req; @@ -426,7 +405,7 @@ static int nbd_do_it(struct nbd_device *nbd) } while ((req = nbd_read_stat(nbd)) != NULL) - nbd_end_request(req); + nbd_end_request(nbd, req); device_remove_file(disk_to_dev(nbd->disk), &pid_attr); nbd->pid = 0; @@ -455,7 +434,7 @@ static void nbd_clear_que(struct nbd_device *nbd) queuelist); list_del_init(&req->queuelist); req->errors++; - 
nbd_end_request(req); + nbd_end_request(nbd, req); } while (!list_empty(&nbd->waiting_queue)) { @@ -463,7 +442,7 @@ static void nbd_clear_que(struct nbd_device *nbd) queuelist); list_del_init(&req->queuelist); req->errors++; - nbd_end_request(req); + nbd_end_request(nbd, req); } } @@ -507,7 +486,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) if (nbd_send_req(nbd, req) != 0) { dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); req->errors++; - nbd_end_request(req); + nbd_end_request(nbd, req); } else { spin_lock(&nbd->queue_lock); list_add_tail(&req->queuelist, &nbd->queue_head); @@ -522,7 +501,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) error_out: req->errors++; - nbd_end_request(req); + nbd_end_request(nbd, req); } static int nbd_thread(void *data) @@ -570,18 +549,18 @@ static void do_nbd_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); - dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", - req->rq_disk->disk_name, req, req->cmd_type); - nbd = req->rq_disk->private_data; BUG_ON(nbd->magic != NBD_MAGIC); + dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n", + req, req->cmd_type); + if (unlikely(!nbd->sock)) { dev_err(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); req->errors++; - nbd_end_request(req); + nbd_end_request(nbd, req); spin_lock_irq(q->queue_lock); continue; } @@ -706,13 +685,13 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, else blk_queue_flush(nbd->disk->queue, 0); - thread = kthread_create(nbd_thread, nbd, "%s", - nbd->disk->disk_name); + thread = kthread_run(nbd_thread, nbd, "%s", + nbd->disk->disk_name); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); return PTR_ERR(thread); } - wake_up_process(thread); + error = nbd_do_it(nbd); kthread_stop(thread); @@ -768,10 +747,6 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, BUG_ON(nbd->magic != NBD_MAGIC); - /* Anyone capable of this syscall can do *real bad* things */ - dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", - nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); - mutex_lock(&nbd->tx_lock); error = __nbd_ioctl(bdev, nbd, cmd, arg); mutex_unlock(&nbd->tx_lock); @@ -861,7 +836,6 @@ static int __init nbd_init(void) } printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); - dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags); for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; @@ -920,7 +894,3 @@ module_param(nbds_max, int, 0444); MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)"); module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)"); -#ifndef NDEBUG -module_param(debugflags, int, 0644); -MODULE_PARM_DESC(debugflags, "flags for controlling debug output"); -#endif diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e23be20a3417..85b8036deaa3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -44,7 +44,7 @@ #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 -#define NVME_AQ_DEPTH 64 +#define NVME_AQ_DEPTH 256 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define ADMIN_TIMEOUT (admin_timeout * HZ) @@ -152,6 +152,7 @@ struct nvme_cmd_info { */ #define NVME_INT_PAGES 2 #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size) +#define NVME_INT_MASK 0x01 /* * Will slightly 
overestimate the number of pages needed. This is OK @@ -257,7 +258,7 @@ static void *iod_get_private(struct nvme_iod *iod) */ static bool iod_should_kfree(struct nvme_iod *iod) { - return (iod->private & 0x01) == 0; + return (iod->private & NVME_INT_MASK) == 0; } /* Special values must be less than 0x1000 */ @@ -301,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { - struct request *req = ctx; - u32 result = le32_to_cpup(&cqe->result); u16 status = le16_to_cpup(&cqe->status) >> 1; @@ -311,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS) dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); - - blk_mq_free_hctx_request(nvmeq->hctx, req); } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -432,7 +429,6 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, { unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) : sizeof(struct nvme_dsm_range); - unsigned long mask = 0; struct nvme_iod *iod; if (rq->nr_phys_segments <= NVME_INT_PAGES && @@ -440,9 +436,8 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq); iod = cmd->iod; - mask = 0x01; iod_init(iod, size, rq->nr_phys_segments, - (unsigned long) rq | 0x01); + (unsigned long) rq | NVME_INT_MASK); return iod; } @@ -522,8 +517,6 @@ static void nvme_dif_remap(struct request *req, return; pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; - if (!pmap) - return; p = pmap; virt = bip_get_seed(bip); @@ -645,12 +638,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, struct scatterlist *sg = iod->sg; int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); - int offset = offset_in_page(dma_addr); + u32 page_size = dev->page_size; + int offset = dma_addr & (page_size - 1); __le64 *prp_list; __le64 **list = iod_list(iod); dma_addr_t prp_dma; int nprps, i; - u32 page_size = dev->page_size; length -= (page_size - offset); if (length <= 0) @@ -1028,18 +1021,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) struct nvme_cmd_info *cmd_info; struct request *req; - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false); + req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true); if (IS_ERR(req)) return PTR_ERR(req); req->cmd_flags |= REQ_NO_TIMEOUT; cmd_info = blk_mq_rq_to_pdu(req); - nvme_set_info(cmd_info, req, async_req_completion); + nvme_set_info(cmd_info, NULL, async_req_completion); memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = req->tag; + blk_mq_free_hctx_request(nvmeq->hctx, req); return __nvme_submit_cmd(nvmeq, &c); } @@ -1347,6 +1341,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); + if (!nvmeq->qid && nvmeq->dev->admin_q) + blk_mq_freeze_queue_start(nvmeq->dev->admin_q); + irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); @@ -1378,8 +1375,6 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } - if (!qid && dev->admin_q) - blk_mq_freeze_queue_start(dev->admin_q); spin_lock_irq(&nvmeq->q_lock); nvme_process_cq(nvmeq); @@ -1583,6 +1578,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) 
dev->admin_tagset.ops = &nvme_mq_admin_ops; dev->admin_tagset.nr_hw_queues = 1; dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; + dev->admin_tagset.reserved_tags = 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); @@ -1749,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) struct nvme_dev *dev = ns->dev; struct nvme_user_io io; struct nvme_command c; - unsigned length, meta_len; - int status, i; - struct nvme_iod *iod, *meta_iod = NULL; - dma_addr_t meta_dma_addr; - void *meta, *uninitialized_var(meta_mem); + unsigned length, meta_len, prp_len; + int status, write; + struct nvme_iod *iod; + dma_addr_t meta_dma = 0; + void *meta = NULL; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - if (meta_len && ((io.metadata & 3) || !io.metadata)) + if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) return -EINVAL; + else if (meta_len && ns->ext) { + length += meta_len; + meta_len = 0; + } + + write = io.opcode & 1; switch (io.opcode) { case nvme_cmd_write: case nvme_cmd_read: case nvme_cmd_compare: - iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length); + iod = nvme_map_user_pages(dev, write, io.addr, length); break; default: return -EINVAL; @@ -1776,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) if (IS_ERR(iod)) return PTR_ERR(iod); + prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); + if (length != prp_len) { + status = -ENOMEM; + goto unmap; + } + if (meta_len) { + meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, + &meta_dma, GFP_KERNEL); + if (!meta) { + status = -ENOMEM; + goto unmap; + } + if (write) { + if (copy_from_user(meta, (void __user *)io.metadata, + meta_len)) { + status = -EFAULT; + goto unmap; + } + } + } + memset(&c, 0, sizeof(c)); c.rw.opcode = io.opcode; c.rw.flags = io.flags; @@ -1787,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - - if (meta_len) { - meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, - meta_len); - if (IS_ERR(meta_iod)) { - status = PTR_ERR(meta_iod); - meta_iod = NULL; - goto unmap; - } - - meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, - &meta_dma_addr, GFP_KERNEL); - if (!meta_mem) { - status = -ENOMEM; - goto unmap; - } - - if (io.opcode & 1) { - int meta_offset = 0; - - for (i = 0; i < meta_iod->nents; i++) { - meta = kmap_atomic(sg_page(&meta_iod->sg[i])) + - meta_iod->sg[i].offset; - memcpy(meta_mem + meta_offset, meta, - meta_iod->sg[i].length); - kunmap_atomic(meta); - meta_offset += meta_iod->sg[i].length; - } - } - - c.rw.metadata = cpu_to_le64(meta_dma_addr); - } - - length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); c.rw.prp2 = cpu_to_le64(iod->first_dma); - - if (length != (io.nblocks + 1) << ns->lba_shift) - status = -ENOMEM; - else - status = nvme_submit_io_cmd(dev, ns, &c, NULL); - - if (meta_len) { - if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { - int meta_offset = 0; - - for (i = 0; i < meta_iod->nents; i++) { - meta = kmap_atomic(sg_page(&meta_iod->sg[i])) + - meta_iod->sg[i].offset; - memcpy(meta, meta_mem + meta_offset, - meta_iod->sg[i].length); - kunmap_atomic(meta); 
- meta_offset += meta_iod->sg[i].length; - } - } - - dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem, - meta_dma_addr); - } - + c.rw.metadata = cpu_to_le64(meta_dma); + status = nvme_submit_io_cmd(dev, ns, &c, NULL); unmap: - nvme_unmap_user_pages(dev, io.opcode & 1, iod); + nvme_unmap_user_pages(dev, write, iod); nvme_free_iod(dev, iod); - - if (meta_iod) { - nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod); - nvme_free_iod(dev, meta_iod); + if (meta) { + if (status == NVME_SC_SUCCESS && !write) { + if (copy_to_user((void __user *)io.metadata, meta, + meta_len)) + status = -EFAULT; + } + dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); } - return status; } @@ -2018,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_dev *dev = ns->dev; struct nvme_id_ns *id; dma_addr_t dma_addr; - int lbaf, pi_type, old_ms; + u8 lbaf, pi_type; + u16 old_ms; unsigned short bs; id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, @@ -2039,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; ns->lba_shift = id->lbaf[lbaf].ds; ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* * If identify namespace failed, use default 512 byte block size so @@ -2055,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (blk_get_integrity(disk) && (ns->pi_type != pi_type || ns->ms != old_ms || bs != queue_logical_block_size(disk->queue) || - (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT))) + (ns->ms && ns->ext))) blk_integrity_unregister(disk); ns->pi_type = pi_type; blk_queue_logical_block_size(ns->queue, bs); if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) && - !(id->flbas & NVME_NS_FLBAS_META_EXT)) + !ns->ext) nvme_init_integrity(ns); if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) @@ -2334,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; dev->vwc = ctrl->vwc; - dev->event_limit = min(ctrl->aerl + 1, 8); memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); @@ -2881,6 +2851,7 @@ static int nvme_dev_start(struct nvme_dev *dev) nvme_set_irq_hints(dev); + dev->event_limit = 1; return result; free_tags: @@ -3166,8 +3137,10 @@ static int __init nvme_init(void) nvme_char_major = result; nvme_class = class_create(THIS_MODULE, "nvme"); - if (!nvme_class) + if (IS_ERR(nvme_class)) { + result = PTR_ERR(nvme_class); goto unregister_chrdev; + } result = pci_register_driver(&nvme_driver); if (result) diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index e10196e0182d..6b736b00f63e 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -55,6 +55,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define VPD_SERIAL_NUMBER 0x80 #define VPD_DEVICE_IDENTIFIERS 0x83 #define VPD_EXTENDED_INQUIRY 0x86 +#define VPD_BLOCK_LIMITS 0xB0 #define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1 /* CDB offsets */ @@ -132,9 +133,10 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80 #define INQ_DEVICE_IDENTIFICATION_PAGE 0x83 #define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86 +#define INQ_BDEV_LIMITS_PAGE 0xB0 #define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1 #define INQ_SERIAL_NUMBER_LENGTH 0x14 -#define INQ_NUM_SUPPORTED_VPD_PAGES 5 +#define 
INQ_NUM_SUPPORTED_VPD_PAGES 6 #define VERSION_SPC_4 0x06 #define ACA_UNSUPPORTED 0 #define STANDARD_INQUIRY_LENGTH 36 @@ -747,6 +749,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE; inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE; inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE; + inq_response[9] = INQ_BDEV_LIMITS_PAGE; xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); @@ -938,6 +941,25 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } +static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *inq_response, int alloc_len) +{ + __be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue)); + __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors); + __be32 discard_desc_count = cpu_to_be32(0x100); + + memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); + inq_response[1] = VPD_BLOCK_LIMITS; + inq_response[3] = 0x3c; /* Page Length */ + memcpy(&inq_response[8], &max_sectors, sizeof(u32)); + memcpy(&inq_response[20], &max_discard, sizeof(u32)); + + if (max_discard) + memcpy(&inq_response[24], &discard_desc_count, sizeof(u32)); + + return nvme_trans_copy_to_user(hdr, inq_response, 0x3c); +} + static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { @@ -2268,6 +2290,10 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, case VPD_EXTENDED_INQUIRY: res = nvme_trans_ext_inq_page(ns, hdr, alloc_len); break; + case VPD_BLOCK_LIMITS: + res = nvme_trans_bdev_limits_page(ns, hdr, inq_response, + alloc_len); + break; case VPD_BLOCK_DEV_CHARACTERISTICS: res = nvme_trans_bdev_char_page(ns, hdr, alloc_len); break; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 523ee8fd4c15..c264f2d284a7 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -440,9 +440,9 @@ static inline void seek_track(struct floppy_state *fs, int n) static inline void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count) { - st_le16(&cp->req_count, count); - st_le16(&cp->command, cmd); - st_le32(&cp->phy_addr, virt_to_bus(buf)); + cp->req_count = cpu_to_le16(count); + cp->command = cpu_to_le16(cmd); + cp->phy_addr = cpu_to_le32(virt_to_bus(buf)); cp->xfer_status = 0; } @@ -771,8 +771,8 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) } /* turn off DMA */ out_le32(&dr->control, (RUN | PAUSE) << 16); - stat = ld_le16(&cp->xfer_status); - resid = ld_le16(&cp->res_count); + stat = le16_to_cpu(cp->xfer_status); + resid = le16_to_cpu(cp->res_count); if (intr & ERROR_INTR) { n = fs->scount - 1 - resid / 512; if (n > 0) { @@ -1170,7 +1170,7 @@ static int swim3_add_device(struct macio_dev *mdev, int index) fs->dma_cmd = (struct dbdma_cmd *) DBDMA_ALIGN(fs->dbdma_cmd_space); memset(fs->dma_cmd, 0, 2 * sizeof(struct dbdma_cmd)); - st_le16(&fs->dma_cmd[1].command, DBDMA_STOP); + fs->dma_cmd[1].command = cpu_to_le16(DBDMA_STOP); if (mdev->media_bay == NULL || check_media_bay(mdev->media_bay) == MB_FD) swim3_mb_event(mdev, MB_FD); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 655e570b9b31..5ea2f0bbbc7c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -342,7 +342,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct request_queue *q = vblk->disk->queue; char cap_str_2[10], cap_str_10[10]; char *envp[] = { "RESIZE=1", NULL }; - u64 capacity, 
size; + u64 capacity; /* Host must always specify the capacity. */ virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); @@ -354,9 +354,10 @@ static void virtblk_config_changed_work(struct work_struct *work) capacity = (sector_t)-1; } - size = capacity * queue_logical_block_size(q); - string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); - string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); + string_get_size(capacity, queue_logical_block_size(q), + STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(capacity, queue_logical_block_size(q), + STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); dev_notice(&vdev->dev, "new size: %llu %d-byte logical blocks (%s/%s)\n", diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 2a04d341e598..bd2b3bbbb22c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -34,6 +34,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) "xen-blkback: " fmt + #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/list.h> @@ -211,7 +213,7 @@ static int add_persistent_gnt(struct xen_blkif *blkif, else if (persistent_gnt->gnt > this->gnt) new = &((*new)->rb_right); else { - pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n"); + pr_alert_ratelimited("trying to add a gref that's already in the tree\n"); return -EINVAL; } } @@ -242,7 +244,7 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif, node = node->rb_right; else { if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { - pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n"); + pr_alert_ratelimited("requesting a grant already in use\n"); return NULL; } set_bit(PERSISTENT_GNT_ACTIVE, data->flags); @@ -257,7 +259,7 @@ static void put_persistent_gnt(struct xen_blkif *blkif, struct persistent_gnt *persistent_gnt) { if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) - pr_alert_ratelimited(DRV_PFX " freeing a grant already unused"); + pr_alert_ratelimited("freeing a grant already unused\n"); set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); atomic_dec(&blkif->persistent_gnt_in_use); @@ -374,7 +376,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) } if (work_pending(&blkif->persistent_purge_work)) { - pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n"); + pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n"); return; } @@ -396,7 +398,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) total = num_clean; - pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); + pr_debug("Going to purge %u persistent grants\n", num_clean); BUG_ON(!list_empty(&blkif->persistent_purge_list)); root = &blkif->persistent_gnts; @@ -428,13 +430,13 @@ purge_list: * with the requested num */ if (!scan_used && !clean_used) { - pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean); + pr_debug("Still missing %u purged frames\n", num_clean); scan_used = true; goto purge_list; } finished: if (!clean_used) { - pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n"); + pr_debug("Finished scanning for grants to clean, removing used flag\n"); clean_used = true; goto purge_list; } @@ -444,7 +446,7 @@ finished: /* We can defer this work */ schedule_work(&blkif->persistent_purge_work); - pr_debug(DRV_PFX "Purged 
%u/%u\n", (total - num_clean), total); + pr_debug("Purged %u/%u\n", (total - num_clean), total); return; } @@ -520,20 +522,20 @@ static void xen_vbd_resize(struct xen_blkif *blkif) struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); - pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", + pr_info("VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); + pr_info("VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - pr_warn(DRV_PFX "Error starting transaction"); + pr_warn("Error starting transaction\n"); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(vbd)); if (err) { - pr_warn(DRV_PFX "Error writing new size"); + pr_warn("Error writing new size\n"); goto abort; } /* @@ -543,7 +545,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - pr_warn(DRV_PFX "Error writing the state"); + pr_warn("Error writing the state\n"); goto abort; } @@ -551,7 +553,7 @@ again: if (err == -EAGAIN) goto again; if (err) - pr_warn(DRV_PFX "Error ending transaction"); + pr_warn("Error ending transaction\n"); return; abort: xenbus_transaction_end(xbt, 1); @@ -578,7 +580,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" + pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" " | ds %4llu | pg: %4u/%4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, @@ -855,7 +857,7 @@ again: /* This is a newly mapped grant */ BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { - pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + pr_debug("invalid buffer -- could not remap it\n"); put_free_pages(blkif, &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; ret |= 1; @@ -891,14 +893,14 @@ again: goto next; } pages[seg_idx]->persistent_gnt = persistent_gnt; - pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n", persistent_gnt->gnt, blkif->persistent_gnt_c, xen_blkif_max_pgrants); goto next; } if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { blkif->vbd.overflow_max_grants = 1; - pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + pr_debug("domain %u, device %#x is using maximum number of persistent grants\n", blkif->domid, blkif->vbd.handle); } /* @@ -916,7 +918,7 @@ next: return ret; out_of_memory: - pr_alert(DRV_PFX "%s: out of memory\n", __func__); + pr_alert("%s: out of memory\n", __func__); put_free_pages(blkif, pages_to_gnt, segs_to_map); return -ENOMEM; } @@ -996,7 +998,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, err = xen_vbd_translate(&preq, blkif, WRITE); if (err) { - pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n", + pr_warn("access denied: DISCARD [%llu->%llu] on dev=%04x\n", preq.sector_number, preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); goto fail_response; @@ -1012,7 +1014,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, GFP_KERNEL, secure); fail_response: if (err == -EOPNOTSUPP) { - pr_debug(DRV_PFX "discard op failed, not supported\n"); + pr_debug("discard op failed, not 
supported\n"); status = BLKIF_RSP_EOPNOTSUPP; } else if (err) status = BLKIF_RSP_ERROR; @@ -1056,16 +1058,16 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); + pr_debug("flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && (error == -EOPNOTSUPP)) { - pr_debug(DRV_PFX "write barrier op failed, not supported\n"); + pr_debug("write barrier op failed, not supported\n"); xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { - pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," + pr_debug("Buffer not up-to-date at end of operation," " error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -1110,7 +1112,7 @@ __do_block_io_op(struct xen_blkif *blkif) if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) { rc = blk_rings->common.rsp_prod_pvt; - pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", + pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", rp, rc, rp - rc, blkif->vbd.pdevice); return -EACCES; } @@ -1217,8 +1219,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, if ((req->operation == BLKIF_OP_INDIRECT) && (req_operation != BLKIF_OP_READ) && (req_operation != BLKIF_OP_WRITE)) { - pr_debug(DRV_PFX "Invalid indirect operation (%u)\n", - req_operation); + pr_debug("Invalid indirect operation (%u)\n", req_operation); goto fail_response; } @@ -1252,8 +1253,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && (nseg > MAX_INDIRECT_SEGMENTS))) { - pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", - nseg); + pr_debug("Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -1288,7 +1288,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } if (xen_vbd_translate(&preq, blkif, operation) != 0) { - pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", + pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, @@ -1303,7 +1303,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - pr_debug(DRV_PFX "Misaligned I/O request from domain %d", + pr_debug("Misaligned I/O request from domain %d\n", blkif->domid); goto fail_response; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 375d28851860..f620b5d3f77c 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -44,12 +44,6 @@ #include <xen/interface/io/blkif.h> #include <xen/interface/io/protocols.h> -#define DRV_PFX "xen-blkback:" -#define DPRINTK(fmt, args...) \ - pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) - - /* * This is the maximum number of segments that would be allowed in indirect * requests. This value will also be passed to the frontend. 
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e3afe97280b1..6ab69ad61ee1 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -14,6 +14,8 @@ */ +#define pr_fmt(fmt) "xen-blkback: " fmt + #include <stdarg.h> #include <linux/module.h> #include <linux/kthread.h> @@ -21,6 +23,9 @@ #include <xen/grant_table.h> #include "common.h" +/* Enlarge the array size in order to fully show blkback name. */ +#define BLKBACK_NAME_LEN (20) + struct backend_info { struct xenbus_device *dev; struct xen_blkif *blkif; @@ -70,7 +75,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf) else devname = devpath; - snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname); kfree(devpath); return 0; @@ -79,7 +84,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf) static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; - char name[TASK_COMM_LEN]; + char name[BLKBACK_NAME_LEN]; /* Not ready to connect? */ if (!blkif->irq || !blkif->vbd.bdev) @@ -193,7 +198,7 @@ fail: return ERR_PTR(-ENOMEM); } -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, unsigned int evtchn) { int err; @@ -202,7 +207,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); + err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + &blkif->blk_ring); if (err < 0) return err; @@ -423,14 +429,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { - DPRINTK("xen_vbd_create: device %08x could not be opened.\n", + pr_warn("xen_vbd_create: device %08x could not be opened\n", vbd->pdevice); return -ENOENT; } vbd->bdev = bdev; if (vbd->bdev->bd_disk == NULL) { - DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", + pr_warn("xen_vbd_create: device %08x doesn't exist\n", vbd->pdevice); xen_vbd_free(vbd); return -ENOENT; @@ -449,7 +455,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (q && blk_queue_secdiscard(q)) vbd->discard_secure = true; - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + pr_debug("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; } @@ -457,7 +463,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); - DPRINTK(""); + pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); if (be->major || be->minor) xenvbd_sysfs_delif(dev); @@ -563,6 +569,10 @@ static int xen_blkbk_probe(struct xenbus_device *dev, int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); + + /* match the pr_debug in xen_blkbk_remove */ + pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); + if (!be) { xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure"); @@ -594,7 +604,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev, return 0; fail: - DPRINTK("failed"); + pr_warn("%s failed\n", __func__); xen_blkbk_remove(dev); return err; } @@ -618,7 +628,7 @@ static void backend_changed(struct xenbus_watch *watch, unsigned long handle; char *device_type; - DPRINTK(""); + pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", 
&major, &minor); @@ -637,7 +647,7 @@ static void backend_changed(struct xenbus_watch *watch, if (be->major | be->minor) { if (be->major != major || be->minor != minor) - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n", be->major, be->minor, major, minor); return; } @@ -698,13 +708,12 @@ static void frontend_changed(struct xenbus_device *dev, struct backend_info *be = dev_get_drvdata(&dev->dev); int err; - DPRINTK("%s", xenbus_strstate(frontend_state)); + pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state)); switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - pr_info(DRV_PFX "%s: prepare for reconnect\n", - dev->nodename); + pr_info("%s: prepare for reconnect\n", dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -771,7 +780,7 @@ static void connect(struct backend_info *be) int err; struct xenbus_device *dev = be->dev; - DPRINTK("%s", dev->otherend); + pr_debug("%s %s\n", __func__, dev->otherend); /* Supply the information about the device the frontend needs */ again: @@ -857,7 +866,7 @@ static int connect_ring(struct backend_info *be) char protocol[64] = ""; int err; - DPRINTK("%s", dev->otherend); + pr_debug("%s %s\n", __func__, dev->otherend); err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, "event-channel", "%u", &evtchn, NULL); @@ -892,7 +901,7 @@ static int connect_ring(struct backend_info *be) be->blkif->vbd.feature_gnt_persistent = pers_grants; be->blkif->vbd.overflow_max_grants = 0; - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", + pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", ring_ref, evtchn, be->blkif->blk_protocol, protocol, pers_grants ? 
"persistent grants" : ""); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 37779e4c4585..2c61cf8c6f61 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1245,6 +1245,7 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; + grant_ref_t gref; int err; info->ring_ref = GRANT_INVALID_REF; @@ -1257,13 +1258,13 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); if (err < 0) { free_page((unsigned long)sring); info->ring.sring = NULL; goto fail; } - info->ring_ref = err; + info->ring_ref = gref; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index c3b4f5a5ac10..3111f2778079 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -193,6 +193,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) struct xenbus_transaction xbt; const char *message = NULL; int rv; + grant_ref_t gref; priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (!priv->shr) { @@ -200,11 +201,11 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) return -ENOMEM; } - rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr)); + rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref); if (rv < 0) return rv; - priv->ring_ref = rv; + priv->ring_ref = gref; rv = xenbus_alloc_evtchn(dev, &priv->evtchn); if (rv) diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 2db803cd095c..d24a3f8b49bc 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -1497,9 +1497,9 @@ static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) drive->name); return 0; } - st_le16(&table->command, wr? OUTPUT_MORE: INPUT_MORE); - st_le16(&table->req_count, tc); - st_le32(&table->phy_addr, cur_addr); + table->command = cpu_to_le16(wr? OUTPUT_MORE: INPUT_MORE); + table->req_count = cpu_to_le16(tc); + table->phy_addr = cpu_to_le32(cur_addr); table->cmd_dep = 0; table->xfer_status = 0; table->res_count = 0; @@ -1513,10 +1513,10 @@ static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) /* convert the last command to an input/output last command */ if (count) { - st_le16(&table[-1].command, wr? OUTPUT_LAST: INPUT_LAST); + table[-1].command = cpu_to_le16(wr? 
OUTPUT_LAST: INPUT_LAST); /* add the stop command to the end of the list */ memset(table, 0, sizeof(struct dbdma_cmd)); - st_le16(&table->command, DBDMA_STOP); + table->command = cpu_to_le16(DBDMA_STOP); mb(); writel(hwif->dmatable_dma, &dma->cmdptr); return 1; diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c index 4192901cab40..048901a1111a 100644 --- a/drivers/macintosh/rack-meter.c +++ b/drivers/macintosh/rack-meter.c @@ -182,31 +182,31 @@ static void rackmeter_setup_dbdma(struct rackmeter *rm) /* Prepare 4 dbdma commands for the 2 buffers */ memset(cmd, 0, 4 * sizeof(struct dbdma_cmd)); - st_le16(&cmd->req_count, 4); - st_le16(&cmd->command, STORE_WORD | INTR_ALWAYS | KEY_SYSTEM); - st_le32(&cmd->phy_addr, rm->dma_buf_p + + cmd->req_count = cpu_to_le16(4); + cmd->command = cpu_to_le16(STORE_WORD | INTR_ALWAYS | KEY_SYSTEM); + cmd->phy_addr = cpu_to_le32(rm->dma_buf_p + offsetof(struct rackmeter_dma, mark)); - st_le32(&cmd->cmd_dep, 0x02000000); + cmd->cmd_dep = cpu_to_le32(0x02000000); cmd++; - st_le16(&cmd->req_count, SAMPLE_COUNT * 4); - st_le16(&cmd->command, OUTPUT_MORE); - st_le32(&cmd->phy_addr, rm->dma_buf_p + + cmd->req_count = cpu_to_le16(SAMPLE_COUNT * 4); + cmd->command = cpu_to_le16(OUTPUT_MORE); + cmd->phy_addr = cpu_to_le32(rm->dma_buf_p + offsetof(struct rackmeter_dma, buf1)); cmd++; - st_le16(&cmd->req_count, 4); - st_le16(&cmd->command, STORE_WORD | INTR_ALWAYS | KEY_SYSTEM); - st_le32(&cmd->phy_addr, rm->dma_buf_p + + cmd->req_count = cpu_to_le16(4); + cmd->command = cpu_to_le16(STORE_WORD | INTR_ALWAYS | KEY_SYSTEM); + cmd->phy_addr = cpu_to_le32(rm->dma_buf_p + offsetof(struct rackmeter_dma, mark)); - st_le32(&cmd->cmd_dep, 0x01000000); + cmd->cmd_dep = cpu_to_le32(0x01000000); cmd++; - st_le16(&cmd->req_count, SAMPLE_COUNT * 4); - st_le16(&cmd->command, OUTPUT_MORE | BR_ALWAYS); - st_le32(&cmd->phy_addr, rm->dma_buf_p + + cmd->req_count = cpu_to_le16(SAMPLE_COUNT * 4); + cmd->command = cpu_to_le16(OUTPUT_MORE | BR_ALWAYS); + cmd->phy_addr = cpu_to_le32(rm->dma_buf_p + offsetof(struct rackmeter_dma, buf2)); - st_le32(&cmd->cmd_dep, rm->dma_buf_p); + cmd->cmd_dep = cpu_to_le32(rm->dma_buf_p); rackmeter_do_pause(rm, 0); } diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 10ae69bcbbd2..d531f804455d 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -557,8 +557,7 @@ int __init smu_init (void) return 0; fail_msg_node: - if (smu->msg_node) - of_node_put(smu->msg_node); + of_node_put(smu->msg_node); fail_db_node: of_node_put(smu->db_node); fail_bootmem: diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index dee88e59f0d3..f9512bfa6c3c 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -329,10 +329,11 @@ int __init find_via_pmu(void) gaddr = of_translate_address(gpiop, reg); if (gaddr != OF_BAD_ADDR) gpio_reg = ioremap(gaddr, 0x10); + of_node_put(gpiop); } if (gpio_reg == NULL) { printk(KERN_ERR "via-pmu: Can't find GPIO reg !\n"); - goto fail_gpio; + goto fail; } } else pmu_kind = PMU_UNKNOWN; @@ -340,7 +341,7 @@ int __init find_via_pmu(void) via = ioremap(taddr, 0x2000); if (via == NULL) { printk(KERN_ERR "via-pmu: Can't map address !\n"); - goto fail; + goto fail_via_remap; } out_8(&via[IER], IER_CLR | 0x7f); /* disable all intrs */ @@ -348,10 +349,8 @@ int __init find_via_pmu(void) pmu_state = idle; - if (!init_pmu()) { - via = NULL; - return 0; - } + if (!init_pmu()) + goto fail_init; printk(KERN_INFO "PMU driver v%d initialized for %s, firmware: %02x\n", 
PMU_DRIVER_VERSION, pbook_type[pmu_kind], pmu_version); @@ -359,11 +358,15 @@ int __init find_via_pmu(void) sys_ctrler = SYS_CTRLER_PMU; return 1; - fail: - of_node_put(vias); + + fail_init: + iounmap(via); + via = NULL; + fail_via_remap: iounmap(gpio_reg); gpio_reg = NULL; - fail_gpio: + fail: + of_node_put(vias); vias = NULL; return 0; } @@ -2109,7 +2112,7 @@ pmu_read(struct file *file, char __user *buf, spin_lock_irqsave(&pp->lock, flags); add_wait_queue(&pp->wait, &wait); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); for (;;) { ret = -EAGAIN; @@ -2138,7 +2141,7 @@ pmu_read(struct file *file, char __user *buf, schedule(); spin_lock_irqsave(&pp->lock, flags); } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&pp->wait, &wait); spin_unlock_irqrestore(&pp->lock, flags); diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 84325f267acf..84b0a2d74d60 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -6,6 +6,15 @@ menuconfig MAILBOX signals. Say Y if your platform supports hardware mailboxes. if MAILBOX + +config ARM_MHU + tristate "ARM MHU Mailbox" + depends on ARM_AMBA + help + Say Y here if you want to build the ARM MHU controller driver. + The controller has 3 mailbox channels, the last of which can be + used in Secure mode only. + config PL320_MBOX bool "ARM PL320 Mailbox" depends on ARM_AMBA diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 2e79231154cf..b18201e97e29 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_MAILBOX) += mailbox.o +obj-$(CONFIG_ARM_MHU) += arm_mhu.o + obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o obj-$(CONFIG_OMAP2PLUS_MBOX) += omap-mailbox.o diff --git a/drivers/mailbox/arm_mhu.c b/drivers/mailbox/arm_mhu.c new file mode 100644 index 000000000000..ac693c635357 --- /dev/null +++ b/drivers/mailbox/arm_mhu.c @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2013-2015 Fujitsu Semiconductor Ltd. + * Copyright (C) 2015 Linaro Ltd. + * Author: Jassi Brar <jaswinder.singh@linaro.org> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/amba/bus.h> +#include <linux/mailbox_controller.h> + +#define INTR_STAT_OFS 0x0 +#define INTR_SET_OFS 0x8 +#define INTR_CLR_OFS 0x10 + +#define MHU_LP_OFFSET 0x0 +#define MHU_HP_OFFSET 0x20 +#define MHU_SEC_OFFSET 0x200 +#define TX_REG_OFFSET 0x100 + +#define MHU_CHANS 3 + +struct mhu_link { + unsigned irq; + void __iomem *tx_reg; + void __iomem *rx_reg; +}; + +struct arm_mhu { + void __iomem *base; + struct mhu_link mlink[MHU_CHANS]; + struct mbox_chan chan[MHU_CHANS]; + struct mbox_controller mbox; +}; + +static irqreturn_t mhu_rx_interrupt(int irq, void *p) +{ + struct mbox_chan *chan = p; + struct mhu_link *mlink = chan->con_priv; + u32 val; + + val = readl_relaxed(mlink->rx_reg + INTR_STAT_OFS); + if (!val) + return IRQ_NONE; + + mbox_chan_received_data(chan, (void *)&val); + + writel_relaxed(val, mlink->rx_reg + INTR_CLR_OFS); + + return IRQ_HANDLED; +} + +static bool mhu_last_tx_done(struct mbox_chan *chan) +{ + struct mhu_link *mlink = chan->con_priv; + u32 val = readl_relaxed(mlink->tx_reg + INTR_STAT_OFS); + + return (val == 0); +} + +static int mhu_send_data(struct mbox_chan *chan, void *data) +{ + struct mhu_link *mlink = chan->con_priv; + u32 *arg = data; + + writel_relaxed(*arg, mlink->tx_reg + INTR_SET_OFS); + + return 0; +} + +static int mhu_startup(struct mbox_chan *chan) +{ + struct mhu_link *mlink = chan->con_priv; + u32 val; + int ret; + + val = readl_relaxed(mlink->tx_reg + INTR_STAT_OFS); + writel_relaxed(val, mlink->tx_reg + INTR_CLR_OFS); + + ret = request_irq(mlink->irq, mhu_rx_interrupt, + IRQF_SHARED, "mhu_link", chan); + if (ret) { + dev_err(chan->mbox->dev, + "Unable to acquire IRQ %d\n", mlink->irq); + return ret; + } + + return 0; +} + +static void mhu_shutdown(struct mbox_chan *chan) +{ + struct mhu_link *mlink = chan->con_priv; + + free_irq(mlink->irq, chan); +} + +static struct mbox_chan_ops mhu_ops = { + .send_data = mhu_send_data, + .startup = mhu_startup, + .shutdown = mhu_shutdown, + .last_tx_done = mhu_last_tx_done, +}; + +static int mhu_probe(struct amba_device *adev, const struct amba_id *id) +{ + int i, err; + struct arm_mhu *mhu; + struct device *dev = &adev->dev; + int mhu_reg[MHU_CHANS] = {MHU_LP_OFFSET, MHU_HP_OFFSET, MHU_SEC_OFFSET}; + + /* Allocate memory for device */ + mhu = devm_kzalloc(dev, sizeof(*mhu), GFP_KERNEL); + if (!mhu) + return -ENOMEM; + + mhu->base = devm_ioremap_resource(dev, &adev->res); + if (IS_ERR(mhu->base)) { + dev_err(dev, "ioremap failed\n"); + return PTR_ERR(mhu->base); + } + + for (i = 0; i < MHU_CHANS; i++) { + mhu->chan[i].con_priv = &mhu->mlink[i]; + mhu->mlink[i].irq = adev->irq[i]; + mhu->mlink[i].rx_reg = mhu->base + mhu_reg[i]; + mhu->mlink[i].tx_reg = mhu->mlink[i].rx_reg + TX_REG_OFFSET; + } + + mhu->mbox.dev = dev; + mhu->mbox.chans = &mhu->chan[0]; + mhu->mbox.num_chans = MHU_CHANS; + mhu->mbox.ops = &mhu_ops; + mhu->mbox.txdone_irq = false; + mhu->mbox.txdone_poll = true; + mhu->mbox.txpoll_period = 10; + + amba_set_drvdata(adev, mhu); + + err = mbox_controller_register(&mhu->mbox); + if (err) { + dev_err(dev, "Failed to register mailboxes %d\n", err); + return err; + } + + dev_info(dev, "ARM MHU Mailbox registered\n"); + return 0; +} + +static int mhu_remove(struct amba_device *adev) +{ + struct arm_mhu *mhu = amba_get_drvdata(adev); +
mbox_controller_unregister(&mhu->mbox); + + return 0; +} + +static struct amba_id mhu_ids[] = { + { + .id = 0x1bb098, + .mask = 0xffffff, + }, + { 0, 0 }, +}; +MODULE_DEVICE_TABLE(amba, mhu_ids); + +static struct amba_driver arm_mhu_driver = { + .drv = { + .name = "mhu", + }, + .id_table = mhu_ids, + .probe = mhu_probe, + .remove = mhu_remove, +}; +module_amba_driver(arm_mhu_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ARM MHU Driver"); +MODULE_AUTHOR("Jassi Brar <jassisinghbrar@gmail.com>"); diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 977c814cdf6f..7e91d68a3ac3 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -20,10 +20,35 @@ * shared memory regions as defined in the PCC table entries. The PCC * specification supports a Doorbell mechanism for the PCC clients * to notify the platform about new data. This Doorbell information - * is also specified in each PCC table entry. See pcc_send_data() - * and pcc_tx_done() for basic mode of operation. + * is also specified in each PCC table entry. * - * For more details about PCC, please see the ACPI specification from + * Typical high level flow of operation is: + * + * PCC Reads: + * * Client tries to acquire a channel lock. + * * After it is acquired it writes READ cmd in communication region cmd + * address. + * * Client issues mbox_send_message() which rings the PCC doorbell + * for its PCC channel. + * * If command completes, then client has control over channel and + * it can proceed with its reads. + * * Client releases lock. + * + * PCC Writes: + * * Client tries to acquire channel lock. + * * Client writes to its communication region after it acquires a + * channel lock. + * * Client writes WRITE cmd in communication region cmd address. + * * Client issues mbox_send_message() which rings the PCC doorbell + * for its PCC channel. + * * If command completes, then writes have succeeded and it can release + * the channel lock. + * + * There is a nominal latency defined for each channel which indicates + * how long to wait until a command completes. If the command is not complete + * the client needs to retry or assume failure. + * + * For more details about PCC, please see the ACPI specification from * http://www.uefi.org/ACPIv5.1 Section 14. * * This file implements PCC as a Mailbox controller and allows for PCC @@ -42,8 +67,6 @@ #include "mailbox.h" #define MAX_PCC_SUBSPACES 256 -#define PCCS_SS_SIG_MAGIC 0x50434300 -#define PCC_CMD_COMPLETE 0x1 static struct mbox_chan *pcc_mbox_channels; @@ -71,23 +94,6 @@ static struct mbox_chan *get_pcc_channel(int id) } /** - * get_subspace_id - Given a Mailbox channel, find out the - * PCC subspace id. - * @chan: Pointer to Mailbox Channel from which we want - * the index. - * Return: Errno if not found, else positive index number. - */ -static int get_subspace_id(struct mbox_chan *chan) -{ - unsigned int id = chan - pcc_mbox_channels; - - if (id < 0 || id > pcc_mbox_ctrl.num_chans) - return -ENOENT; - - return id; -} - -/** * pcc_mbox_request_channel - PCC clients call this function to * request a pointer to their PCC subspace, from which they * can get the details of communicating with the remote.
@@ -117,7 +123,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, chan = get_pcc_channel(subspace_id); if (!chan || chan->cl) { - dev_err(dev, "%s: PCC mailbox not free\n", __func__); + dev_err(dev, "Channel not found for idx: %d\n", subspace_id); return ERR_PTR(-EBUSY); } @@ -161,81 +167,30 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); /** - * pcc_tx_done - Callback from Mailbox controller code to - * check if PCC message transmission completed. - * @chan: Pointer to Mailbox channel on which previous - * transmission occurred. - * - * Return: TRUE if succeeded. - */ -static bool pcc_tx_done(struct mbox_chan *chan) -{ - struct acpi_pcct_hw_reduced *pcct_ss = chan->con_priv; - struct acpi_pcct_shared_memory *generic_comm_base = - (struct acpi_pcct_shared_memory *) pcct_ss->base_address; - u16 cmd_delay = pcct_ss->latency; - unsigned int retries = 0; - - /* Try a few times while waiting for platform to consume */ - while (!(readw_relaxed(&generic_comm_base->status) - & PCC_CMD_COMPLETE)) { - - if (retries++ < 5) - udelay(cmd_delay); - else { - /* - * If the remote is dead, this will cause the Mbox - * controller to timeout after mbox client.tx_tout - * msecs. - */ - pr_err("PCC platform did not respond.\n"); - return false; - } - } - return true; -} - -/** - * pcc_send_data - Called from Mailbox Controller code to finally - * transmit data over channel. + * pcc_send_data - Called from Mailbox Controller code. Used + * here only to ring the channel doorbell. The PCC client + * specific read/write is done in the client driver in + * order to maintain atomicity over PCC channel once + * OS has control over it. See above for flow of operations. * @chan: Pointer to Mailbox channel over which to send data. - * @data: Actual data to be written over channel. + * @data: Client specific data written over channel. Used here + * only for debug after PCC transaction completes. * * Return: Err if something failed else 0 for success. */ static int pcc_send_data(struct mbox_chan *chan, void *data) { struct acpi_pcct_hw_reduced *pcct_ss = chan->con_priv; - struct acpi_pcct_shared_memory *generic_comm_base = - (struct acpi_pcct_shared_memory *) pcct_ss->base_address; struct acpi_generic_address doorbell; u64 doorbell_preserve; u64 doorbell_val; u64 doorbell_write; - u16 cmd = *(u16 *) data; - u16 ss_idx = -1; - - ss_idx = get_subspace_id(chan); - - if (ss_idx < 0) { - pr_err("Invalid Subspace ID from PCC client\n"); - return -EINVAL; - } doorbell = pcct_ss->doorbell_register; doorbell_preserve = pcct_ss->preserve_mask; doorbell_write = pcct_ss->write_mask; - /* Write to the shared comm region. */ - writew(cmd, &generic_comm_base->command); - - /* Write Subspace MAGIC value so platform can identify destination. */ - writel((PCCS_SS_SIG_MAGIC | ss_idx), &generic_comm_base->signature); - - /* Flip CMD COMPLETE bit */ - writew(0, &generic_comm_base->status); - - /* Sync notification from OSPM to Platform. */ + /* Sync notification from OS to Platform. 
*/ acpi_read(&doorbell_val, &doorbell); acpi_write((doorbell_val & doorbell_preserve) | doorbell_write, &doorbell); @@ -245,7 +200,6 @@ static int pcc_send_data(struct mbox_chan *chan, void *data) static struct mbox_chan_ops pcc_chan_ops = { .send_data = pcc_send_data, - .last_tx_done = pcc_tx_done, }; /** @@ -351,8 +305,6 @@ static int pcc_mbox_probe(struct platform_device *pdev) pcc_mbox_ctrl.chans = pcc_mbox_channels; pcc_mbox_ctrl.ops = &pcc_chan_ops; - pcc_mbox_ctrl.txdone_poll = true; - pcc_mbox_ctrl.txpoll_period = 10; pcc_mbox_ctrl.dev = &pdev->dev; pr_info("Registering PCC driver as Mailbox controller\n"); diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c index 0939d399b774..8aa726651630 100644 --- a/drivers/media/pci/bt8xx/bt878.c +++ b/drivers/media/pci/bt8xx/bt878.c @@ -416,9 +416,6 @@ static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id) int result = 0; unsigned char lat; struct bt878 *bt; -#if defined(__powerpc__) - unsigned int cmd; -#endif unsigned int cardid; printk(KERN_INFO "bt878: Bt878 AUDIO function found (%d).\n", @@ -461,15 +458,6 @@ static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id) printk("irq: %d, latency: %d, memory: 0x%lx\n", bt->irq, lat, bt->bt878_adr); - -#if defined(__powerpc__) - /* on OpenFirmware machines (PowerMac at least), PCI memory cycle */ - /* response on cards with no firmware is not enabled by OF */ - pci_read_config_dword(dev, PCI_COMMAND, &cmd); - cmd = (cmd | PCI_COMMAND_MEMORY); - pci_write_config_dword(dev, PCI_COMMAND, cmd); -#endif - #ifdef __sparc__ bt->bt878_mem = (unsigned char *) bt->bt878_adr; #else diff --git a/drivers/media/pci/bt8xx/bt878.h b/drivers/media/pci/bt8xx/bt878.h index d19b59299d78..49af240b5894 100644 --- a/drivers/media/pci/bt8xx/bt878.h +++ b/drivers/media/pci/bt8xx/bt878.h @@ -142,18 +142,7 @@ void bt878_start(struct bt878 *bt, u32 controlreg, u32 op_sync_orin, u32 irq_err_ignore); void bt878_stop(struct bt878 *bt); -#if defined(__powerpc__) /* big-endian */ -static inline void io_st_le32(volatile unsigned __iomem *addr, unsigned val) -{ - st_le32(addr, val); - eieio(); -} - -#define bmtwrite(dat,adr) io_st_le32((adr),(dat)) -#define bmtread(adr) ld_le32((adr)) -#else #define bmtwrite(dat,adr) writel((dat), (adr)) #define bmtread(adr) readl(adr) -#endif #endif diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c69afb5e264e..2fc426926574 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2230,7 +2230,7 @@ static int mmc_blk_alloc_part(struct mmc_card *card, part_md->part_type = part_type; list_add(&part_md->part, &md->part); - string_get_size((u64)get_capacity(part_md->disk) << 9, STRING_UNITS_2, + string_get_size((u64)get_capacity(part_md->disk), 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); pr_info("%s: %s %s partition %u %s\n", part_md->disk->disk_name, mmc_card_id(card), @@ -2436,7 +2436,7 @@ static int mmc_blk_probe(struct device *dev) if (IS_ERR(md)) return PTR_ERR(md); - string_get_size((u64)get_capacity(md->disk) << 9, STRING_UNITS_2, + string_get_size((u64)get_capacity(md->disk), 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); pr_info("%s: %s %s %s %s\n", md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 5316d9b9e7b4..317d709f7550 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -281,7 +281,7 @@ static inline void buffer_swap32(u32 *buf, int len) int i; for (i = 0; i < ((len 
+ 3) / 4); i++) { - st_le32(buf, *buf); + *buf = swab32(*buf); buf++; } } diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 2f98846e2d89..a65d7a60f116 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -483,8 +483,8 @@ static int bmac_suspend(struct macio_dev *mdev, pm_message_t state) bmwrite(dev, TXCFG, (config & ~TxMACEnable)); bmwrite(dev, INTDISABLE, DisableAll); /* disable all intrs */ /* disable rx and tx dma */ - st_le32(&rd->control, DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ - st_le32(&td->control, DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ + rd->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ + td->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ /* free some skb's */ for (i=0; i<N_RX_RING; i++) { if (bp->rx_bufs[i] != NULL) { @@ -699,8 +699,8 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id) while (1) { cp = &bp->rx_cmds[i]; - stat = ld_le16(&cp->xfer_status); - residual = ld_le16(&cp->res_count); + stat = le16_to_cpu(cp->xfer_status); + residual = le16_to_cpu(cp->res_count); if ((stat & ACTIVE) == 0) break; nb = RX_BUFLEN - residual - 2; @@ -728,8 +728,8 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id) skb_reserve(bp->rx_bufs[i], 2); } bmac_construct_rxbuff(skb, &bp->rx_cmds[i]); - st_le16(&cp->res_count, 0); - st_le16(&cp->xfer_status, 0); + cp->res_count = cpu_to_le16(0); + cp->xfer_status = cpu_to_le16(0); last = i; if (++i >= N_RX_RING) i = 0; } @@ -769,7 +769,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id) while (1) { cp = &bp->tx_cmds[bp->tx_empty]; - stat = ld_le16(&cp->xfer_status); + stat = le16_to_cpu(cp->xfer_status); if (txintcount < 10) { XXDEBUG(("bmac_txdma_xfer_stat=%#0x\n", stat)); } @@ -1411,8 +1411,8 @@ static int bmac_close(struct net_device *dev) bmwrite(dev, INTDISABLE, DisableAll); /* disable all intrs */ /* disable rx and tx dma */ - st_le32(&rd->control, DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ - st_le32(&td->control, DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ + rd->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ + td->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE)); /* clear run bit */ /* free some skb's */ XXDEBUG(("bmac: free rx bufs\n")); @@ -1493,7 +1493,7 @@ static void bmac_tx_timeout(unsigned long data) cp = &bp->tx_cmds[bp->tx_empty]; /* XXDEBUG((KERN_DEBUG "bmac: tx dmastat=%x %x runt=%d pr=%x fs=%x fc=%x\n", */ -/* ld_le32(&td->status), ld_le16(&cp->xfer_status), bp->tx_bad_runt, */ +/* le32_to_cpu(td->status), le16_to_cpu(cp->xfer_status), bp->tx_bad_runt, */ /* mb->pr, mb->xmtfs, mb->fifofc)); */ /* turn off both tx and rx and reset the chip */ @@ -1506,7 +1506,7 @@ static void bmac_tx_timeout(unsigned long data) bmac_enable_and_reset_chip(dev); /* restart rx dma */ - cp = bus_to_virt(ld_le32(&rd->cmdptr)); + cp = bus_to_virt(le32_to_cpu(rd->cmdptr)); out_le32(&rd->control, DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE|ACTIVE|DEAD)); out_le16(&cp->xfer_status, 0); out_le32(&rd->cmdptr, virt_to_bus(cp)); @@ -1553,10 +1553,10 @@ static void dump_dbdma(volatile struct dbdma_cmd *cp,int count) ip = (int*)(cp+i); printk("dbdma req 0x%x addr 0x%x baddr 0x%x xfer/res 0x%x\n", - ld_le32(ip+0), - ld_le32(ip+1), - ld_le32(ip+2), - ld_le32(ip+3)); + le32_to_cpup(ip+0), + le32_to_cpup(ip+1), + le32_to_cpup(ip+2), + le32_to_cpup(ip+3)); } } diff --git a/drivers/net/ethernet/apple/mace.c 
b/drivers/net/ethernet/apple/mace.c index a18948286682..e58a7c73766e 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -310,7 +310,7 @@ static void dbdma_reset(volatile struct dbdma_regs __iomem *dma) * way on some machines. */ for (i = 200; i > 0; --i) - if (ld_le32(&dma->control) & RUN) + if (le32_to_cpu(dma->control) & RUN) udelay(1); } @@ -452,21 +452,21 @@ static int mace_open(struct net_device *dev) data = skb->data; } mp->rx_bufs[i] = skb; - st_le16(&cp->req_count, RX_BUFLEN); - st_le16(&cp->command, INPUT_LAST + INTR_ALWAYS); - st_le32(&cp->phy_addr, virt_to_bus(data)); + cp->req_count = cpu_to_le16(RX_BUFLEN); + cp->command = cpu_to_le16(INPUT_LAST + INTR_ALWAYS); + cp->phy_addr = cpu_to_le32(virt_to_bus(data)); cp->xfer_status = 0; ++cp; } mp->rx_bufs[i] = NULL; - st_le16(&cp->command, DBDMA_STOP); + cp->command = cpu_to_le16(DBDMA_STOP); mp->rx_fill = i; mp->rx_empty = 0; /* Put a branch back to the beginning of the receive command list */ ++cp; - st_le16(&cp->command, DBDMA_NOP + BR_ALWAYS); - st_le32(&cp->cmd_dep, virt_to_bus(mp->rx_cmds)); + cp->command = cpu_to_le16(DBDMA_NOP + BR_ALWAYS); + cp->cmd_dep = cpu_to_le32(virt_to_bus(mp->rx_cmds)); /* start rx dma */ out_le32(&rd->control, (RUN|PAUSE|FLUSH|WAKE) << 16); /* clear run bit */ @@ -475,8 +475,8 @@ static int mace_open(struct net_device *dev) /* put a branch at the end of the tx command list */ cp = mp->tx_cmds + NCMDS_TX * N_TX_RING; - st_le16(&cp->command, DBDMA_NOP + BR_ALWAYS); - st_le32(&cp->cmd_dep, virt_to_bus(mp->tx_cmds)); + cp->command = cpu_to_le16(DBDMA_NOP + BR_ALWAYS); + cp->cmd_dep = cpu_to_le32(virt_to_bus(mp->tx_cmds)); /* reset tx dma */ out_le32(&td->control, (RUN|PAUSE|FLUSH|WAKE) << 16); @@ -507,8 +507,8 @@ static int mace_close(struct net_device *dev) out_8(&mb->imr, 0xff); /* disable all intrs */ /* disable rx and tx dma */ - st_le32(&rd->control, (RUN|PAUSE|FLUSH|WAKE) << 16); /* clear run bit */ - st_le32(&td->control, (RUN|PAUSE|FLUSH|WAKE) << 16); /* clear run bit */ + rd->control = cpu_to_le32((RUN|PAUSE|FLUSH|WAKE) << 16); /* clear run bit */ + td->control = cpu_to_le32((RUN|PAUSE|FLUSH|WAKE) << 16); /* clear run bit */ mace_clean_rings(mp); @@ -558,8 +558,8 @@ static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev) } mp->tx_bufs[fill] = skb; cp = mp->tx_cmds + NCMDS_TX * fill; - st_le16(&cp->req_count, len); - st_le32(&cp->phy_addr, virt_to_bus(skb->data)); + cp->req_count = cpu_to_le16(len); + cp->phy_addr = cpu_to_le32(virt_to_bus(skb->data)); np = mp->tx_cmds + NCMDS_TX * next; out_le16(&np->command, DBDMA_STOP); @@ -691,7 +691,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) out_8(&mb->xmtfc, AUTO_PAD_XMIT); continue; } - dstat = ld_le32(&td->status); + dstat = le32_to_cpu(td->status); /* stop DMA controller */ out_le32(&td->control, RUN << 16); /* @@ -724,7 +724,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) */ } cp = mp->tx_cmds + NCMDS_TX * i; - stat = ld_le16(&cp->xfer_status); + stat = le16_to_cpu(cp->xfer_status); if ((fs & (UFLO|LCOL|LCAR|RTRY)) || (dstat & DEAD) || xcount == 0) { /* * Check whether there were in fact 2 bytes written to @@ -830,7 +830,7 @@ static void mace_tx_timeout(unsigned long data) mace_reset(dev); /* restart rx dma */ - cp = bus_to_virt(ld_le32(&rd->cmdptr)); + cp = bus_to_virt(le32_to_cpu(rd->cmdptr)); dbdma_reset(rd); out_le16(&cp->xfer_status, 0); out_le32(&rd->cmdptr, virt_to_bus(cp)); @@ -889,20 +889,20 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id) 
spin_lock_irqsave(&mp->lock, flags); for (i = mp->rx_empty; i != mp->rx_fill; ) { cp = mp->rx_cmds + i; - stat = ld_le16(&cp->xfer_status); + stat = le16_to_cpu(cp->xfer_status); if ((stat & ACTIVE) == 0) { next = i + 1; if (next >= N_RX_RING) next = 0; np = mp->rx_cmds + next; if (next != mp->rx_fill && - (ld_le16(&np->xfer_status) & ACTIVE) != 0) { + (le16_to_cpu(np->xfer_status) & ACTIVE) != 0) { printk(KERN_DEBUG "mace: lost a status word\n"); ++mace_lost_status; } else break; } - nb = ld_le16(&cp->req_count) - ld_le16(&cp->res_count); + nb = le16_to_cpu(cp->req_count) - le16_to_cpu(cp->res_count); out_le16(&cp->command, DBDMA_STOP); /* got a packet, have a look at it */ skb = mp->rx_bufs[i]; @@ -962,13 +962,13 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id) mp->rx_bufs[i] = skb; } } - st_le16(&cp->req_count, RX_BUFLEN); + cp->req_count = cpu_to_le16(RX_BUFLEN); data = skb? skb->data: dummy_buf; - st_le32(&cp->phy_addr, virt_to_bus(data)); + cp->phy_addr = cpu_to_le32(virt_to_bus(data)); out_le16(&cp->xfer_status, 0); out_le16(&cp->command, INPUT_LAST + INTR_ALWAYS); #if 0 - if ((ld_le32(&rd->status) & ACTIVE) != 0) { + if ((le32_to_cpu(rd->status) & ACTIVE) != 0) { out_le32(&rd->control, (PAUSE << 16) | PAUSE); while ((in_le32(&rd->status) & ACTIVE) != 0) ; diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 8a17b97baa20..de7919322190 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -79,13 +79,6 @@ MODULE_AUTHOR ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>"); MODULE_LICENSE("GPL"); -/* - * PPC64 doesn't (yet) have a cacheable_memcpy - */ -#ifdef CONFIG_PPC64 -#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n)) -#endif - /* minimum number of free TX descriptors required to wake up TX process */ #define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) @@ -1673,7 +1666,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; } else { - cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), + memcpy(skb_tail_pointer(dev->rx_sg_skb), dev->rx_skb[slot]->data, len); skb_put(dev->rx_sg_skb, len); emac_recycle_rx_skb(dev, slot, len); @@ -1730,8 +1723,7 @@ static int emac_poll_rx(void *param, int budget) goto oom; skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); - cacheable_memcpy(copy_skb->data - 2, skb->data - 2, - len + 2); + memcpy(copy_skb->data - 2, skb->data - 2, len + 2); emac_recycle_rx_skb(dev, slot, len); skb = copy_skb; } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 33d2f9aa1b53..4b00545a3ace 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2523,9 +2523,7 @@ int efx_try_recovery(struct efx_nic *efx) * schedule a 'recover or reset', leading to this recovery handler. * Manually call the eeh failure check function. */ - struct eeh_dev *eehdev = - of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); - + struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); if (eeh_dev_check_failure(eehdev)) { /* The EEH mechanisms will handle the error and reset the * device if necessary. 
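
This sfc hunk (and the siena.c hunk below) drops the device-tree detour and fetches the EEH device directly from the struct pci_dev. A hedged sketch of the resulting pattern for a powerpc driver that wants to poll for an EEH-frozen device follows; the wrapper name, the NULL check, and the asm/eeh.h include are assumptions, while pci_dev_to_eeh_dev() and eeh_dev_check_failure() are the real interfaces used in the hunks:

    #include <linux/pci.h>
    #ifdef CONFIG_EEH
    #include <asm/eeh.h>    /* powerpc; assumed home of eeh_dev_check_failure() */

    /* Nonzero means the device is frozen and EEH recovery has been triggered. */
    static int my_check_eeh(struct pci_dev *pdev)
    {
            struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);

            return edev ? eeh_dev_check_failure(edev) : 0;
    }
    #endif
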
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 3583f0208a6e..f12c811938d2 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -205,8 +205,7 @@ static int siena_map_reset_flags(u32 *flags) */ static void siena_monitor(struct efx_nic *efx) { - struct eeh_dev *eehdev = - of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); + struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); eeh_dev_check_failure(eehdev); } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b8c471813f4c..4de46aa61d95 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1780,7 +1780,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue, int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), - tx_ring_ref, &addr); + &tx_ring_ref, 1, &addr); if (err) goto err; @@ -1788,7 +1788,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue, BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), - rx_ring_ref, &addr); + &rx_ring_ref, 1, &addr); if (err) goto err; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 720aaf6313d2..4c08f98f4484 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1483,6 +1483,7 @@ static int setup_netfront(struct xenbus_device *dev, { struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; + grant_ref_t gref; int err; queue->tx_ring_ref = GRANT_INVALID_REF; @@ -1499,10 +1500,10 @@ static int setup_netfront(struct xenbus_device *dev, SHARED_RING_INIT(txs); FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) goto grant_tx_ring_fail; - queue->tx_ring_ref = err; + queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { @@ -1513,10 +1514,10 @@ static int setup_netfront(struct xenbus_device *dev, SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) goto grant_rx_ring_fail; - queue->rx_ring_ref = err; + queue->rx_ring_ref = gref; if (feature_split_evtchn) err = setup_netfront_split(queue); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 7660232ef460..e12bafdc42e0 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -146,7 +146,7 @@ static void dlpar_pci_add_bus(struct device_node *dn) struct pci_controller *phb = pdn->phb; struct pci_dev *dev = NULL; - eeh_add_device_tree_early(dn); + eeh_add_device_tree_early(pdn); /* Add EADS device to PHB bus, adding new entry to bus->devices */ dev = of_create_pci_dev(dn, phb->bus, pdn->devfn); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 4b3a4eaad996..ee0ebff103a4 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -19,16 +19,59 @@ #define VIRTFN_ID_LEN 16 -static inline u8 virtfn_bus(struct pci_dev *dev, int id) +int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id) { + if (!dev->is_physfn) + return -EINVAL; return dev->bus->number + ((dev->devfn + dev->sriov->offset + - dev->sriov->stride * id) >> 8); + dev->sriov->stride * vf_id) >> 8); } -static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) { + if (!dev->is_physfn) 
+ return -EINVAL; return (dev->devfn + dev->sriov->offset + - dev->sriov->stride * id) & 0xff; + dev->sriov->stride * vf_id) & 0xff; +} + +/* + * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may + * change when NumVFs changes. + * + * Update iov->offset and iov->stride when NumVFs is written. + */ +static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) +{ + struct pci_sriov *iov = dev->sriov; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); +} + +/* + * The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride + * determine how many additional bus numbers will be consumed by VFs. + * + * Iterate over all valid NumVFs and calculate the maximum number of bus + * numbers that could ever be required. + */ +static inline u8 virtfn_max_buses(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + int nr_virtfn; + u8 max = 0; + int busnr; + + for (nr_virtfn = 1; nr_virtfn <= iov->total_VFs; nr_virtfn++) { + pci_iov_set_numvfs(dev, nr_virtfn); + busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1); + if (busnr > max) + max = busnr; + } + + return max; } static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) @@ -57,6 +100,14 @@ static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus) pci_remove_bus(virtbus); } +resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) +{ + if (!dev->is_physfn) + return 0; + + return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; +} + static int virtfn_add(struct pci_dev *dev, int id, int reset) { int i; @@ -69,7 +120,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) struct pci_bus *bus; mutex_lock(&iov->dev->sriov->lock); - bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id)); if (!bus) goto failed; @@ -77,7 +128,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) if (!virtfn) goto failed0; - virtfn->devfn = virtfn_devfn(dev, id); + virtfn->devfn = pci_iov_virtfn_devfn(dev, id); virtfn->vendor = dev->vendor; pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); pci_setup_device(virtfn); @@ -87,13 +138,12 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) virtfn->multifunction = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; if (!res->parent) continue; virtfn->resource[i].name = pci_name(virtfn); virtfn->resource[i].flags = res->flags; - size = resource_size(res); - do_div(size, iov->total_VFs); + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); virtfn->resource[i].start = res->start + size * id; virtfn->resource[i].end = virtfn->resource[i].start + size - 1; rc = request_resource(res, &virtfn->resource[i]); @@ -140,8 +190,8 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) struct pci_sriov *iov = dev->sriov; virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), - virtfn_bus(dev, id), - virtfn_devfn(dev, id)); + pci_iov_virtfn_bus(dev, id), + pci_iov_virtfn_devfn(dev, id)); if (!virtfn) return; @@ -170,6 +220,11 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) pci_dev_put(dev); } +int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + return 0; +} + static int sriov_enable(struct pci_dev *dev, int 
nr_virtfn) { int rc; @@ -180,6 +235,8 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) struct pci_dev *pdev; struct pci_sriov *iov = dev->sriov; int bars = 0; + int bus; + int retval; if (!nr_virtfn) return 0; @@ -204,7 +261,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { bars |= (1 << (i + PCI_IOV_RESOURCES)); - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; if (res->parent) nres++; } @@ -216,8 +273,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) iov->offset = offset; iov->stride = stride; - if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->busn_res.end) { - dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1); + if (bus > dev->bus->busn_res.end) { + dev_err(&dev->dev, "can't enable %d VFs (bus %02x out of range of %pR)\n", + nr_virtfn, bus, &dev->bus->busn_res); return -ENOMEM; } @@ -243,7 +302,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return rc; } - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_iov_set_numvfs(dev, nr_virtfn); iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); @@ -254,6 +313,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) if (nr_virtfn < initial) initial = nr_virtfn; + if ((retval = pcibios_sriov_enable(dev, initial))) { + dev_err(&dev->dev, "failure %d from pcibios_sriov_enable()\n", + retval); + return retval; + } + for (i = 0; i < initial; i++) { rc = virtfn_add(dev, i, 0); if (rc) @@ -272,7 +337,7 @@ failed: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); ssleep(1); pci_cfg_access_unlock(dev); @@ -282,6 +347,11 @@ failed: return rc; } +int __weak pcibios_sriov_disable(struct pci_dev *pdev) +{ + return 0; +} + static void sriov_disable(struct pci_dev *dev) { int i; @@ -293,6 +363,8 @@ static void sriov_disable(struct pci_dev *dev) for (i = 0; i < iov->num_VFs; i++) virtfn_remove(dev, i, 0); + pcibios_sriov_disable(dev); + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); @@ -303,12 +375,12 @@ static void sriov_disable(struct pci_dev *dev) sysfs_remove_link(&dev->dev.kobj, "dep_link"); iov->num_VFs = 0; - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); } static int sriov_init(struct pci_dev *dev, int pos) { - int i; + int i, bar64; int rc; int nres; u32 pgsz; @@ -357,27 +429,29 @@ found: pgsz &= ~(pgsz - 1); pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + return -ENOMEM; + nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; - i += __pci_read_base(dev, pci_bar_unknown, res, - pos + PCI_SRIOV_BAR + i * 4); + res = &dev->resource[i + PCI_IOV_RESOURCES]; + bar64 = __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); if (!res->flags) continue; if (resource_size(res) & (PAGE_SIZE - 1)) { rc = -EIO; goto failed; } + iov->barsz[i] = resource_size(res); res->end = res->start + resource_size(res) * total - 1; + dev_info(&dev->dev, "VF(n) BAR%d space: %pR 
(contains BAR%d for %d VFs)\n", + i, res, i, total); + i += bar64; nres++; } - iov = kzalloc(sizeof(*iov), GFP_KERNEL); - if (!iov) { - rc = -ENOMEM; - goto failed; - } - iov->pos = pos; iov->nres = nres; iov->ctrl = ctrl; @@ -400,15 +474,17 @@ found: dev->sriov = iov; dev->is_physfn = 1; + iov->max_VF_buses = virtfn_max_buses(dev); return 0; failed: for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; res->flags = 0; } + kfree(iov); return rc; } @@ -439,7 +515,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); + pci_iov_set_numvfs(dev, iov->num_VFs); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -493,6 +569,12 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno) 4 * (resno - PCI_IOV_RESOURCES); } +resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev, + int resno) +{ + return pci_iov_resource_size(dev, resno); +} + /** * pci_sriov_resource_alignment - get resource alignment for VF BAR * @dev: the PCI device @@ -505,14 +587,7 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno) */ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) { - struct resource tmp; - int reg = pci_iov_resource_bar(dev, resno); - - if (!reg) - return 0; - - __pci_read_base(dev, pci_bar_unknown, &tmp, reg); - return resource_alignment(&tmp); + return pcibios_iov_resource_alignment(dev, resno); } /** @@ -535,15 +610,13 @@ void pci_restore_iov_state(struct pci_dev *dev) int pci_iov_bus_range(struct pci_bus *bus) { int max = 0; - u8 busnr; struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { if (!dev->is_physfn) continue; - busnr = virtfn_bus(dev, dev->sriov->total_VFs - 1); - if (busnr > max) - max = busnr; + if (dev->sriov->max_VF_buses > max) + max = dev->sriov->max_VF_buses; } return max ? 
max - bus->number : 0; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d72f849174a4..9bd762c237ab 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -243,10 +243,12 @@ struct pci_sriov { u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ u8 link; /* Function Dependency Link */ + u8 max_VF_buses; /* max buses consumed by VFs */ u16 driver_max_VFs; /* max num VFs driver supports */ struct pci_dev *dev; /* lowest numbered PF */ struct pci_dev *self; /* this PF */ struct mutex lock; /* lock for VF bus */ + resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ }; #ifdef CONFIG_PCI_ATS diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8169597e47cb..4fd0cacf7ca0 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -99,8 +99,8 @@ static void remove_from_list(struct list_head *head, } } -static resource_size_t get_res_add_size(struct list_head *head, - struct resource *res) +static struct pci_dev_resource *res_to_dev_res(struct list_head *head, + struct resource *res) { struct pci_dev_resource *dev_res; @@ -109,17 +109,37 @@ static resource_size_t get_res_add_size(struct list_head *head, int idx = res - &dev_res->dev->resource[0]; dev_printk(KERN_DEBUG, &dev_res->dev->dev, - "res[%d]=%pR get_res_add_size add_size %llx\n", + "res[%d]=%pR res_to_dev_res add_size %llx min_align %llx\n", idx, dev_res->res, - (unsigned long long)dev_res->add_size); + (unsigned long long)dev_res->add_size, + (unsigned long long)dev_res->min_align); - return dev_res->add_size; + return dev_res; } } - return 0; + return NULL; } +static resource_size_t get_res_add_size(struct list_head *head, + struct resource *res) +{ + struct pci_dev_resource *dev_res; + + dev_res = res_to_dev_res(head, res); + return dev_res ? dev_res->add_size : 0; +} + +static resource_size_t get_res_add_align(struct list_head *head, + struct resource *res) +{ + struct pci_dev_resource *dev_res; + + dev_res = res_to_dev_res(head, res); + return dev_res ? 
dev_res->min_align : 0; +} + + /* Sort resources by alignment */ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) { @@ -215,7 +235,7 @@ static void reassign_resources_sorted(struct list_head *realloc_head, struct resource *res; struct pci_dev_resource *add_res, *tmp; struct pci_dev_resource *dev_res; - resource_size_t add_size; + resource_size_t add_size, align; int idx; list_for_each_entry_safe(add_res, tmp, realloc_head, list) { @@ -238,13 +258,13 @@ static void reassign_resources_sorted(struct list_head *realloc_head, idx = res - &add_res->dev->resource[0]; add_size = add_res->add_size; + align = add_res->min_align; if (!resource_size(res)) { - res->start = add_res->start; + res->start = align; res->end = res->start + add_size - 1; if (pci_assign_resource(add_res->dev, idx)) reset_resource(res); } else { - resource_size_t align = add_res->min_align; res->flags |= add_res->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); if (pci_reassign_resource(add_res->dev, idx, @@ -368,8 +388,9 @@ static void __assign_resources_sorted(struct list_head *head, LIST_HEAD(save_head); LIST_HEAD(local_fail_head); struct pci_dev_resource *save_res; - struct pci_dev_resource *dev_res, *tmp_res; + struct pci_dev_resource *dev_res, *tmp_res, *dev_res2; unsigned long fail_type; + resource_size_t add_align, align; /* Check if optional add_size is there */ if (!realloc_head || list_empty(realloc_head)) @@ -384,10 +405,44 @@ static void __assign_resources_sorted(struct list_head *head, } /* Update res in head list with add_size in realloc_head list */ - list_for_each_entry(dev_res, head, list) + list_for_each_entry_safe(dev_res, tmp_res, head, list) { dev_res->res->end += get_res_add_size(realloc_head, dev_res->res); + /* + * There are two kinds of additional resources in the list: + * 1. bridge resource -- IORESOURCE_STARTALIGN + * 2. SR-IOV resource -- IORESOURCE_SIZEALIGN + * Here just fix the additional alignment for bridge + */ + if (!(dev_res->res->flags & IORESOURCE_STARTALIGN)) + continue; + + add_align = get_res_add_align(realloc_head, dev_res->res); + + /* + * The "head" list is sorted by the alignment to make sure + * resources with bigger alignment will be assigned first. + * After we change the alignment of a dev_res in "head" list, + * we need to reorder the list by alignment to make it + * consistent. 
+ */ + if (add_align > dev_res->res->start) { + dev_res->res->start = add_align; + dev_res->res->end = add_align + + resource_size(dev_res->res); + + list_for_each_entry(dev_res2, head, list) { + align = pci_resource_alignment(dev_res2->dev, + dev_res2->res); + if (add_align > align) + list_move_tail(&dev_res->list, + &dev_res2->list); + } + } + + } + /* Try updated head list with add_size added */ assign_requested_resources_sorted(head, &local_fail_head); @@ -962,6 +1017,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, struct resource *b_res = find_free_bus_resource(bus, mask | IORESOURCE_PREFETCH, type); resource_size_t children_add_size = 0; + resource_size_t children_add_align = 0; + resource_size_t add_align = 0; if (!b_res) return -ENOSPC; @@ -986,6 +1043,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, /* put SRIOV requested res to the optional list */ if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { + add_align = max(pci_resource_alignment(dev, r), add_align); r->end = r->start - 1; add_to_list(realloc_head, dev, r, r_size, 0/* don't care */); children_add_size += r_size; @@ -1016,19 +1074,23 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (order > max_order) max_order = order; - if (realloc_head) + if (realloc_head) { children_add_size += get_res_add_size(realloc_head, r); + children_add_align = get_res_add_align(realloc_head, r); + add_align = max(add_align, children_add_align); + } } } min_align = calculate_mem_align(aligns, max_order); min_align = max(min_align, window_alignment(bus, b_res->flags)); size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); + add_align = max(min_align, add_align); if (children_add_size > add_size) add_size = children_add_size; size1 = (!realloc_head || (realloc_head && !add_size)) ? 
size0 : calculate_memsize(size, min_size, add_size, - resource_size(b_res), min_align); + resource_size(b_res), add_align); if (!size0 && !size1) { if (b_res->start || b_res->end) dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n", @@ -1040,10 +1102,11 @@ b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && realloc_head) { - add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); - dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n", + add_to_list(realloc_head, bus->self, b_res, size1-size0, add_align); + dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx add_align %llx\n", b_res, &bus->busn_res, - (unsigned long long)size1-size0); + (unsigned long long) (size1 - size0), + (unsigned long long) add_align); } return 0; } diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index b1ffebec9b9e..7cfd2db02deb 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -777,12 +777,13 @@ static int pcifront_publish_info(struct pcifront_device *pdev) { int err = 0; struct xenbus_transaction trans; + grant_ref_t gref; - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &gref); if (err < 0) goto out; - pdev->gnt_ref = err; + pdev->gnt_ref = gref; err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); if (err) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index b139b7792e9f..cb7d3a67380d 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -105,7 +105,7 @@ struct ps3_lpm_shadow_regs { * @open: An atomic variable indicating the lpm driver has been opened. * @rights: The lpm rights granted by the system policy module. A logical * OR of enum ps3_lpm_rights. - * @node_id: The node id of a BE prosessor whose performance monitor this + * @node_id: The node id of a BE processor whose performance monitor this * lpar has the right to use. * @pu_id: The lv1 id of the logical PU. * @lpm_id: The lv1 id of this lpm instance.
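
The xen-pcifront hunk above and the xen-netfront hunk earlier both move to the multi-page xenbus_grant_ring() signature: instead of granting one page per call and returning the grant reference, it now takes a virtual address, a page count, and an output array of grant_ref_t, and returns 0 on success. A minimal sketch of the updated single-page call pattern, mirroring those hunks (the wrapper name is illustrative):

    #include <xen/xenbus.h>

    static int publish_shared_page(struct xenbus_device *dev, void *ring,
                                   grant_ref_t *out_ref)
    {
            grant_ref_t gref;
            int err;

            err = xenbus_grant_ring(dev, ring, 1, &gref);   /* one page */
            if (err < 0)
                    return err;     /* nothing granted, nothing to undo */

            *out_ref = gref;
            return 0;
    }

Note that callers no longer pass virt_to_mfn() themselves; as the converted call sites show, the helper now takes the virtual address and performs the translation internally.
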
@@ -412,7 +412,7 @@ u32 ps3_read_pm(u32 cpu, enum pm_reg_name reg) result = lv1_set_lpm_interval(lpm_priv->lpm_id, 0, 0, &val); if (result) { val = 0; - dev_dbg(sbd_core(), "%s:%u: lv1 set_inteval failed: " + dev_dbg(sbd_core(), "%s:%u: lv1 set_interval failed: " "reg %u, %s\n", __func__, __LINE__, reg, ps3_result(result)); } diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 8981701802ca..a777e5c412df 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -474,11 +474,11 @@ static void NCR5380_print_phase(struct Scsi_Host *instance) */ #ifndef USLEEP_SLEEP /* 20 ms (reasonable hard disk speed) */ -#define USLEEP_SLEEP (20*HZ/1000) +#define USLEEP_SLEEP msecs_to_jiffies(20) #endif /* 300 RPM (floppy speed) */ #ifndef USLEEP_POLL -#define USLEEP_POLL (200*HZ/1000) +#define USLEEP_POLL msecs_to_jiffies(200) #endif #ifndef USLEEP_WAITLONG /* RvC: (reasonable time to wait on select error) */ @@ -576,7 +576,7 @@ static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance, if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0)) trying_irqs |= mask; - timeout = jiffies + (250 * HZ / 1000); + timeout = jiffies + msecs_to_jiffies(250); probe_irq = NO_IRQ; /* @@ -634,7 +634,7 @@ static void prepare_info(struct Scsi_Host *instance) "sg_tablesize %d, this_id %d, " "flags { %s%s%s}, " #if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG) - "USLEEP_POLL %d, USLEEP_WAITLONG %d, " + "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, " #endif "options { %s} ", instance->hostt->name, instance->io_port, instance->n_io_port, @@ -1346,7 +1346,7 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) * selection. */ - timeout = jiffies + (250 * HZ / 1000); + timeout = jiffies + msecs_to_jiffies(250); /* * XXX very interesting - we're seeing a bounce where the BSY we diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index b32e77db0c48..9b3dd6ef6a0b 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -111,6 +111,41 @@ #define BYTE2(x) (unsigned char)((x) >> 16) #define BYTE3(x) (unsigned char)((x) >> 24) +/* MODE_SENSE data format */ +typedef struct { + struct { + u8 data_length; + u8 med_type; + u8 dev_par; + u8 bd_length; + } __attribute__((packed)) hd; + struct { + u8 dens_code; + u8 block_count[3]; + u8 reserved; + u8 block_length[3]; + } __attribute__((packed)) bd; + u8 mpc_buf[3]; +} __attribute__((packed)) aac_modep_data; + +/* MODE_SENSE_10 data format */ +typedef struct { + struct { + u8 data_length[2]; + u8 med_type; + u8 dev_par; + u8 rsrvd[2]; + u8 bd_length[2]; + } __attribute__((packed)) hd; + struct { + u8 dens_code; + u8 block_count[3]; + u8 reserved; + u8 block_length[3]; + } __attribute__((packed)) bd; + u8 mpc_buf[3]; +} __attribute__((packed)) aac_modep10_data; + /*------------------------------------------------------------------------------ * S T R U C T S / T Y P E D E F S *----------------------------------------------------------------------------*/ @@ -128,6 +163,48 @@ struct inquiry_data { u8 inqd_prl[4]; /* Product Revision Level */ }; +/* Added for VPD 0x83 */ +typedef struct { + u8 CodeSet:4; /* VPD_CODE_SET */ + u8 Reserved:4; + u8 IdentifierType:4; /* VPD_IDENTIFIER_TYPE */ + u8 Reserved2:4; + u8 Reserved3; + u8 IdentifierLength; + u8 VendId[8]; + u8 ProductId[16]; + u8 SerialNumber[8]; /* SN in ASCII */ + +} TVPD_ID_Descriptor_Type_1; + +typedef struct { + u8 CodeSet:4; /* VPD_CODE_SET */ + u8 Reserved:4; + u8 IdentifierType:4; /* 
VPD_IDENTIFIER_TYPE */ + u8 Reserved2:4; + u8 Reserved3; + u8 IdentifierLength; + struct TEU64Id { + u32 Serial; + /* The serial number is supposed to be 40 bits, + * but we only support 32, so make the last byte zero. */ + u8 Reserved; + u8 VendId[3]; + } EU64Id; + +} TVPD_ID_Descriptor_Type_2; + +typedef struct { + u8 DeviceType:5; + u8 DeviceTypeQualifier:3; + u8 PageCode; + u8 Reserved; + u8 PageLength; + TVPD_ID_Descriptor_Type_1 IdDescriptorType1; + TVPD_ID_Descriptor_Type_2 IdDescriptorType2; + +} TVPD_Page83; + /* * M O D U L E G L O B A L S */ @@ -385,6 +462,11 @@ int aac_get_containers(struct aac_dev *dev) if (status >= 0) { dresp = (struct aac_get_container_count_resp *)fib_data(fibptr); maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries); + if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_SUPPORTED_240_VOLUMES) { + maximum_num_containers = + le32_to_cpu(dresp->MaxSimpleVolumes); + } aac_fib_complete(fibptr); } /* FIB should be freed only after getting the response from the F/W */ @@ -438,7 +520,7 @@ static void get_container_name_callback(void *context, struct fib * fibptr) if ((le32_to_cpu(get_name_reply->status) == CT_OK) && (get_name_reply->data[0] != '\0')) { char *sp = get_name_reply->data; - sp[sizeof(((struct aac_get_name_resp *)NULL)->data)-1] = '\0'; + sp[sizeof(((struct aac_get_name_resp *)NULL)->data)] = '\0'; while (*sp == ' ') ++sp; if (*sp) { @@ -539,6 +621,14 @@ static void _aac_probe_container2(void * context, struct fib * fibptr) if ((le32_to_cpu(dresp->status) == ST_OK) && (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE) && (le32_to_cpu(dresp->mnt[0].state) != FSCS_HIDDEN)) { + if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_VARIABLE_BLOCK_SIZE)) { + dresp->mnt[0].fileinfo.bdevinfo.block_size = 0x200; + fsa_dev_ptr->block_size = 0x200; + } else { + fsa_dev_ptr->block_size = + le32_to_cpu(dresp->mnt[0].fileinfo.bdevinfo.block_size); + } fsa_dev_ptr->valid = 1; /* sense_key holds the current state of the spin-up */ if (dresp->mnt[0].state & cpu_to_le32(FSCS_NOT_READY)) @@ -571,7 +661,9 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) int status; dresp = (struct aac_mount *) fib_data(fibptr); - dresp->mnt[0].capacityhigh = 0; + if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_VARIABLE_BLOCK_SIZE)) + dresp->mnt[0].capacityhigh = 0; if ((le32_to_cpu(dresp->status) != ST_OK) || (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) { _aac_probe_container2(context, fibptr); @@ -586,7 +678,12 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) dinfo = (struct aac_query_mount *)fib_data(fibptr); - dinfo->command = cpu_to_le32(VM_NameServe64); + if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_VARIABLE_BLOCK_SIZE) + dinfo->command = cpu_to_le32(VM_NameServeAllBlk); + else + dinfo->command = cpu_to_le32(VM_NameServe64); + + dinfo->count = cpu_to_le32(scmd_id(scsicmd)); dinfo->type = cpu_to_le32(FT_FILESYS); @@ -621,7 +718,12 @@ static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(stru dinfo = (struct aac_query_mount *)fib_data(fibptr); - dinfo->command = cpu_to_le32(VM_NameServe); + if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_VARIABLE_BLOCK_SIZE) + dinfo->command = cpu_to_le32(VM_NameServeAllBlk); + else + dinfo->command = cpu_to_le32(VM_NameServe); + dinfo->count = cpu_to_le32(scmd_id(scsicmd)); dinfo->type = cpu_to_le32(FT_FILESYS); scsicmd->SCp.ptr = (char *)callback; @@
-835,14 +937,88 @@ static void get_container_serial_callback(void *context, struct fib * fibptr) get_serial_reply = (struct aac_get_serial_resp *) fib_data(fibptr); /* Failure is irrelevant, using default value instead */ if (le32_to_cpu(get_serial_reply->status) == CT_OK) { - char sp[13]; - /* EVPD bit set */ - sp[0] = INQD_PDT_DA; - sp[1] = scsicmd->cmnd[2]; - sp[2] = 0; - sp[3] = snprintf(sp+4, sizeof(sp)-4, "%08X", - le32_to_cpu(get_serial_reply->uid)); - scsi_sg_copy_from_buffer(scsicmd, sp, sizeof(sp)); + /* Check to see if it's for VPD 0x83 or 0x80 */ + if (scsicmd->cmnd[2] == 0x83) { + /* vpd page 0x83 - Device Identification Page */ + int i; + TVPD_Page83 VPDPage83Data; + + memset(((u8 *)&VPDPage83Data), 0, + sizeof(VPDPage83Data)); + + /* DIRECT_ACCESS_DEVICE */ + VPDPage83Data.DeviceType = 0; + /* DEVICE_CONNECTED */ + VPDPage83Data.DeviceTypeQualifier = 0; + /* VPD_DEVICE_IDENTIFIERS */ + VPDPage83Data.PageCode = 0x83; + VPDPage83Data.Reserved = 0; + VPDPage83Data.PageLength = + sizeof(VPDPage83Data.IdDescriptorType1) + + sizeof(VPDPage83Data.IdDescriptorType2); + + /* T10 Vendor Identifier Field Format */ + /* VpdCodeSetAscii */ + VPDPage83Data.IdDescriptorType1.CodeSet = 2; + /* VpdIdentifierTypeVendorId */ + VPDPage83Data.IdDescriptorType1.IdentifierType = 1; + VPDPage83Data.IdDescriptorType1.IdentifierLength = + sizeof(VPDPage83Data.IdDescriptorType1) - 4; + + /* "ADAPTEC " for adaptec */ + memcpy(VPDPage83Data.IdDescriptorType1.VendId, + "ADAPTEC ", + sizeof(VPDPage83Data.IdDescriptorType1.VendId)); + memcpy(VPDPage83Data.IdDescriptorType1.ProductId, + "ARRAY ", + sizeof( + VPDPage83Data.IdDescriptorType1.ProductId)); + + /* Convert to an ASCII-based serial number. + * The LSB is at the end. + */ + for (i = 0; i < 8; i++) { + u8 temp = + (u8)((get_serial_reply->uid >> ((7 - i) * 4)) & 0xF); + if (temp > 0x9) { + VPDPage83Data.IdDescriptorType1.SerialNumber[i] = + 'A' + (temp - 0xA); + } else { + VPDPage83Data.IdDescriptorType1.SerialNumber[i] = + '0' + temp; + } + } + + /* VpdCodeSetBinary */ + VPDPage83Data.IdDescriptorType2.CodeSet = 1; + /* VpdIdentifierTypeEUI64 */ + VPDPage83Data.IdDescriptorType2.IdentifierType = 2; + VPDPage83Data.IdDescriptorType2.IdentifierLength = + sizeof(VPDPage83Data.IdDescriptorType2) - 4; + + VPDPage83Data.IdDescriptorType2.EU64Id.VendId[0] = 0xD0; + VPDPage83Data.IdDescriptorType2.EU64Id.VendId[1] = 0; + VPDPage83Data.IdDescriptorType2.EU64Id.VendId[2] = 0; + + VPDPage83Data.IdDescriptorType2.EU64Id.Serial = + get_serial_reply->uid; + VPDPage83Data.IdDescriptorType2.EU64Id.Reserved = 0; + + /* Move the inquiry data to the response buffer.
*/ + scsi_sg_copy_from_buffer(scsicmd, &VPDPage83Data, + sizeof(VPDPage83Data)); + } else { + /* It must be for VPD 0x80 */ + char sp[13]; + /* EVPD bit set */ + sp[0] = INQD_PDT_DA; + sp[1] = scsicmd->cmnd[2]; + sp[2] = 0; + sp[3] = snprintf(sp+4, sizeof(sp)-4, "%08X", + le32_to_cpu(get_serial_reply->uid)); + scsi_sg_copy_from_buffer(scsicmd, sp, + sizeof(sp)); + } } scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; @@ -982,7 +1158,8 @@ static int aac_read_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u3 memset(readcmd2, 0, sizeof(struct aac_raw_io2)); readcmd2->blockLow = cpu_to_le32((u32)(lba&0xffffffff)); readcmd2->blockHigh = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); - readcmd2->byteCount = cpu_to_le32(count<<9); + readcmd2->byteCount = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); readcmd2->cid = cpu_to_le16(scmd_id(cmd)); readcmd2->flags = cpu_to_le16(RIO2_IO_TYPE_READ); ret = aac_build_sgraw2(cmd, readcmd2, @@ -997,7 +1174,8 @@ static int aac_read_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u3 readcmd = (struct aac_raw_io *) fib_data(fib); readcmd->block[0] = cpu_to_le32((u32)(lba&0xffffffff)); readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); - readcmd->count = cpu_to_le32(count<<9); + readcmd->count = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); readcmd->cid = cpu_to_le16(scmd_id(cmd)); readcmd->flags = cpu_to_le16(RIO_TYPE_READ); readcmd->bpTotal = 0; @@ -1062,6 +1240,7 @@ static int aac_read_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 { u16 fibsize; struct aac_read *readcmd; + struct aac_dev *dev = fib->dev; long ret; aac_fib_init(fib); @@ -1069,7 +1248,8 @@ static int aac_read_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 readcmd->command = cpu_to_le32(VM_CtBlockRead); readcmd->cid = cpu_to_le32(scmd_id(cmd)); readcmd->block = cpu_to_le32((u32)(lba&0xffffffff)); - readcmd->count = cpu_to_le32(count * 512); + readcmd->count = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); ret = aac_build_sg(cmd, &readcmd->sg); if (ret < 0) @@ -1104,7 +1284,8 @@ static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u memset(writecmd2, 0, sizeof(struct aac_raw_io2)); writecmd2->blockLow = cpu_to_le32((u32)(lba&0xffffffff)); writecmd2->blockHigh = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); - writecmd2->byteCount = cpu_to_le32(count<<9); + writecmd2->byteCount = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); writecmd2->cid = cpu_to_le16(scmd_id(cmd)); writecmd2->flags = (fua && ((aac_cache & 5) != 1) && (((aac_cache & 5) != 5) || !fib->dev->cache_protected)) ? @@ -1122,7 +1303,8 @@ static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u writecmd = (struct aac_raw_io *) fib_data(fib); writecmd->block[0] = cpu_to_le32((u32)(lba&0xffffffff)); writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); - writecmd->count = cpu_to_le32(count<<9); + writecmd->count = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); writecmd->cid = cpu_to_le16(scmd_id(cmd)); writecmd->flags = (fua && ((aac_cache & 5) != 1) && (((aac_cache & 5) != 5) || !fib->dev->cache_protected)) ? 
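
The VPD 0x83 hunk above converts the 32-bit uid into eight uppercase hex characters by hand, most significant nibble first. A standalone equivalent of that loop, for illustration only (the helper name is hypothetical):

    #include <linux/types.h>

    static void uid_to_ascii_hex(u32 uid, u8 out[8])
    {
            int i;

            for (i = 0; i < 8; i++) {
                    u8 nib = (uid >> ((7 - i) * 4)) & 0xF;

                    out[i] = nib > 9 ? 'A' + (nib - 0xA) : '0' + nib;
            }
    }

This yields the same bytes as snprintf(out, 9, "%08X", uid) minus the NUL terminator, spelled out here to match the driver's open-coded form.
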
@@ -1190,6 +1372,7 @@ static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u3 { u16 fibsize; struct aac_write *writecmd; + struct aac_dev *dev = fib->dev; long ret; aac_fib_init(fib); @@ -1197,7 +1380,8 @@ static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u3 writecmd->command = cpu_to_le32(VM_CtBlockWrite); writecmd->cid = cpu_to_le32(scmd_id(cmd)); writecmd->block = cpu_to_le32((u32)(lba&0xffffffff)); - writecmd->count = cpu_to_le32(count * 512); + writecmd->count = cpu_to_le32(count * + dev->fsa_dev[scmd_id(cmd)].block_size); writecmd->sg.count = cpu_to_le32(1); /* ->stable is not used - it did mean which type of write */ @@ -2246,9 +2430,10 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) INQD_PDT_PROC : INQD_PDT_DA; if (scsicmd->cmnd[2] == 0) { /* supported vital product data pages */ - arr[3] = 2; + arr[3] = 3; arr[4] = 0x0; arr[5] = 0x80; + arr[6] = 0x83; arr[1] = scsicmd->cmnd[2]; scsi_sg_copy_from_buffer(scsicmd, &inq_data, sizeof(inq_data)); @@ -2264,7 +2449,16 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) if (aac_wwn != 2) return aac_get_container_serial( scsicmd); - /* SLES 10 SP1 special */ + scsicmd->result = DID_OK << 16 | + COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; + } else if (scsicmd->cmnd[2] == 0x83) { + /* vpd page 0x83 - Device Identification Page */ + char *sno = (char *)&inq_data; + sno[3] = setinqserial(dev, &sno[4], + scmd_id(scsicmd)); + if (aac_wwn != 2) + return aac_get_container_serial( + scsicmd); scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; } else { @@ -2329,10 +2523,10 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) cp[5] = (capacity >> 16) & 0xff; cp[6] = (capacity >> 8) & 0xff; cp[7] = (capacity >> 0) & 0xff; - cp[8] = 0; - cp[9] = 0; - cp[10] = 2; - cp[11] = 0; + cp[8] = (fsa_dev_ptr[cid].block_size >> 24) & 0xff; + cp[9] = (fsa_dev_ptr[cid].block_size >> 16) & 0xff; + cp[10] = (fsa_dev_ptr[cid].block_size >> 8) & 0xff; + cp[11] = (fsa_dev_ptr[cid].block_size) & 0xff; cp[12] = 0; alloc_len = ((scsicmd->cmnd[10] << 24) @@ -2369,10 +2563,10 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) cp[1] = (capacity >> 16) & 0xff; cp[2] = (capacity >> 8) & 0xff; cp[3] = (capacity >> 0) & 0xff; - cp[4] = 0; - cp[5] = 0; - cp[6] = 2; - cp[7] = 0; + cp[4] = (fsa_dev_ptr[cid].block_size >> 24) & 0xff; + cp[5] = (fsa_dev_ptr[cid].block_size >> 16) & 0xff; + cp[6] = (fsa_dev_ptr[cid].block_size >> 8) & 0xff; + cp[7] = (fsa_dev_ptr[cid].block_size) & 0xff; scsi_sg_copy_from_buffer(scsicmd, cp, sizeof(cp)); /* Do not cache partition table for arrays */ scsicmd->device->removable = 1; @@ -2385,30 +2579,79 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) case MODE_SENSE: { - char mode_buf[7]; int mode_buf_length = 4; + u32 capacity; + aac_modep_data mpd; + + if (fsa_dev_ptr[cid].size <= 0x100000000ULL) + capacity = fsa_dev_ptr[cid].size - 1; + else + capacity = (u32)-1; dprintk((KERN_DEBUG "MODE SENSE command.\n")); - mode_buf[0] = 3; /* Mode data length */ - mode_buf[1] = 0; /* Medium type - default */ - mode_buf[2] = 0; /* Device-specific param, - bit 8: 0/1 = write enabled/protected - bit 4: 0/1 = FUA enabled */ + memset((char *)&mpd, 0, sizeof(aac_modep_data)); + + /* Mode data length */ + mpd.hd.data_length = sizeof(mpd.hd) - 1; + /* Medium type - default */ + mpd.hd.med_type = 0; + /* Device-specific param, + bit 8: 0/1 = write enabled/protected + bit 4: 0/1 = FUA enabled */ + mpd.hd.dev_par = 0; + if (dev->raw_io_interface && ((aac_cache & 5) != 1)) - mode_buf[2] = 0x10; - mode_buf[3] = 0; /* 
Block descriptor length */ + mpd.hd.dev_par = 0x10; + if (scsicmd->cmnd[1] & 0x8) + mpd.hd.bd_length = 0; /* Block descriptor length */ + else { + mpd.hd.bd_length = sizeof(mpd.bd); + mpd.hd.data_length += mpd.hd.bd_length; + mpd.bd.block_length[0] = + (fsa_dev_ptr[cid].block_size >> 16) & 0xff; + mpd.bd.block_length[1] = + (fsa_dev_ptr[cid].block_size >> 8) & 0xff; + mpd.bd.block_length[2] = + fsa_dev_ptr[cid].block_size & 0xff; + + mpd.mpc_buf[0] = scsicmd->cmnd[2]; + if (scsicmd->cmnd[2] == 0x1C) { + /* page length */ + mpd.mpc_buf[1] = 0xa; + /* Mode data length */ + mpd.hd.data_length = 23; + } else { + /* Mode data length */ + mpd.hd.data_length = 15; + } + + if (capacity > 0xffffff) { + mpd.bd.block_count[0] = 0xff; + mpd.bd.block_count[1] = 0xff; + mpd.bd.block_count[2] = 0xff; + } else { + mpd.bd.block_count[0] = (capacity >> 16) & 0xff; + mpd.bd.block_count[1] = (capacity >> 8) & 0xff; + mpd.bd.block_count[2] = capacity & 0xff; + } + } if (((scsicmd->cmnd[2] & 0x3f) == 8) || ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { - mode_buf[0] = 6; - mode_buf[4] = 8; - mode_buf[5] = 1; - mode_buf[6] = ((aac_cache & 6) == 2) + mpd.hd.data_length += 3; + mpd.mpc_buf[0] = 8; + mpd.mpc_buf[1] = 1; + mpd.mpc_buf[2] = ((aac_cache & 6) == 2) ? 0 : 0x04; /* WCE */ - mode_buf_length = 7; - if (mode_buf_length > scsicmd->cmnd[4]) - mode_buf_length = scsicmd->cmnd[4]; + mode_buf_length = sizeof(mpd); } - scsi_sg_copy_from_buffer(scsicmd, mode_buf, mode_buf_length); + + if (mode_buf_length > scsicmd->cmnd[4]) + mode_buf_length = scsicmd->cmnd[4]; + else + mode_buf_length = sizeof(mpd); + scsi_sg_copy_from_buffer(scsicmd, + (char *)&mpd, + mode_buf_length); scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; scsicmd->scsi_done(scsicmd); @@ -2416,34 +2659,77 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) } case MODE_SENSE_10: { - char mode_buf[11]; + u32 capacity; int mode_buf_length = 8; + aac_modep10_data mpd10; + + if (fsa_dev_ptr[cid].size <= 0x100000000ULL) + capacity = fsa_dev_ptr[cid].size - 1; + else + capacity = (u32)-1; dprintk((KERN_DEBUG "MODE SENSE 10 byte command.\n")); - mode_buf[0] = 0; /* Mode data length (MSB) */ - mode_buf[1] = 6; /* Mode data length (LSB) */ - mode_buf[2] = 0; /* Medium type - default */ - mode_buf[3] = 0; /* Device-specific param, - bit 8: 0/1 = write enabled/protected - bit 4: 0/1 = FUA enabled */ + memset((char *)&mpd10, 0, sizeof(aac_modep10_data)); + /* Mode data length (MSB) */ + mpd10.hd.data_length[0] = 0; + /* Mode data length (LSB) */ + mpd10.hd.data_length[1] = sizeof(mpd10.hd) - 1; + /* Medium type - default */ + mpd10.hd.med_type = 0; + /* Device-specific param, + bit 8: 0/1 = write enabled/protected + bit 4: 0/1 = FUA enabled */ + mpd10.hd.dev_par = 0; + if (dev->raw_io_interface && ((aac_cache & 5) != 1)) - mode_buf[3] = 0x10; - mode_buf[4] = 0; /* reserved */ - mode_buf[5] = 0; /* reserved */ - mode_buf[6] = 0; /* Block descriptor length (MSB) */ - mode_buf[7] = 0; /* Block descriptor length (LSB) */ + mpd10.hd.dev_par = 0x10; + mpd10.hd.rsrvd[0] = 0; /* reserved */ + mpd10.hd.rsrvd[1] = 0; /* reserved */ + if (scsicmd->cmnd[1] & 0x8) { + /* Block descriptor length (MSB) */ + mpd10.hd.bd_length[0] = 0; + /* Block descriptor length (LSB) */ + mpd10.hd.bd_length[1] = 0; + } else { + mpd10.hd.bd_length[0] = 0; + mpd10.hd.bd_length[1] = sizeof(mpd10.bd); + + mpd10.hd.data_length[1] += mpd10.hd.bd_length[1]; + + mpd10.bd.block_length[0] = + (fsa_dev_ptr[cid].block_size >> 16) & 0xff; + mpd10.bd.block_length[1] = + (fsa_dev_ptr[cid].block_size 
>> 8) & 0xff; + mpd10.bd.block_length[2] = + fsa_dev_ptr[cid].block_size & 0xff; + + if (capacity > 0xffffff) { + mpd10.bd.block_count[0] = 0xff; + mpd10.bd.block_count[1] = 0xff; + mpd10.bd.block_count[2] = 0xff; + } else { + mpd10.bd.block_count[0] = + (capacity >> 16) & 0xff; + mpd10.bd.block_count[1] = + (capacity >> 8) & 0xff; + mpd10.bd.block_count[2] = + capacity & 0xff; + } + } if (((scsicmd->cmnd[2] & 0x3f) == 8) || ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { - mode_buf[1] = 9; - mode_buf[8] = 8; - mode_buf[9] = 1; - mode_buf[10] = ((aac_cache & 6) == 2) + mpd10.hd.data_length[1] += 3; + mpd10.mpc_buf[0] = 8; + mpd10.mpc_buf[1] = 1; + mpd10.mpc_buf[2] = ((aac_cache & 6) == 2) ? 0 : 0x04; /* WCE */ - mode_buf_length = 11; + mode_buf_length = sizeof(mpd10); if (mode_buf_length > scsicmd->cmnd[8]) mode_buf_length = scsicmd->cmnd[8]; } - scsi_sg_copy_from_buffer(scsicmd, mode_buf, mode_buf_length); + scsi_sg_copy_from_buffer(scsicmd, + (char *)&mpd10, + mode_buf_length); scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; scsicmd->scsi_done(scsicmd); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index eaaf8705a5f4..40fe65c91b41 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -6,13 +6,63 @@ #define nblank(x) _nblank(x)[0] #include <linux/interrupt.h> +#include <linux/pci.h> /*------------------------------------------------------------------------------ * D E F I N E S *----------------------------------------------------------------------------*/ +#define AAC_MAX_MSIX 8 /* vectors */ +#define AAC_PCI_MSI_ENABLE 0x8000 + +enum { + AAC_ENABLE_INTERRUPT = 0x0, + AAC_DISABLE_INTERRUPT, + AAC_ENABLE_MSIX, + AAC_DISABLE_MSIX, + AAC_CLEAR_AIF_BIT, + AAC_CLEAR_SYNC_BIT, + AAC_ENABLE_INTX +}; + +#define AAC_INT_MODE_INTX (1<<0) +#define AAC_INT_MODE_MSI (1<<1) +#define AAC_INT_MODE_AIF (1<<2) +#define AAC_INT_MODE_SYNC (1<<3) + +#define AAC_INT_ENABLE_TYPE1_INTX 0xfffffffb +#define AAC_INT_ENABLE_TYPE1_MSIX 0xfffffffa +#define AAC_INT_DISABLE_ALL 0xffffffff + +/* Bit definitions in IOA->Host Interrupt Register */ +#define PMC_TRANSITION_TO_OPERATIONAL (1<<31) +#define PMC_IOARCB_TRANSFER_FAILED (1<<28) +#define PMC_IOA_UNIT_CHECK (1<<27) +#define PMC_NO_HOST_RRQ_FOR_CMD_RESPONSE (1<<26) +#define PMC_CRITICAL_IOA_OP_IN_PROGRESS (1<<25) +#define PMC_IOARRIN_LOST (1<<4) +#define PMC_SYSTEM_BUS_MMIO_ERROR (1<<3) +#define PMC_IOA_PROCESSOR_IN_ERROR_STATE (1<<2) +#define PMC_HOST_RRQ_VALID (1<<1) +#define PMC_OPERATIONAL_STATUS (1<<31) +#define PMC_ALLOW_MSIX_VECTOR0 (1<<0) + +#define PMC_IOA_ERROR_INTERRUPTS (PMC_IOARCB_TRANSFER_FAILED | \ + PMC_IOA_UNIT_CHECK | \ + PMC_NO_HOST_RRQ_FOR_CMD_RESPONSE | \ + PMC_IOARRIN_LOST | \ + PMC_SYSTEM_BUS_MMIO_ERROR | \ + PMC_IOA_PROCESSOR_IN_ERROR_STATE) + +#define PMC_ALL_INTERRUPT_BITS (PMC_IOA_ERROR_INTERRUPTS | \ + PMC_HOST_RRQ_VALID | \ + PMC_TRANSITION_TO_OPERATIONAL | \ + PMC_ALLOW_MSIX_VECTOR0) +#define PMC_GLOBAL_INT_BIT2 0x00000004 +#define PMC_GLOBAL_INT_BIT0 0x00000001 + #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 30300 +# define AAC_DRIVER_BUILD 40709 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 @@ -36,6 +86,7 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S6 0x28b #define PMC_DEVICE_S7 0x28c #define PMC_DEVICE_S8 0x28d #define PMC_DEVICE_S9 0x28f @@ -434,7 +485,7 @@ enum fib_xfer_state { struct aac_init { __le32 InitStructRevision; - __le32 MiniPortRevision; + __le32 Sa_MSIXVectors; 
__le32 fsrev; __le32 CommHeaderAddress; __le32 FastIoCommAreaAddress; @@ -582,7 +633,8 @@ struct aac_queue { spinlock_t lockdata; /* Actual lock (used only on one side of the lock) */ struct list_head cmdq; /* A queue of FIBs which need to be prcessed by the FS thread. This is */ /* only valid for command queues which receive entries from the adapter. */ - u32 numpending; /* Number of entries on outstanding queue. */ + /* Number of entries on outstanding queue. */ + atomic_t numpending; struct aac_dev * dev; /* Back pointer to adapter structure */ }; @@ -755,7 +807,8 @@ struct rkt_registers { struct src_mu_registers { /* PCI*| Name */ - __le32 reserved0[8]; /* 00h | Reserved */ + __le32 reserved0[6]; /* 00h | Reserved */ + __le32 IOAR[2]; /* 18h | IOA->host interrupt register */ __le32 IDR; /* 20h | Inbound Doorbell Register */ __le32 IISR; /* 24h | Inbound Int. Status Register */ __le32 reserved1[3]; /* 28h | Reserved */ @@ -767,17 +820,18 @@ struct src_mu_registers { __le32 OMR; /* bch | Outbound Message Register */ __le32 IQ_L; /* c0h | Inbound Queue (Low address) */ __le32 IQ_H; /* c4h | Inbound Queue (High address) */ + __le32 ODR_MSI; /* c8h | MSI register for sync./AIF */ }; struct src_registers { - struct src_mu_registers MUnit; /* 00h - c7h */ + struct src_mu_registers MUnit; /* 00h - cbh */ union { struct { - __le32 reserved1[130790]; /* c8h - 7fc5fh */ + __le32 reserved1[130789]; /* cch - 7fc5fh */ struct src_inbound IndexRegs; /* 7fc60h */ } tupelo; struct { - __le32 reserved1[974]; /* c8h - fffh */ + __le32 reserved1[973]; /* cch - fffh */ struct src_inbound IndexRegs; /* 1000h */ } denali; } u; @@ -857,6 +911,7 @@ struct fsa_dev_info { u8 deleted; char devname[8]; struct sense_data sense_data; + u32 block_size; }; struct fib { @@ -960,6 +1015,10 @@ struct aac_supplement_adapter_info #define AAC_OPTION_IGNORE_RESET cpu_to_le32(0x00000002) #define AAC_OPTION_POWER_MANAGEMENT cpu_to_le32(0x00000004) #define AAC_OPTION_DOORBELL_RESET cpu_to_le32(0x00004000) +/* 4KB sector size */ +#define AAC_OPTION_VARIABLE_BLOCK_SIZE cpu_to_le32(0x00040000) +/* 240 simple volume support */ +#define AAC_OPTION_SUPPORTED_240_VOLUMES cpu_to_le32(0x10000000) #define AAC_SIS_VERSION_V3 3 #define AAC_SIS_SLOT_UNKNOWN 0xFF @@ -1026,6 +1085,11 @@ struct aac_bus_info_response { #define AAC_OPT_NEW_COMM_TYPE3 cpu_to_le32(1<<30) #define AAC_OPT_NEW_COMM_TYPE4 cpu_to_le32(1<<31) +/* MSIX context */ +struct aac_msix_ctx { + int vector_no; + struct aac_dev *dev; +}; struct aac_dev { @@ -1081,8 +1145,10 @@ struct aac_dev * if AAC_COMM_MESSAGE_TYPE1 */ dma_addr_t host_rrq_pa; /* phys. address */ - u32 host_rrq_idx; /* index into rrq buffer */ - + /* index into rrq buffer */ + u32 host_rrq_idx[AAC_MAX_MSIX]; + atomic_t rrq_outstanding[AAC_MAX_MSIX]; + u32 fibs_pushed_no; struct pci_dev *pdev; /* Our PCI interface */ void * printfbuf; /* pointer to buffer used for printf's from the adapter */ void * comm_addr; /* Base address of Comm area */ @@ -1151,6 +1217,13 @@ struct aac_dev int sync_mode; struct fib *sync_fib; struct list_head sync_fib_list; + u32 doorbell_mask; + u32 max_msix; /* max. 
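
The one-dword shrink of both reserved1 pads above is forced by the new ODR_MSI register: struct src_mu_registers now ends at offset 0xcc rather than 0xc8, while IndexRegs must stay at 0x7fc60 (Tupelo) and 0x1000 (Denali). The arithmetic checks out: (0x7fc60 - 0xcc) / 4 = 130789 and (0x1000 - 0xcc) / 4 = 973. A hypothetical compile-time guard for layout invariants of this kind (not present in the driver; BUILD_BUG_ON is from <linux/bug.h>) could sit in any init path:

        /* Catch pad miscounts at build time; offsets per the map above. */
        BUILD_BUG_ON(offsetof(struct src_registers,
                              u.tupelo.IndexRegs) != 0x7fc60);
        BUILD_BUG_ON(offsetof(struct src_registers,
                              u.denali.IndexRegs) != 0x1000);
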
MSI-X vectors */ + u32 vector_cap; /* MSI-X vector capab.*/ + int msi_enabled; /* MSI/MSI-X enabled */ + struct msix_entry msixentry[AAC_MAX_MSIX]; + struct aac_msix_ctx aac_msix[AAC_MAX_MSIX]; /* context */ + u8 adapter_shutdown; }; #define aac_adapter_interrupt(dev) \ @@ -1589,6 +1662,7 @@ struct aac_srb_reply #define VM_CtHostWrite64 20 #define VM_DrvErrTblLog 21 #define VM_NameServe64 22 +#define VM_NameServeAllBlk 30 #define MAX_VMCOMMAND_NUM 23 /* used for sizing stats array - leave last */ @@ -1611,8 +1685,13 @@ struct aac_fsinfo { __le32 fsInodeDensity; }; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */ +struct aac_blockdevinfo { + __le32 block_size; +}; + union aac_contentinfo { - struct aac_fsinfo filesys; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */ + struct aac_fsinfo filesys; + struct aac_blockdevinfo bdevinfo; }; /* @@ -1677,6 +1756,7 @@ struct aac_get_container_count_resp { __le32 MaxContainers; __le32 ContainerSwitchEntries; __le32 MaxPartitions; + __le32 MaxSimpleVolumes; }; @@ -1951,6 +2031,8 @@ extern struct aac_common aac_config; #define AifEnEnclosureManagement 13 /* EM_DRIVE_* */ #define EM_DRIVE_INSERTION 31 #define EM_DRIVE_REMOVAL 32 +#define EM_SES_DRIVE_INSERTION 33 +#define EM_SES_DRIVE_REMOVAL 26 #define AifEnBatteryEvent 14 /* Change in Battery State */ #define AifEnAddContainer 15 /* A new array was created */ #define AifEnDeleteContainer 16 /* A container was deleted */ @@ -1983,6 +2065,9 @@ extern struct aac_common aac_config; /* PMC NEW COMM: Request the event data */ #define AifReqEvent 200 +/* RAW device deleted */ +#define AifRawDeviceRemove 203 + /* * Adapter Initiated FIB command structures. Start with the adapter * initiated FIBs that really come from the adapter, and get responded @@ -2025,6 +2110,7 @@ void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum); int aac_fib_complete(struct fib * context); #define fib_data(fibctx) ((void *)(fibctx)->hw_fib_va->data) struct aac_dev *aac_init_adapter(struct aac_dev *dev); +void aac_src_access_devreg(struct aac_dev *dev, int mode); int aac_get_config_status(struct aac_dev *dev, int commit_flag); int aac_get_containers(struct aac_dev *dev); int aac_scsi_cmd(struct scsi_cmnd *cmd); diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index fbcd48d0bfc3..54195a117f72 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -689,7 +689,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) kfree (usg); } srbcmd->count = cpu_to_le32(byte_count); - psg->count = cpu_to_le32(sg_indx+1); + if (user_srbcmd->sg.count) + psg->count = cpu_to_le32(sg_indx+1); + else + psg->count = 0; status = aac_fib_send(ScsiPortCommand64, srbfib, actual_fibsize, FsaNormal, 1, 1,NULL,NULL); } else { struct user_sgmap* upsg = &user_srbcmd->sg; @@ -775,7 +778,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) } } srbcmd->count = cpu_to_le32(byte_count); - psg->count = cpu_to_le32(sg_indx+1); + if (user_srbcmd->sg.count) + psg->count = cpu_to_le32(sg_indx+1); + else + psg->count = 0; status = aac_fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL); } if (status == -ERESTARTSYS) { diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 177b094c7792..45db84ad322f 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -43,6 +43,8 @@ #include "aacraid.h" +static void 
aac_define_int_mode(struct aac_dev *dev); + struct aac_common aac_config = { .irq_mod = 1 }; @@ -51,7 +53,7 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co { unsigned char *base; unsigned long size, align; - const unsigned long fibsize = 4096; + const unsigned long fibsize = dev->max_fib_size; const unsigned long printfbufsiz = 256; unsigned long host_rrq_size = 0; struct aac_init *init; @@ -91,7 +93,7 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION); if (dev->max_fib_size != sizeof(struct hw_fib)) init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4); - init->MiniPortRevision = cpu_to_le32(Sa_MINIPORT_REVISION); + init->Sa_MSIXVectors = cpu_to_le32(Sa_MINIPORT_REVISION); init->fsrev = cpu_to_le32(dev->fsrev); /* @@ -140,7 +142,8 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co INITFLAGS_NEW_COMM_TYPE2_SUPPORTED | INITFLAGS_FAST_JBOD_SUPPORTED); init->HostRRQ_AddrHigh = cpu_to_le32((u32)((u64)dev->host_rrq_pa >> 32)); init->HostRRQ_AddrLow = cpu_to_le32((u32)(dev->host_rrq_pa & 0xffffffff)); - init->MiniPortRevision = cpu_to_le32(0L); /* number of MSI-X */ + /* number of MSI-X */ + init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix); dprintk((KERN_WARNING"aacraid: New Comm Interface type2 enabled\n")); } @@ -179,7 +182,7 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co static void aac_queue_init(struct aac_dev * dev, struct aac_queue * q, u32 *mem, int qsize) { - q->numpending = 0; + atomic_set(&q->numpending, 0); q->dev = dev; init_waitqueue_head(&q->cmdready); INIT_LIST_HEAD(&q->cmdq); @@ -228,6 +231,12 @@ int aac_send_shutdown(struct aac_dev * dev) /* FIB should be freed only after getting the response from the F/W */ if (status != -ERESTARTSYS) aac_fib_free(fibctx); + dev->adapter_shutdown = 1; + if ((dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) && + dev->msi_enabled) + aac_src_access_devreg(dev, AAC_ENABLE_INTX); return status; } @@ -350,8 +359,10 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->raw_io_interface = dev->raw_io_64 = 0; if ((!aac_adapter_sync_cmd(dev, GET_ADAPTER_PROPERTIES, - 0, 0, 0, 0, 0, 0, status+0, status+1, status+2, NULL, NULL)) && + 0, 0, 0, 0, 0, 0, + status+0, status+1, status+2, status+3, NULL)) && (status[0] == 0x00000001)) { + dev->doorbell_mask = status[3]; if (status[1] & le32_to_cpu(AAC_OPT_NEW_COMM_64)) dev->raw_io_64 = 1; dev->sync_mode = aac_sync_mode; @@ -388,6 +399,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } } + dev->max_msix = 0; + dev->msi_enabled = 0; + dev->adapter_shutdown = 0; if ((!aac_adapter_sync_cmd(dev, GET_COMM_PREFERRED_SETTINGS, 0, 0, 0, 0, 0, 0, status+0, status+1, status+2, status+3, status+4)) @@ -461,6 +475,11 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) if (host->can_queue > AAC_NUM_IO_FIB) host->can_queue = AAC_NUM_IO_FIB; + if (dev->pdev->device == PMC_DEVICE_S6 || + dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + aac_define_int_mode(dev); /* * Ok now init the communication subsystem */ @@ -489,4 +508,79 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) return dev; } - +static void aac_define_int_mode(struct aac_dev *dev) +{ + + int i, msi_count; + + msi_count = i = 0; + /* max. 
vectors from GET_COMM_PREFERRED_SETTINGS */ + if (dev->max_msix == 0 || + dev->pdev->device == PMC_DEVICE_S6 || + dev->sync_mode) { + dev->max_msix = 1; + dev->vector_cap = + dev->scsi_host_ptr->can_queue + + AAC_NUM_MGT_FIB; + return; + } + + msi_count = min(dev->max_msix, + (unsigned int)num_online_cpus()); + + dev->max_msix = msi_count; + + if (msi_count > AAC_MAX_MSIX) + msi_count = AAC_MAX_MSIX; + + for (i = 0; i < msi_count; i++) + dev->msixentry[i].entry = i; + + if (msi_count > 1 && + pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX)) { + i = pci_enable_msix(dev->pdev, + dev->msixentry, + msi_count); + /* Check how many MSIX vectors are allocated */ + if (i >= 0) { + dev->msi_enabled = 1; + if (i) { + msi_count = i; + if (pci_enable_msix(dev->pdev, + dev->msixentry, + msi_count)) { + dev->msi_enabled = 0; + printk(KERN_ERR "%s%d: MSIX not supported!! Will try MSI 0x%x.\n", + dev->name, dev->id, i); + } + } + } else { + dev->msi_enabled = 0; + printk(KERN_ERR "%s%d: MSIX not supported!! Will try MSI 0x%x.\n", + dev->name, dev->id, i); + } + } + + if (!dev->msi_enabled) { + msi_count = 1; + i = pci_enable_msi(dev->pdev); + + if (!i) { + dev->msi_enabled = 1; + dev->msi = 1; + } else { + printk(KERN_ERR "%s%d: MSI not supported!! Will try INTx 0x%x.\n", + dev->name, dev->id, i); + } + } + + if (!dev->msi_enabled) + dev->max_msix = msi_count = 1; + else { + if (dev->max_msix > msi_count) + dev->max_msix = msi_count; + } + dev->vector_cap = + (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB) / + msi_count; +} diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index cab190af6345..4da574925284 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -208,14 +208,10 @@ struct fib *aac_fib_alloc(struct aac_dev *dev) void aac_fib_free(struct fib *fibptr) { - unsigned long flags, flagsv; + unsigned long flags; - spin_lock_irqsave(&fibptr->event_lock, flagsv); - if (fibptr->done == 2) { - spin_unlock_irqrestore(&fibptr->event_lock, flagsv); + if (fibptr->done == 2) return; - } - spin_unlock_irqrestore(&fibptr->event_lock, flagsv); spin_lock_irqsave(&fibptr->dev->fib_lock, flags); if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) @@ -321,7 +317,7 @@ static int aac_get_entry (struct aac_dev * dev, u32 qid, struct aac_entry **entr /* Queue is full */ if ((*index + 1) == le32_to_cpu(*(q->headers.consumer))) { printk(KERN_WARNING "Queue %d full, %u outstanding.\n", - qid, q->numpending); + qid, atomic_read(&q->numpending)); return 0; } else { *entry = q->base + *index; @@ -414,7 +410,6 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, struct aac_dev * dev = fibptr->dev; struct hw_fib * hw_fib = fibptr->hw_fib_va; unsigned long flags = 0; - unsigned long qflags; unsigned long mflags = 0; unsigned long sflags = 0; @@ -568,9 +563,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, int blink; if (time_is_before_eq_jiffies(timeout)) { struct aac_queue * q = &dev->queues->queue[AdapNormCmdQueue]; - spin_lock_irqsave(q->lock, qflags); - q->numpending--; - spin_unlock_irqrestore(q->lock, qflags); + atomic_dec(&q->numpending); if (wait == -1) { printk(KERN_ERR "aacraid: aac_fib_send: first asynchronous command timed out.\n" "Usually a result of a PCI interrupt routing problem;\n" @@ -775,7 +768,6 @@ int aac_fib_adapter_complete(struct fib *fibptr, unsigned short size) int aac_fib_complete(struct fib *fibptr) { - unsigned long flags; struct hw_fib * hw_fib = fibptr->hw_fib_va; /* @@ -798,12 +790,6 @@ int 
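
aac_define_int_mode() above leans on the old pci_enable_msix() retry convention: a positive return value is not success but the number of vectors the hardware can actually supply, so the caller retries with that smaller count before falling back to MSI and, failing that, INTx. The ladder in outline, under the same pre-pci_alloc_irq_vectors() API (everything but the PCI calls is illustrative):

        int nvec = want;
        int err = pci_enable_msix(pdev, entries, nvec);

        if (err > 0)                    /* HW offers fewer: retry */
                err = pci_enable_msix(pdev, entries, nvec = err);
        if (err) {                      /* MSI-X out: MSI, then INTx */
                nvec = 1;
                err = pci_enable_msi(pdev);
        }

Whichever mode sticks, vector_cap is then sized so the outstanding-FIB budget (can_queue + AAC_NUM_MGT_FIB) is split evenly across the vectors.
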
aac_fib_complete(struct fib *fibptr) * command is complete that we had sent to the adapter and this * cdb could be reused. */ - spin_lock_irqsave(&fibptr->event_lock, flags); - if (fibptr->done == 2) { - spin_unlock_irqrestore(&fibptr->event_lock, flags); - return 0; - } - spin_unlock_irqrestore(&fibptr->event_lock, flags); if((hw_fib->header.XferState & cpu_to_le32(SentFromHost)) && (hw_fib->header.XferState & cpu_to_le32(AdapterProcessed))) @@ -868,7 +854,7 @@ void aac_printf(struct aac_dev *dev, u32 val) * dispatches it to the appropriate routine for handling. */ -#define AIF_SNIFF_TIMEOUT (30*HZ) +#define AIF_SNIFF_TIMEOUT (500*HZ) static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) { struct hw_fib * hw_fib = fibptr->hw_fib_va; @@ -897,6 +883,39 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) switch (le32_to_cpu(aifcmd->command)) { case AifCmdDriverNotify: switch (le32_to_cpu(((__le32 *)aifcmd->data)[0])) { + case AifRawDeviceRemove: + container = le32_to_cpu(((__le32 *)aifcmd->data)[1]); + if ((container >> 28)) { + container = (u32)-1; + break; + } + channel = (container >> 24) & 0xF; + if (channel >= dev->maximum_num_channels) { + container = (u32)-1; + break; + } + id = container & 0xFFFF; + if (id >= dev->maximum_num_physicals) { + container = (u32)-1; + break; + } + lun = (container >> 16) & 0xFF; + container = (u32)-1; + channel = aac_phys_to_logical(channel); + device_config_needed = + (((__le32 *)aifcmd->data)[0] == + cpu_to_le32(AifRawDeviceRemove)) ? DELETE : ADD; + + if (device_config_needed == ADD) { + device = scsi_device_lookup( + dev->scsi_host_ptr, + channel, id, lun); + if (device) { + scsi_remove_device(device); + scsi_device_put(device); + } + } + break; /* * Morph or Expand complete */ @@ -1044,6 +1063,8 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) switch (le32_to_cpu(((__le32 *)aifcmd->data)[3])) { case EM_DRIVE_INSERTION: case EM_DRIVE_REMOVAL: + case EM_SES_DRIVE_INSERTION: + case EM_SES_DRIVE_REMOVAL: container = le32_to_cpu( ((__le32 *)aifcmd->data)[2]); if ((container >> 28)) { @@ -1069,8 +1090,10 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) } channel = aac_phys_to_logical(channel); device_config_needed = - (((__le32 *)aifcmd->data)[3] - == cpu_to_le32(EM_DRIVE_INSERTION)) ? + ((((__le32 *)aifcmd->data)[3] + == cpu_to_le32(EM_DRIVE_INSERTION)) || + (((__le32 *)aifcmd->data)[3] + == cpu_to_le32(EM_SES_DRIVE_INSERTION))) ? 
ADD : DELETE; break; } @@ -1247,12 +1270,13 @@ retry_next: static int _aac_reset_adapter(struct aac_dev *aac, int forced) { int index, quirks; - int retval; + int retval, i; struct Scsi_Host *host; struct scsi_device *dev; struct scsi_cmnd *command; struct scsi_cmnd *command_list; int jafo = 0; + int cpu; /* * Assumptions: @@ -1315,7 +1339,33 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) aac->comm_phys = 0; kfree(aac->queues); aac->queues = NULL; - free_irq(aac->pdev->irq, aac); + cpu = cpumask_first(cpu_online_mask); + if (aac->pdev->device == PMC_DEVICE_S6 || + aac->pdev->device == PMC_DEVICE_S7 || + aac->pdev->device == PMC_DEVICE_S8 || + aac->pdev->device == PMC_DEVICE_S9) { + if (aac->max_msix > 1) { + for (i = 0; i < aac->max_msix; i++) { + if (irq_set_affinity_hint( + aac->msixentry[i].vector, + NULL)) { + printk(KERN_ERR "%s%d: Failed to reset IRQ affinity for cpu %d\n", + aac->name, + aac->id, + cpu); + } + cpu = cpumask_next(cpu, + cpu_online_mask); + free_irq(aac->msixentry[i].vector, + &(aac->aac_msix[i])); + } + pci_disable_msix(aac->pdev); + } else { + free_irq(aac->pdev->irq, &(aac->aac_msix[0])); + } + } else { + free_irq(aac->pdev->irq, aac); + } if (aac->msi) pci_disable_msi(aac->pdev); kfree(aac->fsa_dev); diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index d81b2810f0f7..da9d9936e995 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -84,7 +84,7 @@ unsigned int aac_response_normal(struct aac_queue * q) * continue. The caller has already been notified that * the fib timed out. */ - dev->queues->queue[AdapNormCmdQueue].numpending--; + atomic_dec(&dev->queues->queue[AdapNormCmdQueue].numpending); if (unlikely(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) { spin_unlock_irqrestore(q->lock, flags); @@ -354,7 +354,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, * continue. The caller has already been notified that * the fib timed out. */ - dev->queues->queue[AdapNormCmdQueue].numpending--; + atomic_dec(&dev->queues->queue[AdapNormCmdQueue].numpending); if (unlikely(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) { aac_fib_complete(fib); @@ -389,8 +389,13 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, * NOTE: we cannot touch the fib after this * call, because it may have been deallocated. */ - fib->flags &= FIB_CONTEXT_FLAG_FASTRESP; - fib->callback(fib->callback_data, fib); + if (likely(fib->callback && fib->callback_data)) { + fib->flags &= FIB_CONTEXT_FLAG_FASTRESP; + fib->callback(fib->callback_data, fib); + } else { + aac_fib_complete(fib); + aac_fib_free(fib); + } } else { unsigned long flagv; dprintk((KERN_INFO "event_wait up\n")); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index fdcdf9f781bc..9eec02733c86 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -56,7 +56,7 @@ #include "aacraid.h" -#define AAC_DRIVER_VERSION "1.2-0" +#define AAC_DRIVER_VERSION "1.2-1" #ifndef AAC_DRIVER_BRANCH #define AAC_DRIVER_BRANCH "" #endif @@ -251,27 +251,15 @@ static struct aac_driver_ident aac_drivers[] = { * TODO: unify with aac_scsi_cmd(). 
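
The reset path above dismantles MSI-X in the canonical order: clear each vector's affinity hint, free the vector against the same per-vector context that was handed to request_irq() (the dev_id values must match or free_irq() cannot find the action), and only then disable MSI-X at the PCI level. Reduced to its skeleton (loop variables illustrative):

        for (i = 0; i < aac->max_msix; i++) {
                irq_set_affinity_hint(aac->msixentry[i].vector, NULL);
                free_irq(aac->msixentry[i].vector, &aac->aac_msix[i]);
        }
        pci_disable_msix(aac->pdev);
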
*/ -static int aac_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) +static int aac_queuecommand(struct Scsi_Host *shost, + struct scsi_cmnd *cmd) { - struct Scsi_Host *host = cmd->device->host; - struct aac_dev *dev = (struct aac_dev *)host->hostdata; - u32 count = 0; - cmd->scsi_done = done; - for (; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) { - struct fib * fib = &dev->fibs[count]; - struct scsi_cmnd * command; - if (fib->hw_fib_va->header.XferState && - ((command = fib->callback_data)) && - (command == cmd) && - (cmd->SCp.phase == AAC_OWNER_FIRMWARE)) - return 0; /* Already owned by Adapter */ - } + int r = 0; cmd->SCp.phase = AAC_OWNER_LOWLEVEL; - return (aac_scsi_cmd(cmd) ? FAILED : 0); + r = (aac_scsi_cmd(cmd) ? FAILED : 0); + return r; } -static DEF_SCSI_QCMD(aac_queuecommand) - /** * aac_info - Returns the host adapter name * @shost: Scsi host to report on @@ -713,7 +701,9 @@ static long aac_cfg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; - if (!capable(CAP_SYS_RAWIO)) + struct aac_dev *aac; + aac = (struct aac_dev *)file->private_data; + if (!capable(CAP_SYS_RAWIO) || aac->adapter_shutdown) return -EPERM; mutex_lock(&aac_mutex); ret = aac_do_ioctl(file->private_data, cmd, (void __user *)arg); @@ -1082,6 +1072,9 @@ static struct scsi_host_template aac_driver_template = { static void __aac_shutdown(struct aac_dev * aac) { + int i; + int cpu; + if (aac->aif_thread) { int i; /* Clear out events first */ @@ -1095,9 +1088,37 @@ static void __aac_shutdown(struct aac_dev * aac) } aac_send_shutdown(aac); aac_adapter_disable_int(aac); - free_irq(aac->pdev->irq, aac); + cpu = cpumask_first(cpu_online_mask); + if (aac->pdev->device == PMC_DEVICE_S6 || + aac->pdev->device == PMC_DEVICE_S7 || + aac->pdev->device == PMC_DEVICE_S8 || + aac->pdev->device == PMC_DEVICE_S9) { + if (aac->max_msix > 1) { + for (i = 0; i < aac->max_msix; i++) { + if (irq_set_affinity_hint( + aac->msixentry[i].vector, + NULL)) { + printk(KERN_ERR "%s%d: Failed to reset IRQ affinity for cpu %d\n", + aac->name, + aac->id, + cpu); + } + cpu = cpumask_next(cpu, + cpu_online_mask); + free_irq(aac->msixentry[i].vector, + &(aac->aac_msix[i])); + } + } else { + free_irq(aac->pdev->irq, + &(aac->aac_msix[0])); + } + } else { + free_irq(aac->pdev->irq, aac); + } if (aac->msi) pci_disable_msi(aac->pdev); + else if (aac->max_msix > 1) + pci_disable_msix(aac->pdev); } static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 5c6a8703f535..9570612b80ce 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -400,16 +400,13 @@ int aac_rx_deliver_producer(struct fib * fib) { struct aac_dev *dev = fib->dev; struct aac_queue *q = &dev->queues->queue[AdapNormCmdQueue]; - unsigned long qflags; u32 Index; unsigned long nointr = 0; - spin_lock_irqsave(q->lock, qflags); aac_queue_get( dev, &Index, AdapNormCmdQueue, fib->hw_fib_va, 1, fib, &nointr); - q->numpending++; + atomic_inc(&q->numpending); *(q->headers.producer) = cpu_to_le32(Index + 1); - spin_unlock_irqrestore(q->lock, qflags); if (!(nointr & aac_config.irq_mod)) aac_adapter_notify(dev, AdapNormCmdQueue); @@ -426,15 +423,12 @@ static int aac_rx_deliver_message(struct fib * fib) { struct aac_dev *dev = fib->dev; struct aac_queue *q = &dev->queues->queue[AdapNormCmdQueue]; - unsigned long qflags; u32 Index; u64 addr; volatile void __iomem *device; unsigned long count = 10000000L; /* 50 seconds */ - 
spin_lock_irqsave(q->lock, qflags); - q->numpending++; - spin_unlock_irqrestore(q->lock, qflags); + atomic_inc(&q->numpending); for(;;) { Index = rx_readl(dev, MUnit.InboundQueue); if (unlikely(Index == 0xFFFFFFFFL)) @@ -442,9 +436,7 @@ static int aac_rx_deliver_message(struct fib * fib) if (likely(Index != 0xFFFFFFFFL)) break; if (--count == 0) { - spin_lock_irqsave(q->lock, qflags); - q->numpending--; - spin_unlock_irqrestore(q->lock, qflags); + atomic_dec(&q->numpending); return -ETIMEDOUT; } udelay(5); diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 9c65aed26212..4596e9dd757c 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -44,98 +44,128 @@ #include "aacraid.h" -static irqreturn_t aac_src_intr_message(int irq, void *dev_id) +static int aac_src_get_sync_status(struct aac_dev *dev); + +irqreturn_t aac_src_intr_message(int irq, void *dev_id) { - struct aac_dev *dev = dev_id; + struct aac_msix_ctx *ctx; + struct aac_dev *dev; unsigned long bellbits, bellbits_shifted; - int our_interrupt = 0; - int isFastResponse; + int vector_no; + int isFastResponse, mode; u32 index, handle; - bellbits = src_readl(dev, MUnit.ODR_R); - if (bellbits & PmDoorBellResponseSent) { - bellbits = PmDoorBellResponseSent; - /* handle async. status */ - src_writel(dev, MUnit.ODR_C, bellbits); - src_readl(dev, MUnit.ODR_C); - our_interrupt = 1; - index = dev->host_rrq_idx; - for (;;) { - isFastResponse = 0; - /* remove toggle bit (31) */ - handle = le32_to_cpu(dev->host_rrq[index]) & 0x7fffffff; - /* check fast response bit (30) */ - if (handle & 0x40000000) - isFastResponse = 1; - handle &= 0x0000ffff; - if (handle == 0) - break; - - aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL); - - dev->host_rrq[index++] = 0; - if (index == dev->scsi_host_ptr->can_queue + - AAC_NUM_MGT_FIB) - index = 0; - dev->host_rrq_idx = index; + ctx = (struct aac_msix_ctx *)dev_id; + dev = ctx->dev; + vector_no = ctx->vector_no; + + if (dev->msi_enabled) { + mode = AAC_INT_MODE_MSI; + if (vector_no == 0) { + bellbits = src_readl(dev, MUnit.ODR_MSI); + if (bellbits & 0x40000) + mode |= AAC_INT_MODE_AIF; + if (bellbits & 0x1000) + mode |= AAC_INT_MODE_SYNC; } } else { - bellbits_shifted = (bellbits >> SRC_ODR_SHIFT); - if (bellbits_shifted & DoorBellAifPending) { + mode = AAC_INT_MODE_INTX; + bellbits = src_readl(dev, MUnit.ODR_R); + if (bellbits & PmDoorBellResponseSent) { + bellbits = PmDoorBellResponseSent; + src_writel(dev, MUnit.ODR_C, bellbits); + src_readl(dev, MUnit.ODR_C); + } else { + bellbits_shifted = (bellbits >> SRC_ODR_SHIFT); src_writel(dev, MUnit.ODR_C, bellbits); src_readl(dev, MUnit.ODR_C); - our_interrupt = 1; - /* handle AIF */ - aac_intr_normal(dev, 0, 2, 0, NULL); - } else if (bellbits_shifted & OUTBOUNDDOORBELL_0) { - unsigned long sflags; - struct list_head *entry; - int send_it = 0; - extern int aac_sync_mode; + if (bellbits_shifted & DoorBellAifPending) + mode |= AAC_INT_MODE_AIF; + else if (bellbits_shifted & OUTBOUNDDOORBELL_0) + mode |= AAC_INT_MODE_SYNC; + } + } + + if (mode & AAC_INT_MODE_SYNC) { + unsigned long sflags; + struct list_head *entry; + int send_it = 0; + extern int aac_sync_mode; + + if (!aac_sync_mode && !dev->msi_enabled) { src_writel(dev, MUnit.ODR_C, bellbits); src_readl(dev, MUnit.ODR_C); + } - if (!aac_sync_mode) { - src_writel(dev, MUnit.ODR_C, bellbits); - src_readl(dev, MUnit.ODR_C); - our_interrupt = 1; + if (dev->sync_fib) { + if (dev->sync_fib->callback) + dev->sync_fib->callback(dev->sync_fib->callback_data, + 
dev->sync_fib); + spin_lock_irqsave(&dev->sync_fib->event_lock, sflags); + if (dev->sync_fib->flags & FIB_CONTEXT_FLAG_WAIT) { + dev->management_fib_count--; + up(&dev->sync_fib->event_wait); } - - if (dev->sync_fib) { - our_interrupt = 1; - if (dev->sync_fib->callback) - dev->sync_fib->callback(dev->sync_fib->callback_data, - dev->sync_fib); - spin_lock_irqsave(&dev->sync_fib->event_lock, sflags); - if (dev->sync_fib->flags & FIB_CONTEXT_FLAG_WAIT) { - dev->management_fib_count--; - up(&dev->sync_fib->event_wait); - } - spin_unlock_irqrestore(&dev->sync_fib->event_lock, sflags); - spin_lock_irqsave(&dev->sync_lock, sflags); - if (!list_empty(&dev->sync_fib_list)) { - entry = dev->sync_fib_list.next; - dev->sync_fib = list_entry(entry, struct fib, fiblink); - list_del(entry); - send_it = 1; - } else { - dev->sync_fib = NULL; - } - spin_unlock_irqrestore(&dev->sync_lock, sflags); - if (send_it) { - aac_adapter_sync_cmd(dev, SEND_SYNCHRONOUS_FIB, - (u32)dev->sync_fib->hw_fib_pa, 0, 0, 0, 0, 0, - NULL, NULL, NULL, NULL, NULL); - } + spin_unlock_irqrestore(&dev->sync_fib->event_lock, + sflags); + spin_lock_irqsave(&dev->sync_lock, sflags); + if (!list_empty(&dev->sync_fib_list)) { + entry = dev->sync_fib_list.next; + dev->sync_fib = list_entry(entry, + struct fib, + fiblink); + list_del(entry); + send_it = 1; + } else { + dev->sync_fib = NULL; + } + spin_unlock_irqrestore(&dev->sync_lock, sflags); + if (send_it) { + aac_adapter_sync_cmd(dev, SEND_SYNCHRONOUS_FIB, + (u32)dev->sync_fib->hw_fib_pa, + 0, 0, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); } } + if (!dev->msi_enabled) + mode = 0; + + } + + if (mode & AAC_INT_MODE_AIF) { + /* handle AIF */ + aac_intr_normal(dev, 0, 2, 0, NULL); + if (dev->msi_enabled) + aac_src_access_devreg(dev, AAC_CLEAR_AIF_BIT); + mode = 0; } - if (our_interrupt) { - return IRQ_HANDLED; + if (mode) { + index = dev->host_rrq_idx[vector_no]; + + for (;;) { + isFastResponse = 0; + /* remove toggle bit (31) */ + handle = (dev->host_rrq[index] & 0x7fffffff); + /* check fast response bit (30) */ + if (handle & 0x40000000) + isFastResponse = 1; + handle &= 0x0000ffff; + if (handle == 0) + break; + if (dev->msi_enabled && dev->max_msix > 1) + atomic_dec(&dev->rrq_outstanding[vector_no]); + aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL); + dev->host_rrq[index++] = 0; + if (index == (vector_no + 1) * dev->vector_cap) + index = vector_no * dev->vector_cap; + dev->host_rrq_idx[vector_no] = index; + } + mode = 0; } - return IRQ_NONE; + + return IRQ_HANDLED; } /** @@ -155,7 +185,7 @@ static void aac_src_disable_interrupt(struct aac_dev *dev) static void aac_src_enable_interrupt_message(struct aac_dev *dev) { - src_writel(dev, MUnit.OIMR, dev->OIMR = 0xfffffff8); + aac_src_access_devreg(dev, AAC_ENABLE_INTERRUPT); } /** @@ -174,6 +204,7 @@ static int src_sync_cmd(struct aac_dev *dev, u32 command, u32 *status, u32 * r1, u32 * r2, u32 * r3, u32 * r4) { unsigned long start; + unsigned long delay; int ok; /* @@ -191,7 +222,10 @@ static int src_sync_cmd(struct aac_dev *dev, u32 command, /* * Clear the synch command doorbell to start on a clean slate. 
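
Each response-queue word consumed by the loop above carries three things at once: a producer toggle bit (31), a fast-response flag (30), and the FIB handle plus one in the low 16 bits, where zero marks an empty slot. A sketch of the decode with an illustrative helper name:

        /* False for an empty slot; otherwise yields the FIB index and
         * whether the fast-response completion path applies. */
        static inline bool rrq_decode(u32 entry, u16 *fib_idx, int *fast)
        {
                *fast = !!(entry & 0x40000000);  /* bit 30 */
                entry &= 0xffff;                 /* drops bits 31/30 too */
                if (entry == 0)
                        return false;
                *fib_idx = entry - 1;            /* handle is index + 1 */
                return true;
        }
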
*/ - src_writel(dev, MUnit.ODR_C, OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); + if (!dev->msi_enabled) + src_writel(dev, + MUnit.ODR_C, + OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); /* * Disable doorbell interrupts @@ -213,19 +247,29 @@ static int src_sync_cmd(struct aac_dev *dev, u32 command, ok = 0; start = jiffies; - /* - * Wait up to 5 minutes - */ - while (time_before(jiffies, start+300*HZ)) { + if (command == IOP_RESET_ALWAYS) { + /* Wait up to 10 sec */ + delay = 10*HZ; + } else { + /* Wait up to 5 minutes */ + delay = 300*HZ; + } + while (time_before(jiffies, start+delay)) { udelay(5); /* Delay 5 microseconds to let Mon960 get info. */ /* * Mon960 will set doorbell0 bit when it has completed the command. */ - if ((src_readl(dev, MUnit.ODR_R) >> SRC_ODR_SHIFT) & OUTBOUNDDOORBELL_0) { + if (aac_src_get_sync_status(dev) & OUTBOUNDDOORBELL_0) { /* * Clear the doorbell. */ - src_writel(dev, MUnit.ODR_C, OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); + if (dev->msi_enabled) + aac_src_access_devreg(dev, + AAC_CLEAR_SYNC_BIT); + else + src_writel(dev, + MUnit.ODR_C, + OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); ok = 1; break; } @@ -254,11 +298,16 @@ static int src_sync_cmd(struct aac_dev *dev, u32 command, *r3 = readl(&dev->IndexRegs->Mailbox[3]); if (r4) *r4 = readl(&dev->IndexRegs->Mailbox[4]); - + if (command == GET_COMM_PREFERRED_SETTINGS) + dev->max_msix = + readl(&dev->IndexRegs->Mailbox[5]) & 0xFFFF; /* * Clear the synch command doorbell. */ - src_writel(dev, MUnit.ODR_C, OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); + if (!dev->msi_enabled) + src_writel(dev, + MUnit.ODR_C, + OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT); } /* @@ -335,9 +384,14 @@ static void aac_src_notify_adapter(struct aac_dev *dev, u32 event) static void aac_src_start_adapter(struct aac_dev *dev) { struct aac_init *init; + int i; /* reset host_rrq_idx first */ - dev->host_rrq_idx = 0; + for (i = 0; i < dev->max_msix; i++) { + dev->host_rrq_idx[i] = i * dev->vector_cap; + atomic_set(&dev->rrq_outstanding[i], 0); + } + dev->fibs_pushed_no = 0; init = dev->init; init->HostElapsedSeconds = cpu_to_le32(get_seconds()); @@ -390,15 +444,39 @@ static int aac_src_deliver_message(struct fib *fib) { struct aac_dev *dev = fib->dev; struct aac_queue *q = &dev->queues->queue[AdapNormCmdQueue]; - unsigned long qflags; u32 fibsize; dma_addr_t address; struct aac_fib_xporthdr *pFibX; u16 hdr_size = le16_to_cpu(fib->hw_fib_va->header.Size); - spin_lock_irqsave(q->lock, qflags); - q->numpending++; - spin_unlock_irqrestore(q->lock, qflags); + atomic_inc(&q->numpending); + + if (dev->msi_enabled && fib->hw_fib_va->header.Command != AifRequest && + dev->max_msix > 1) { + u_int16_t vector_no, first_choice = 0xffff; + + vector_no = dev->fibs_pushed_no % dev->max_msix; + do { + vector_no += 1; + if (vector_no == dev->max_msix) + vector_no = 1; + if (atomic_read(&dev->rrq_outstanding[vector_no]) < + dev->vector_cap) + break; + if (0xffff == first_choice) + first_choice = vector_no; + else if (vector_no == first_choice) + break; + } while (1); + if (vector_no == first_choice) + vector_no = 0; + atomic_inc(&dev->rrq_outstanding[vector_no]); + if (dev->fibs_pushed_no == 0xffffffff) + dev->fibs_pushed_no = 0; + else + dev->fibs_pushed_no++; + fib->hw_fib_va->header.Handle += (vector_no << 16); + } if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE2) { /* Calculate the amount to the fibsize bits */ @@ -498,15 +576,34 @@ static int aac_src_restart_adapter(struct aac_dev *dev, int bled) if (bled) printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", dev->name, dev->id, bled); + 
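
aac_src_deliver_message() above spreads FIBs over the message vectors with a bounded round robin: start from fibs_pushed_no % max_msix, step through vectors 1..max_msix-1 skipping any whose rrq_outstanding count has already reached vector_cap, remember the first saturated candidate, and fall back to vector 0 (which also carries AIF and sync traffic) if a full lap finds every vector busy. The same selection, untangled (illustrative names):

        u16 v = pushed % nvec, first_full = 0xffff;

        for (;;) {
                if (++v == nvec)
                        v = 1;                  /* cycle vectors 1..nvec-1 */
                if (atomic_read(&outstanding[v]) < cap)
                        break;                  /* free vector found */
                if (first_full == 0xffff)
                        first_full = v;         /* first saturated vector */
                else if (v == first_full)
                        break;                  /* full lap, all saturated */
        }
        if (v == first_full)
                v = 0;                          /* overflow onto vector 0 */

The chosen vector is then folded into the FIB handle's upper half (Handle += v << 16) so the completion comes back on the matching response queue.
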
dev->a_ops.adapter_enable_int = aac_src_disable_interrupt; bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS, 0, 0, 0, 0, 0, 0, &var, &reset_mask, NULL, NULL, NULL); - if (bled || (var != 0x00000001)) - return -EINVAL; - if (dev->supplement_adapter_info.SupportedOptions2 & - AAC_OPTION_DOORBELL_RESET) { - src_writel(dev, MUnit.IDR, reset_mask); + if ((bled || (var != 0x00000001)) && + !dev->doorbell_mask) + return -EINVAL; + else if (dev->doorbell_mask) { + reset_mask = dev->doorbell_mask; + bled = 0; + var = 0x00000001; + } + + if ((dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) && dev->msi_enabled) { + aac_src_access_devreg(dev, AAC_ENABLE_INTX); + dev->msi_enabled = 0; msleep(5000); /* Delay 5 seconds */ } + + if (!bled && (dev->supplement_adapter_info.SupportedOptions2 & + AAC_OPTION_DOORBELL_RESET)) { + src_writel(dev, MUnit.IDR, reset_mask); + ssleep(45); + } else { + src_writel(dev, MUnit.IDR, 0x100); + ssleep(45); + } } if (src_readl(dev, MUnit.OMR) & KERNEL_PANIC) @@ -527,7 +624,6 @@ int aac_src_select_comm(struct aac_dev *dev, int comm) { switch (comm) { case AAC_COMM_MESSAGE: - dev->a_ops.adapter_enable_int = aac_src_enable_interrupt_message; dev->a_ops.adapter_intr = aac_src_intr_message; dev->a_ops.adapter_deliver = aac_src_deliver_message; break; @@ -625,6 +721,7 @@ int aac_src_init(struct aac_dev *dev) */ dev->a_ops.adapter_interrupt = aac_src_interrupt_adapter; dev->a_ops.adapter_disable_int = aac_src_disable_interrupt; + dev->a_ops.adapter_enable_int = aac_src_disable_interrupt; dev->a_ops.adapter_notify = aac_src_notify_adapter; dev->a_ops.adapter_sync_cmd = src_sync_cmd; dev->a_ops.adapter_check_health = aac_src_check_health; @@ -646,8 +743,11 @@ int aac_src_init(struct aac_dev *dev) dev->msi = aac_msi && !pci_enable_msi(dev->pdev); + dev->aac_msix[0].vector_no = 0; + dev->aac_msix[0].dev = dev; + if (request_irq(dev->pdev->irq, dev->a_ops.adapter_intr, - IRQF_SHARED, "aacraid", dev) < 0) { + IRQF_SHARED, "aacraid", &(dev->aac_msix[0])) < 0) { if (dev->msi) pci_disable_msi(dev->pdev); @@ -659,6 +759,7 @@ int aac_src_init(struct aac_dev *dev) dev->dbg_base = pci_resource_start(dev->pdev, 2); dev->dbg_base_mapped = dev->regs.src.bar1; dev->dbg_size = AAC_MIN_SRC_BAR1_SIZE; + dev->a_ops.adapter_enable_int = aac_src_enable_interrupt_message; aac_adapter_enable_int(dev); @@ -688,7 +789,9 @@ int aac_srcv_init(struct aac_dev *dev) unsigned long status; int restart = 0; int instance = dev->id; + int i, j; const char *name = dev->name; + int cpu; dev->a_ops.adapter_ioremap = aac_srcv_ioremap; dev->a_ops.adapter_comm = aac_src_select_comm; @@ -784,6 +887,7 @@ int aac_srcv_init(struct aac_dev *dev) */ dev->a_ops.adapter_interrupt = aac_src_interrupt_adapter; dev->a_ops.adapter_disable_int = aac_src_disable_interrupt; + dev->a_ops.adapter_enable_int = aac_src_disable_interrupt; dev->a_ops.adapter_notify = aac_src_notify_adapter; dev->a_ops.adapter_sync_cmd = src_sync_cmd; dev->a_ops.adapter_check_health = aac_src_check_health; @@ -802,18 +906,54 @@ int aac_srcv_init(struct aac_dev *dev) goto error_iounmap; if (dev->comm_interface != AAC_COMM_MESSAGE_TYPE2) goto error_iounmap; - dev->msi = aac_msi && !pci_enable_msi(dev->pdev); - if (request_irq(dev->pdev->irq, dev->a_ops.adapter_intr, - IRQF_SHARED, "aacraid", dev) < 0) { - if (dev->msi) - pci_disable_msi(dev->pdev); - printk(KERN_ERR "%s%d: Interrupt unavailable.\n", - name, instance); - goto error_iounmap; + if (dev->msi_enabled) + aac_src_access_devreg(dev, 
AAC_ENABLE_MSIX); + if (!dev->sync_mode && dev->msi_enabled && dev->max_msix > 1) { + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < dev->max_msix; i++) { + dev->aac_msix[i].vector_no = i; + dev->aac_msix[i].dev = dev; + + if (request_irq(dev->msixentry[i].vector, + dev->a_ops.adapter_intr, + 0, + "aacraid", + &(dev->aac_msix[i]))) { + printk(KERN_ERR "%s%d: Failed to register IRQ for vector %d.\n", + name, instance, i); + for (j = 0 ; j < i ; j++) + free_irq(dev->msixentry[j].vector, + &(dev->aac_msix[j])); + pci_disable_msix(dev->pdev); + goto error_iounmap; + } + if (irq_set_affinity_hint( + dev->msixentry[i].vector, + get_cpu_mask(cpu))) { + printk(KERN_ERR "%s%d: Failed to set IRQ affinity for cpu %d\n", + name, instance, cpu); + } + cpu = cpumask_next(cpu, cpu_online_mask); + } + } else { + dev->aac_msix[0].vector_no = 0; + dev->aac_msix[0].dev = dev; + + if (request_irq(dev->pdev->irq, dev->a_ops.adapter_intr, + IRQF_SHARED, + "aacraid", + &(dev->aac_msix[0])) < 0) { + if (dev->msi) + pci_disable_msi(dev->pdev); + printk(KERN_ERR "%s%d: Interrupt unavailable.\n", + name, instance); + goto error_iounmap; + } } dev->dbg_base = dev->base_start; dev->dbg_base_mapped = dev->base; dev->dbg_size = dev->base_size; + dev->a_ops.adapter_enable_int = aac_src_enable_interrupt_message; aac_adapter_enable_int(dev); @@ -831,3 +971,93 @@ error_iounmap: return -1; } +void aac_src_access_devreg(struct aac_dev *dev, int mode) +{ + u_int32_t val; + + switch (mode) { + case AAC_ENABLE_INTERRUPT: + src_writel(dev, + MUnit.OIMR, + dev->OIMR = (dev->msi_enabled ? + AAC_INT_ENABLE_TYPE1_MSIX : + AAC_INT_ENABLE_TYPE1_INTX)); + break; + + case AAC_DISABLE_INTERRUPT: + src_writel(dev, + MUnit.OIMR, + dev->OIMR = AAC_INT_DISABLE_ALL); + break; + + case AAC_ENABLE_MSIX: + /* set bit 6 */ + val = src_readl(dev, MUnit.IDR); + val |= 0x40; + src_writel(dev, MUnit.IDR, val); + src_readl(dev, MUnit.IDR); + /* unmask int. */ + val = PMC_ALL_INTERRUPT_BITS; + src_writel(dev, MUnit.IOAR, val); + val = src_readl(dev, MUnit.OIMR); + src_writel(dev, + MUnit.OIMR, + val & (~(PMC_GLOBAL_INT_BIT2 | PMC_GLOBAL_INT_BIT0))); + break; + + case AAC_DISABLE_MSIX: + /* reset bit 6 */ + val = src_readl(dev, MUnit.IDR); + val &= ~0x40; + src_writel(dev, MUnit.IDR, val); + src_readl(dev, MUnit.IDR); + break; + + case AAC_CLEAR_AIF_BIT: + /* set bit 5 */ + val = src_readl(dev, MUnit.IDR); + val |= 0x20; + src_writel(dev, MUnit.IDR, val); + src_readl(dev, MUnit.IDR); + break; + + case AAC_CLEAR_SYNC_BIT: + /* set bit 4 */ + val = src_readl(dev, MUnit.IDR); + val |= 0x10; + src_writel(dev, MUnit.IDR, val); + src_readl(dev, MUnit.IDR); + break; + + case AAC_ENABLE_INTX: + /* set bit 7 */ + val = src_readl(dev, MUnit.IDR); + val |= 0x80; + src_writel(dev, MUnit.IDR, val); + src_readl(dev, MUnit.IDR); + /* unmask int. */ + val = PMC_ALL_INTERRUPT_BITS; + src_writel(dev, MUnit.IOAR, val); + src_readl(dev, MUnit.IOAR); + val = src_readl(dev, MUnit.OIMR); + src_writel(dev, MUnit.OIMR, + val & (~(PMC_GLOBAL_INT_BIT2))); + break; + + default: + break; + } +} + +static int aac_src_get_sync_status(struct aac_dev *dev) +{ + + int val; + + if (dev->msi_enabled) + val = src_readl(dev, MUnit.ODR_MSI) & 0x1000 ? 
1 : 0; + else + val = src_readl(dev, MUnit.ODR_R) >> SRC_ODR_SHIFT; + + return val; +} diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index 770c48ddbe5e..ec432763a29a 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -1,28 +1,9 @@ -/* $Id: aha1542.c,v 1.1 1992/07/24 06:27:38 root Exp root $ - * linux/kernel/aha1542.c +/* + * Driver for Adaptec AHA-1542 SCSI host adapters * * Copyright (C) 1992 Tommy Thorn * Copyright (C) 1993, 1994, 1995 Eric Youngdale - * - * Modified by Eric Youngdale - * Use request_irq and request_dma to help prevent unexpected conflicts - * Set up on-board DMA controller, such that we do not have to - * have the bios enabled to use the aha1542. - * Modified by David Gentzel - * Don't call request_dma if dma mask is 0 (for BusLogic BT-445S VL-Bus - * controller). - * Modified by Matti Aarnio - * Accept parameters from LILO cmd-line. -- 1-Oct-94 - * Modified by Mike McLagan <mike.mclagan@linux.org> - * Recognise extended mode on AHA1542CP, different bit than 1542CF - * 1-Jan-97 - * Modified by Bjorn L. Thordarson and Einar Thor Einarsson - * Recognize that DMA0 is valid DMA channel -- 13-Jul-98 - * Modified by Chris Faulhaber <jedgar@fxp.org> - * Added module command-line options - * 19-Jul-99 - * Modified by Adam Fritzler - * Added proper detection of the AHA-1640 (MCA, now deleted) + * Copyright (C) 2015 Ondrej Zary */ #include <linux/module.h> @@ -30,96 +11,44 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/string.h> -#include <linux/ioport.h> #include <linux/delay.h> -#include <linux/proc_fs.h> #include <linux/init.h> #include <linux/spinlock.h> -#include <linux/isapnp.h> -#include <linux/blkdev.h> +#include <linux/isa.h> +#include <linux/pnp.h> #include <linux/slab.h> - +#include <linux/io.h> #include <asm/dma.h> -#include <asm/io.h> - -#include "scsi.h" +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include "aha1542.h" -#define SCSI_BUF_PA(address) isa_virt_to_bus(address) -#define SCSI_SG_PA(sgent) (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset) - -#include <linux/stat.h> - -#ifdef DEBUG -#define DEB(x) x -#else -#define DEB(x) -#endif - -/* - static const char RCSid[] = "$Header: /usr/src/linux/kernel/blk_drv/scsi/RCS/aha1542.c,v 1.1 1992/07/24 06:27:38 root Exp root $"; - */ - -/* The adaptec can be configured for quite a number of addresses, but - I generally do not want the card poking around at random. We allow - two addresses - this allows people to use the Adaptec with a Midi - card, which also used 0x330 -- can be overridden with LILO! */ - -#define MAXBOARDS 4 /* Increase this and the sizes of the - arrays below, if you need more.. */ - -/* Boards 3,4 slots are reserved for ISAPnP scans */ - -static unsigned int bases[MAXBOARDS] __initdata = {0x330, 0x334, 0, 0}; - -/* set by aha1542_setup according to the command line; they also may - be marked __initdata, but require zero initializers then */ - -static int setup_called[MAXBOARDS]; -static int setup_buson[MAXBOARDS]; -static int setup_busoff[MAXBOARDS]; -static int setup_dmaspeed[MAXBOARDS] __initdata = { -1, -1, -1, -1 }; +#define MAXBOARDS 4 -/* - * LILO/Module params: aha1542=<PORTBASE>[,<BUSON>,<BUSOFF>[,<DMASPEED>]] - * - * Where: <PORTBASE> is any of the valid AHA addresses: - * 0x130, 0x134, 0x230, 0x234, 0x330, 0x334 - * <BUSON> is the time (in microsecs) that AHA spends on the AT-bus - * when transferring data. 
1542A power-on default is 11us, - * valid values are in range: 2..15 (decimal) - * <BUSOFF> is the time that AHA spends OFF THE BUS after while - * it is transferring data (not to monopolize the bus). - * Power-on default is 4us, valid range: 1..64 microseconds. - * <DMASPEED> Default is jumper selected (1542A: on the J1), - * but experimenter can alter it with this. - * Valid values: 5, 6, 7, 8, 10 (MB/s) - * Factory default is 5 MB/s. - */ - -#if defined(MODULE) -static bool isapnp = 0; -static int aha1542[] = {0x330, 11, 4, -1}; -module_param_array(aha1542, int, NULL, 0); +static bool isapnp = 1; module_param(isapnp, bool, 0); +MODULE_PARM_DESC(isapnp, "enable PnP support (default=1)"); -static struct isapnp_device_id id_table[] __initdata = { - { - ISAPNP_ANY_ID, ISAPNP_ANY_ID, - ISAPNP_VENDOR('A', 'D', 'P'), ISAPNP_FUNCTION(0x1542), - 0 - }, - {0} -}; +static int io[MAXBOARDS] = { 0x330, 0x334, 0, 0 }; +module_param_array(io, int, NULL, 0); +MODULE_PARM_DESC(io, "base IO address of controller (0x130,0x134,0x230,0x234,0x330,0x334, default=0x330,0x334)"); -MODULE_DEVICE_TABLE(isapnp, id_table); +/* time AHA spends on the AT-bus during data transfer */ +static int bus_on[MAXBOARDS] = { -1, -1, -1, -1 }; /* power-on default: 11us */ +module_param_array(bus_on, int, NULL, 0); +MODULE_PARM_DESC(bus_on, "bus on time [us] (2-15, default=-1 [HW default: 11])"); -#else -static int isapnp = 1; -#endif +/* time AHA spends off the bus (not to monopolize it) during data transfer */ +static int bus_off[MAXBOARDS] = { -1, -1, -1, -1 }; /* power-on default: 4us */ +module_param_array(bus_off, int, NULL, 0); +MODULE_PARM_DESC(bus_off, "bus off time [us] (1-64, default=-1 [HW default: 4])"); + +/* default is jumper selected (J1 on 1542A), factory default = 5 MB/s */ +static int dma_speed[MAXBOARDS] = { -1, -1, -1, -1 }; +module_param_array(dma_speed, int, NULL, 0); +MODULE_PARM_DESC(dma_speed, "DMA speed [MB/s] (5,6,7,8,10, default=-1 [by jumper])"); -#define BIOS_TRANSLATION_1632 0 /* Used by some old 1542A boards */ #define BIOS_TRANSLATION_6432 1 /* Default case these days */ #define BIOS_TRANSLATION_25563 2 /* Big disk case */ @@ -128,134 +57,71 @@ struct aha1542_hostdata { int bios_translation; /* Mapping bios uses - for compatibility */ int aha1542_last_mbi_used; int aha1542_last_mbo_used; - Scsi_Cmnd *SCint[AHA1542_MAILBOXES]; + struct scsi_cmnd *int_cmds[AHA1542_MAILBOXES]; struct mailbox mb[2 * AHA1542_MAILBOXES]; struct ccb ccb[AHA1542_MAILBOXES]; }; -#define HOSTDATA(host) ((struct aha1542_hostdata *) &host->hostdata) - -static DEFINE_SPINLOCK(aha1542_lock); - - - -#define WAITnexttimeout 3000000 - -static void setup_mailboxes(int base_io, struct Scsi_Host *shpnt); -static int aha1542_restart(struct Scsi_Host *shost); -static void aha1542_intr_handle(struct Scsi_Host *shost); +static inline void aha1542_intr_reset(u16 base) +{ + outb(IRST, CONTROL(base)); +} -#define aha1542_intr_reset(base) outb(IRST, CONTROL(base)) +static inline bool wait_mask(u16 port, u8 mask, u8 allof, u8 noneof, int timeout) +{ + bool delayed = true; -#define WAIT(port, mask, allof, noneof) \ - { register int WAITbits; \ - register int WAITtimeout = WAITnexttimeout; \ - while (1) { \ - WAITbits = inb(port) & (mask); \ - if ((WAITbits & (allof)) == (allof) && ((WAITbits & (noneof)) == 0)) \ - break; \ - if (--WAITtimeout == 0) goto fail; \ - } \ - } + if (timeout == 0) { + timeout = 3000000; + delayed = false; + } -/* Similar to WAIT, except we use the udelay call to regulate the - amount of time we wait. 
*/ -#define WAITd(port, mask, allof, noneof, timeout) \ - { register int WAITbits; \ - register int WAITtimeout = timeout; \ - while (1) { \ - WAITbits = inb(port) & (mask); \ - if ((WAITbits & (allof)) == (allof) && ((WAITbits & (noneof)) == 0)) \ - break; \ - mdelay(1); \ - if (--WAITtimeout == 0) goto fail; \ - } \ - } + while (1) { + u8 bits = inb(port) & mask; + if ((bits & allof) == allof && ((bits & noneof) == 0)) + break; + if (delayed) + mdelay(1); + if (--timeout == 0) + return false; + } -static void aha1542_stat(void) -{ -/* int s = inb(STATUS), i = inb(INTRFLAGS); - printk("status=%x intrflags=%x\n", s, i, WAITnexttimeout-WAITtimeout); */ + return true; } -/* This is a bit complicated, but we need to make sure that an interrupt - routine does not send something out while we are in the middle of this. - Fortunately, it is only at boot time that multi-byte messages - are ever sent. */ -static int aha1542_out(unsigned int base, unchar * cmdp, int len) +static int aha1542_outb(unsigned int base, u8 val) { - unsigned long flags = 0; - int got_lock; - - if (len == 1) { - got_lock = 0; - while (1 == 1) { - WAIT(STATUS(base), CDF, 0, CDF); - spin_lock_irqsave(&aha1542_lock, flags); - if (inb(STATUS(base)) & CDF) { - spin_unlock_irqrestore(&aha1542_lock, flags); - continue; - } - outb(*cmdp, DATA(base)); - spin_unlock_irqrestore(&aha1542_lock, flags); - return 0; - } - } else { - spin_lock_irqsave(&aha1542_lock, flags); - got_lock = 1; - while (len--) { - WAIT(STATUS(base), CDF, 0, CDF); - outb(*cmdp++, DATA(base)); - } - spin_unlock_irqrestore(&aha1542_lock, flags); - } + if (!wait_mask(STATUS(base), CDF, 0, CDF, 0)) + return 1; + outb(val, DATA(base)); + return 0; -fail: - if (got_lock) - spin_unlock_irqrestore(&aha1542_lock, flags); - printk(KERN_ERR "aha1542_out failed(%d): ", len + 1); - aha1542_stat(); - return 1; } -/* Only used at boot time, so we do not need to worry about latency as much - here */ - -static int __init aha1542_in(unsigned int base, unchar * cmdp, int len) +static int aha1542_out(unsigned int base, u8 *buf, int len) { - unsigned long flags; - - spin_lock_irqsave(&aha1542_lock, flags); while (len--) { - WAIT(STATUS(base), DF, DF, 0); - *cmdp++ = inb(DATA(base)); + if (!wait_mask(STATUS(base), CDF, 0, CDF, 0)) + return 1; + outb(*buf++, DATA(base)); } - spin_unlock_irqrestore(&aha1542_lock, flags); + if (!wait_mask(INTRFLAGS(base), INTRMASK, HACC, 0, 0)) + return 1; + return 0; -fail: - spin_unlock_irqrestore(&aha1542_lock, flags); - printk(KERN_ERR "aha1542_in failed(%d): ", len + 1); - aha1542_stat(); - return 1; } -/* Similar to aha1542_in, except that we wait a very short period of time. 
- We use this if we know the board is alive and awake, but we are not sure - if the board will respond to the command we are about to send or not */ -static int __init aha1542_in1(unsigned int base, unchar * cmdp, int len) -{ - unsigned long flags; +/* Only used at boot time, so we do not need to worry about latency as much + here */ - spin_lock_irqsave(&aha1542_lock, flags); +static int aha1542_in(unsigned int base, u8 *buf, int len, int timeout) +{ while (len--) { - WAITd(STATUS(base), DF, DF, 0, 100); - *cmdp++ = inb(DATA(base)); + if (!wait_mask(STATUS(base), DF, DF, 0, timeout)) + return 1; + *buf++ = inb(DATA(base)); } - spin_unlock_irqrestore(&aha1542_lock, flags); return 0; -fail: - spin_unlock_irqrestore(&aha1542_lock, flags); - return 1; } static int makecode(unsigned hosterr, unsigned scsierr) @@ -297,7 +163,9 @@ static int makecode(unsigned hosterr, unsigned scsierr) case 0x1a: /* Invalid CCB or Segment List Parameter-A segment list with a zero length segment or invalid segment list boundaries was received. A CCB parameter was invalid. */ - DEB(printk("Aha1542: %x %x\n", hosterr, scsierr)); +#ifdef DEBUG + printk("Aha1542: %x %x\n", hosterr, scsierr); +#endif hosterr = DID_ERROR; /* Couldn't find any better */ break; @@ -314,106 +182,74 @@ static int makecode(unsigned hosterr, unsigned scsierr) return scsierr | (hosterr << 16); } -static int __init aha1542_test_port(int bse, struct Scsi_Host *shpnt) +static int aha1542_test_port(struct Scsi_Host *sh) { - unchar inquiry_cmd[] = {CMD_INQUIRY}; - unchar inquiry_result[4]; - unchar *cmdp; - int len; - volatile int debug = 0; + u8 inquiry_result[4]; + int i; /* Quick and dirty test for presence of the card. */ - if (inb(STATUS(bse)) == 0xff) + if (inb(STATUS(sh->io_port)) == 0xff) return 0; /* Reset the adapter. I ought to make a hard reset, but it's not really necessary */ - /* DEB(printk("aha1542_test_port called \n")); */ - /* In case some other card was probing here, reset interrupts */ - aha1542_intr_reset(bse); /* reset interrupts, so they don't block */ + aha1542_intr_reset(sh->io_port); /* reset interrupts, so they don't block */ - outb(SRST | IRST /*|SCRST */ , CONTROL(bse)); + outb(SRST | IRST /*|SCRST */ , CONTROL(sh->io_port)); mdelay(20); /* Wait a little bit for things to settle down. 
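
aha1542_in() above, and the probe code built on it, poll DF once per byte and treat a DF that stays up after the last byte as the sign of a confused board. Both steps combined in one sketch (read_reply is illustrative, not a driver function):

        /* Drain an n-byte adapter reply, then insist DF has dropped. */
        static int read_reply(unsigned int base, u8 *buf, int n)
        {
                while (n--) {
                        if (!wait_mask(STATUS(base), DF, DF, 0, 0))
                                return 1;
                        *buf++ = inb(DATA(base));
                }
                return (inb(STATUS(base)) & DF) ? 1 : 0;
        }
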
*/ - debug = 1; /* Expect INIT and IDLE, any of the others are bad */ - WAIT(STATUS(bse), STATMASK, INIT | IDLE, STST | DIAGF | INVDCMD | DF | CDF); + if (!wait_mask(STATUS(sh->io_port), STATMASK, INIT | IDLE, STST | DIAGF | INVDCMD | DF | CDF, 0)) + return 0; - debug = 2; /* Shouldn't have generated any interrupts during reset */ - if (inb(INTRFLAGS(bse)) & INTRMASK) - goto fail; - + if (inb(INTRFLAGS(sh->io_port)) & INTRMASK) + return 0; /* Perform a host adapter inquiry instead so we do not need to set up the mailboxes ahead of time */ - aha1542_out(bse, inquiry_cmd, 1); - - debug = 3; - len = 4; - cmdp = &inquiry_result[0]; + aha1542_outb(sh->io_port, CMD_INQUIRY); - while (len--) { - WAIT(STATUS(bse), DF, DF, 0); - *cmdp++ = inb(DATA(bse)); + for (i = 0; i < 4; i++) { + if (!wait_mask(STATUS(sh->io_port), DF, DF, 0, 0)) + return 0; + inquiry_result[i] = inb(DATA(sh->io_port)); } - debug = 8; /* Reading port should reset DF */ - if (inb(STATUS(bse)) & DF) - goto fail; + if (inb(STATUS(sh->io_port)) & DF) + return 0; - debug = 9; /* When HACC, command is completed, and we're though testing */ - WAIT(INTRFLAGS(bse), HACC, HACC, 0); - /* now initialize adapter */ + if (!wait_mask(INTRFLAGS(sh->io_port), HACC, HACC, 0, 0)) + return 0; - debug = 10; /* Clear interrupts */ - outb(IRST, CONTROL(bse)); - - debug = 11; - - return debug; /* 1 = ok */ -fail: - return 0; /* 0 = not ok */ -} + outb(IRST, CONTROL(sh->io_port)); -/* A quick wrapper for do_aha1542_intr_handle to grab the spin lock */ -static irqreturn_t do_aha1542_intr_handle(int dummy, void *dev_id) -{ - unsigned long flags; - struct Scsi_Host *shost = dev_id; - - spin_lock_irqsave(shost->host_lock, flags); - aha1542_intr_handle(shost); - spin_unlock_irqrestore(shost->host_lock, flags); - return IRQ_HANDLED; + return 1; } -/* A "high" level interrupt handler */ -static void aha1542_intr_handle(struct Scsi_Host *shost) +static irqreturn_t aha1542_interrupt(int irq, void *dev_id) { - void (*my_done) (Scsi_Cmnd *) = NULL; + struct Scsi_Host *sh = dev_id; + struct aha1542_hostdata *aha1542 = shost_priv(sh); + void (*my_done)(struct scsi_cmnd *) = NULL; int errstatus, mbi, mbo, mbistatus; int number_serviced; unsigned long flags; - Scsi_Cmnd *SCtmp; + struct scsi_cmnd *tmp_cmd; int flag; - int needs_restart; - struct mailbox *mb; - struct ccb *ccb; - - mb = HOSTDATA(shost)->mb; - ccb = HOSTDATA(shost)->ccb; + struct mailbox *mb = aha1542->mb; + struct ccb *ccb = aha1542->ccb; #ifdef DEBUG { - flag = inb(INTRFLAGS(shost->io_port)); - printk(KERN_DEBUG "aha1542_intr_handle: "); + flag = inb(INTRFLAGS(sh->io_port)); + shost_printk(KERN_DEBUG, sh, "aha1542_intr_handle: "); if (!(flag & ANYINTR)) printk("no interrupt?"); if (flag & MBIF) @@ -424,14 +260,14 @@ static void aha1542_intr_handle(struct Scsi_Host *shost) printk("HACC "); if (flag & SCRD) printk("SCRD "); - printk("status %02x\n", inb(STATUS(shost->io_port))); + printk("status %02x\n", inb(STATUS(sh->io_port))); }; #endif number_serviced = 0; - needs_restart = 0; - while (1 == 1) { - flag = inb(INTRFLAGS(shost->io_port)); + spin_lock_irqsave(sh->host_lock, flags); + while (1) { + flag = inb(INTRFLAGS(sh->io_port)); /* Check for unusual interrupts. 
If any of these happen, we should probably do something special, but for now just printing a message @@ -442,15 +278,12 @@ static void aha1542_intr_handle(struct Scsi_Host *shost) printk("MBOF "); if (flag & HACC) printk("HACC "); - if (flag & SCRD) { - needs_restart = 1; + if (flag & SCRD) printk("SCRD "); - } } - aha1542_intr_reset(shost->io_port); + aha1542_intr_reset(sh->io_port); - spin_lock_irqsave(&aha1542_lock, flags); - mbi = HOSTDATA(shost)->aha1542_last_mbi_used + 1; + mbi = aha1542->aha1542_last_mbi_used + 1; if (mbi >= 2 * AHA1542_MAILBOXES) mbi = AHA1542_MAILBOXES; @@ -460,57 +293,51 @@ static void aha1542_intr_handle(struct Scsi_Host *shost) mbi++; if (mbi >= 2 * AHA1542_MAILBOXES) mbi = AHA1542_MAILBOXES; - } while (mbi != HOSTDATA(shost)->aha1542_last_mbi_used); + } while (mbi != aha1542->aha1542_last_mbi_used); if (mb[mbi].status == 0) { - spin_unlock_irqrestore(&aha1542_lock, flags); + spin_unlock_irqrestore(sh->host_lock, flags); /* Hmm, no mail. Must have read it the last time around */ - if (!number_serviced && !needs_restart) - printk(KERN_WARNING "aha1542.c: interrupt received, but no mail.\n"); - /* We detected a reset. Restart all pending commands for - devices that use the hard reset option */ - if (needs_restart) - aha1542_restart(shost); - return; + if (!number_serviced) + shost_printk(KERN_WARNING, sh, "interrupt received, but no mail.\n"); + return IRQ_HANDLED; }; - mbo = (scsi2int(mb[mbi].ccbptr) - (SCSI_BUF_PA(&ccb[0]))) / sizeof(struct ccb); + mbo = (scsi2int(mb[mbi].ccbptr) - (isa_virt_to_bus(&ccb[0]))) / sizeof(struct ccb); mbistatus = mb[mbi].status; mb[mbi].status = 0; - HOSTDATA(shost)->aha1542_last_mbi_used = mbi; - spin_unlock_irqrestore(&aha1542_lock, flags); + aha1542->aha1542_last_mbi_used = mbi; #ifdef DEBUG - { - if (ccb[mbo].tarstat | ccb[mbo].hastat) - printk(KERN_DEBUG "aha1542_command: returning %x (status %d)\n", - ccb[mbo].tarstat + ((int) ccb[mbo].hastat << 16), mb[mbi].status); - }; + if (ccb[mbo].tarstat | ccb[mbo].hastat) + shost_printk(KERN_DEBUG, sh, "aha1542_command: returning %x (status %d)\n", + ccb[mbo].tarstat + ((int) ccb[mbo].hastat << 16), mb[mbi].status); #endif if (mbistatus == 3) continue; /* Aborted command not found */ #ifdef DEBUG - printk(KERN_DEBUG "...done %d %d\n", mbo, mbi); + shost_printk(KERN_DEBUG, sh, "...done %d %d\n", mbo, mbi); #endif - SCtmp = HOSTDATA(shost)->SCint[mbo]; + tmp_cmd = aha1542->int_cmds[mbo]; - if (!SCtmp || !SCtmp->scsi_done) { - printk(KERN_WARNING "aha1542_intr_handle: Unexpected interrupt\n"); - printk(KERN_WARNING "tarstat=%x, hastat=%x idlun=%x ccb#=%d \n", ccb[mbo].tarstat, + if (!tmp_cmd || !tmp_cmd->scsi_done) { + spin_unlock_irqrestore(sh->host_lock, flags); + shost_printk(KERN_WARNING, sh, "Unexpected interrupt\n"); + shost_printk(KERN_WARNING, sh, "tarstat=%x, hastat=%x idlun=%x ccb#=%d\n", ccb[mbo].tarstat, ccb[mbo].hastat, ccb[mbo].idlun, mbo); - return; + return IRQ_HANDLED; } - my_done = SCtmp->scsi_done; - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; + my_done = tmp_cmd->scsi_done; + kfree(tmp_cmd->host_scribble); + tmp_cmd->host_scribble = NULL; /* Fetch the sense data, and tuck it away, in the required slot. 
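
Both the interrupt handler here and the queuecommand path further down walk their mailbox ring the same way: resume one past the last slot used, wrap at the end of the region, and stop after one fruitless full lap (the outbound scan additionally requires the slot's command pointer to be NULL). The shared pattern as a sketch (scan_mailboxes is illustrative; inbound boxes occupy the upper half of mb[], outbound the lower):

        /* Returns a slot index, or -1 after a fruitless full lap.
         * want_busy = true finds a posted inbound box (status != 0);
         * false finds a free outbound box. */
        static int scan_mailboxes(struct mailbox *mb, int last,
                                  int lo, int hi, bool want_busy)
        {
                int i = last + 1;

                if (i >= hi)
                        i = lo;
                do {
                        if (!!mb[i].status == want_busy)
                                return i;
                        if (++i >= hi)
                                i = lo;
                } while (i != last);
                return -1;
        }
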
The Adaptec automatically fetches it, and there is no guarantee that we will still have it in the cdb when we come back */ if (ccb[mbo].tarstat == 2) - memcpy(SCtmp->sense_buffer, &ccb[mbo].cdb[ccb[mbo].cdblen], + memcpy(tmp_cmd->sense_buffer, &ccb[mbo].cdb[ccb[mbo].cdblen], SCSI_SENSE_BUFFERSIZE); @@ -525,166 +352,122 @@ static void aha1542_intr_handle(struct Scsi_Host *shost) #ifdef DEBUG if (errstatus) - printk(KERN_DEBUG "(aha1542 error:%x %x %x) ", errstatus, + shost_printk(KERN_DEBUG, sh, "(aha1542 error:%x %x %x) ", errstatus, ccb[mbo].hastat, ccb[mbo].tarstat); + if (ccb[mbo].tarstat == 2) + print_hex_dump_bytes("sense: ", DUMP_PREFIX_NONE, &ccb[mbo].cdb[ccb[mbo].cdblen], 12); + if (errstatus) + printk("aha1542_intr_handle: returning %6x\n", errstatus); #endif - - if (ccb[mbo].tarstat == 2) { -#ifdef DEBUG - int i; -#endif - DEB(printk("aha1542_intr_handle: sense:")); -#ifdef DEBUG - for (i = 0; i < 12; i++) - printk("%02x ", ccb[mbo].cdb[ccb[mbo].cdblen + i]); - printk("\n"); -#endif - /* - DEB(printk("aha1542_intr_handle: buf:")); - for (i = 0; i < bufflen; i++) - printk("%02x ", ((unchar *)buff)[i]); - printk("\n"); - */ - } - DEB(if (errstatus) printk("aha1542_intr_handle: returning %6x\n", errstatus)); - SCtmp->result = errstatus; - HOSTDATA(shost)->SCint[mbo] = NULL; /* This effectively frees up the mailbox slot, as - far as queuecommand is concerned */ - my_done(SCtmp); + tmp_cmd->result = errstatus; + aha1542->int_cmds[mbo] = NULL; /* This effectively frees up the mailbox slot, as + far as queuecommand is concerned */ + my_done(tmp_cmd); number_serviced++; }; } -static int aha1542_queuecommand_lck(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *)) +static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) { - unchar ahacmd = CMD_START_SCSI; - unchar direction; - unchar *cmd = (unchar *) SCpnt->cmnd; - unchar target = SCpnt->device->id; - unchar lun = SCpnt->device->lun; + struct aha1542_hostdata *aha1542 = shost_priv(sh); + u8 direction; + u8 target = cmd->device->id; + u8 lun = cmd->device->lun; unsigned long flags; - int bufflen = scsi_bufflen(SCpnt); + int bufflen = scsi_bufflen(cmd); int mbo; - struct mailbox *mb; - struct ccb *ccb; + struct mailbox *mb = aha1542->mb; + struct ccb *ccb = aha1542->ccb; - DEB(int i); - - mb = HOSTDATA(SCpnt->device->host)->mb; - ccb = HOSTDATA(SCpnt->device->host)->ccb; - - DEB(if (target > 1) { - SCpnt->result = DID_TIME_OUT << 16; - done(SCpnt); return 0; - } - ); - - if (*cmd == REQUEST_SENSE) { + if (*cmd->cmnd == REQUEST_SENSE) { /* Don't do the command - we have the sense data already */ -#if 0 - /* scsi_request_sense() provides a buffer of size 256, - so there is no reason to expect equality */ - if (bufflen != SCSI_SENSE_BUFFERSIZE) - printk(KERN_CRIT "aha1542: Wrong buffer length supplied " - "for request sense (%d)\n", bufflen); -#endif - SCpnt->result = 0; - done(SCpnt); + cmd->result = 0; + cmd->scsi_done(cmd); return 0; } #ifdef DEBUG - if (*cmd == READ_10 || *cmd == WRITE_10) - i = xscsi2int(cmd + 2); - else if (*cmd == READ_6 || *cmd == WRITE_6) - i = scsi2int(cmd + 2); - else - i = -1; - if (done) - printk(KERN_DEBUG "aha1542_queuecommand: dev %d cmd %02x pos %d len %d ", target, *cmd, i, bufflen); - else - printk(KERN_DEBUG "aha1542_command: dev %d cmd %02x pos %d len %d ", target, *cmd, i, bufflen); - aha1542_stat(); - printk(KERN_DEBUG "aha1542_queuecommand: dumping scsi cmd:"); - for (i = 0; i < SCpnt->cmd_len; i++) - printk("%02x ", cmd[i]); - printk("\n"); - if (*cmd == WRITE_10 || *cmd == WRITE_6) - 
return 0; /* we are still testing, so *don't* write */ + { + int i = -1; + if (*cmd->cmnd == READ_10 || *cmd->cmnd == WRITE_10) + i = xscsi2int(cmd->cmnd + 2); + else if (*cmd->cmnd == READ_6 || *cmd->cmnd == WRITE_6) + i = scsi2int(cmd->cmnd + 2); + shost_printk(KERN_DEBUG, sh, "aha1542_queuecommand: dev %d cmd %02x pos %d len %d", + target, *cmd->cmnd, i, bufflen); + print_hex_dump_bytes("command: ", DUMP_PREFIX_NONE, cmd->cmnd, cmd->cmd_len); + } #endif /* Use the outgoing mailboxes in a round-robin fashion, because this is how the host adapter will scan for them */ - spin_lock_irqsave(&aha1542_lock, flags); - mbo = HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used + 1; + spin_lock_irqsave(sh->host_lock, flags); + mbo = aha1542->aha1542_last_mbo_used + 1; if (mbo >= AHA1542_MAILBOXES) mbo = 0; do { - if (mb[mbo].status == 0 && HOSTDATA(SCpnt->device->host)->SCint[mbo] == NULL) + if (mb[mbo].status == 0 && aha1542->int_cmds[mbo] == NULL) break; mbo++; if (mbo >= AHA1542_MAILBOXES) mbo = 0; - } while (mbo != HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used); + } while (mbo != aha1542->aha1542_last_mbo_used); - if (mb[mbo].status || HOSTDATA(SCpnt->device->host)->SCint[mbo]) + if (mb[mbo].status || aha1542->int_cmds[mbo]) panic("Unable to find empty mailbox for aha1542.\n"); - HOSTDATA(SCpnt->device->host)->SCint[mbo] = SCpnt; /* This will effectively prevent someone else from - screwing with this cdb. */ + aha1542->int_cmds[mbo] = cmd; /* This will effectively prevent someone else from + screwing with this cdb. */ - HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used = mbo; - spin_unlock_irqrestore(&aha1542_lock, flags); + aha1542->aha1542_last_mbo_used = mbo; #ifdef DEBUG - printk(KERN_DEBUG "Sending command (%d %x)...", mbo, done); + shost_printk(KERN_DEBUG, sh, "Sending command (%d %p)...", mbo, cmd->scsi_done); #endif - any2scsi(mb[mbo].ccbptr, SCSI_BUF_PA(&ccb[mbo])); /* This gets trashed for some reason */ + any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */ memset(&ccb[mbo], 0, sizeof(struct ccb)); - ccb[mbo].cdblen = SCpnt->cmd_len; + ccb[mbo].cdblen = cmd->cmd_len; direction = 0; - if (*cmd == READ_10 || *cmd == READ_6) + if (*cmd->cmnd == READ_10 || *cmd->cmnd == READ_6) direction = 8; - else if (*cmd == WRITE_10 || *cmd == WRITE_6) + else if (*cmd->cmnd == WRITE_10 || *cmd->cmnd == WRITE_6) direction = 16; - memcpy(ccb[mbo].cdb, cmd, ccb[mbo].cdblen); + memcpy(ccb[mbo].cdb, cmd->cmnd, ccb[mbo].cdblen); if (bufflen) { struct scatterlist *sg; struct chain *cptr; -#ifdef DEBUG - unsigned char *ptr; -#endif - int i, sg_count = scsi_sg_count(SCpnt); + int i, sg_count = scsi_sg_count(cmd); + ccb[mbo].op = 2; /* SCSI Initiator Command w/scatter-gather */ - SCpnt->host_scribble = kmalloc(sizeof(*cptr)*sg_count, + cmd->host_scribble = kmalloc(sizeof(*cptr)*sg_count, GFP_KERNEL | GFP_DMA); - cptr = (struct chain *) SCpnt->host_scribble; + cptr = (struct chain *) cmd->host_scribble; if (cptr == NULL) { /* free the claimed mailbox slot */ - HOSTDATA(SCpnt->device->host)->SCint[mbo] = NULL; + aha1542->int_cmds[mbo] = NULL; + spin_unlock_irqrestore(sh->host_lock, flags); return SCSI_MLQUEUE_HOST_BUSY; } - scsi_for_each_sg(SCpnt, sg, sg_count, i) { - any2scsi(cptr[i].dataptr, SCSI_SG_PA(sg)); + scsi_for_each_sg(cmd, sg, sg_count, i) { + any2scsi(cptr[i].dataptr, isa_page_to_bus(sg_page(sg)) + + sg->offset); any2scsi(cptr[i].datalen, sg->length); }; any2scsi(ccb[mbo].datalen, sg_count * sizeof(struct chain)); - any2scsi(ccb[mbo].dataptr, 
SCSI_BUF_PA(cptr)); + any2scsi(ccb[mbo].dataptr, isa_virt_to_bus(cptr)); #ifdef DEBUG - printk("cptr %x: ", cptr); - ptr = (unsigned char *) cptr; - for (i = 0; i < 18; i++) - printk("%02x ", ptr[i]); + shost_printk(KERN_DEBUG, sh, "cptr %p: ", cptr); + print_hex_dump_bytes("cptr: ", DUMP_PREFIX_NONE, cptr, 18); #endif } else { ccb[mbo].op = 0; /* SCSI Initiator Command */ - SCpnt->host_scribble = NULL; + cmd->host_scribble = NULL; any2scsi(ccb[mbo].datalen, 0); any2scsi(ccb[mbo].dataptr, 0); }; @@ -694,139 +477,116 @@ static int aha1542_queuecommand_lck(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd * ccb[mbo].commlinkid = 0; #ifdef DEBUG - { - int i; - printk(KERN_DEBUG "aha1542_command: sending.. "); - for (i = 0; i < sizeof(ccb[mbo]) - 10; i++) - printk("%02x ", ((unchar *) & ccb[mbo])[i]); - }; + print_hex_dump_bytes("sending: ", DUMP_PREFIX_NONE, &ccb[mbo], sizeof(ccb[mbo]) - 10); + printk("aha1542_queuecommand: now waiting for interrupt "); #endif - - if (done) { - DEB(printk("aha1542_queuecommand: now waiting for interrupt "); - aha1542_stat()); - SCpnt->scsi_done = done; - mb[mbo].status = 1; - aha1542_out(SCpnt->device->host->io_port, &ahacmd, 1); /* start scsi command */ - DEB(aha1542_stat()); - } else - printk("aha1542_queuecommand: done can't be NULL\n"); + mb[mbo].status = 1; + aha1542_outb(cmd->device->host->io_port, CMD_START_SCSI); + spin_unlock_irqrestore(sh->host_lock, flags); return 0; } -static DEF_SCSI_QCMD(aha1542_queuecommand) - /* Initialize mailboxes */ -static void setup_mailboxes(int bse, struct Scsi_Host *shpnt) +static void setup_mailboxes(struct Scsi_Host *sh) { + struct aha1542_hostdata *aha1542 = shost_priv(sh); int i; - struct mailbox *mb; - struct ccb *ccb; - - unchar cmd[5] = { CMD_MBINIT, AHA1542_MAILBOXES, 0, 0, 0}; + struct mailbox *mb = aha1542->mb; + struct ccb *ccb = aha1542->ccb; - mb = HOSTDATA(shpnt)->mb; - ccb = HOSTDATA(shpnt)->ccb; + u8 mb_cmd[5] = { CMD_MBINIT, AHA1542_MAILBOXES, 0, 0, 0}; for (i = 0; i < AHA1542_MAILBOXES; i++) { mb[i].status = mb[AHA1542_MAILBOXES + i].status = 0; - any2scsi(mb[i].ccbptr, SCSI_BUF_PA(&ccb[i])); + any2scsi(mb[i].ccbptr, isa_virt_to_bus(&ccb[i])); }; - aha1542_intr_reset(bse); /* reset interrupts, so they don't block */ - any2scsi((cmd + 2), SCSI_BUF_PA(mb)); - aha1542_out(bse, cmd, 5); - WAIT(INTRFLAGS(bse), INTRMASK, HACC, 0); - while (0) { -fail: - printk(KERN_ERR "aha1542_detect: failed setting up mailboxes\n"); - } - aha1542_intr_reset(bse); + aha1542_intr_reset(sh->io_port); /* reset interrupts, so they don't block */ + any2scsi((mb_cmd + 2), isa_virt_to_bus(mb)); + if (aha1542_out(sh->io_port, mb_cmd, 5)) + shost_printk(KERN_ERR, sh, "failed setting up mailboxes\n"); + aha1542_intr_reset(sh->io_port); } -static int __init aha1542_getconfig(int base_io, unsigned char *irq_level, unsigned char *dma_chan, unsigned char *scsi_id) +static int aha1542_getconfig(struct Scsi_Host *sh) { - unchar inquiry_cmd[] = {CMD_RETCONF}; - unchar inquiry_result[3]; + u8 inquiry_result[3]; int i; - i = inb(STATUS(base_io)); + i = inb(STATUS(sh->io_port)); if (i & DF) { - i = inb(DATA(base_io)); + i = inb(DATA(sh->io_port)); }; - aha1542_out(base_io, inquiry_cmd, 1); - aha1542_in(base_io, inquiry_result, 3); - WAIT(INTRFLAGS(base_io), INTRMASK, HACC, 0); - while (0) { -fail: - printk(KERN_ERR "aha1542_detect: query board settings\n"); - } - aha1542_intr_reset(base_io); + aha1542_outb(sh->io_port, CMD_RETCONF); + aha1542_in(sh->io_port, inquiry_result, 3, 0); + if (!wait_mask(INTRFLAGS(sh->io_port), INTRMASK, HACC, 0, 0)) + 
shost_printk(KERN_ERR, sh, "error querying board settings\n"); + aha1542_intr_reset(sh->io_port); switch (inquiry_result[0]) { case 0x80: - *dma_chan = 7; + sh->dma_channel = 7; break; case 0x40: - *dma_chan = 6; + sh->dma_channel = 6; break; case 0x20: - *dma_chan = 5; + sh->dma_channel = 5; break; case 0x01: - *dma_chan = 0; + sh->dma_channel = 0; break; case 0: /* This means that the adapter, although Adaptec 1542 compatible, doesn't use a DMA channel. Currently only aware of the BusLogic BT-445S VL-Bus adapter which needs this. */ - *dma_chan = 0xFF; + sh->dma_channel = 0xFF; break; default: - printk(KERN_ERR "Unable to determine Adaptec DMA priority. Disabling board\n"); + shost_printk(KERN_ERR, sh, "Unable to determine DMA channel.\n"); return -1; }; switch (inquiry_result[1]) { case 0x40: - *irq_level = 15; + sh->irq = 15; break; case 0x20: - *irq_level = 14; + sh->irq = 14; break; case 0x8: - *irq_level = 12; + sh->irq = 12; break; case 0x4: - *irq_level = 11; + sh->irq = 11; break; case 0x2: - *irq_level = 10; + sh->irq = 10; break; case 0x1: - *irq_level = 9; + sh->irq = 9; break; default: - printk(KERN_ERR "Unable to determine Adaptec IRQ level. Disabling board\n"); + shost_printk(KERN_ERR, sh, "Unable to determine IRQ level.\n"); return -1; }; - *scsi_id = inquiry_result[2] & 7; + sh->this_id = inquiry_result[2] & 7; return 0; } /* This function should only be called for 1542C boards - we can detect the special firmware settings and unlock the board */ -static int __init aha1542_mbenable(int base) +static int aha1542_mbenable(struct Scsi_Host *sh) { - static unchar mbenable_cmd[3]; - static unchar mbenable_result[2]; + static u8 mbenable_cmd[3]; + static u8 mbenable_result[2]; int retval; retval = BIOS_TRANSLATION_6432; - mbenable_cmd[0] = CMD_EXTBIOS; - aha1542_out(base, mbenable_cmd, 1); - if (aha1542_in1(base, mbenable_result, 2)) + aha1542_outb(sh->io_port, CMD_EXTBIOS); + if (aha1542_in(sh->io_port, mbenable_result, 2, 100)) return retval; - WAITd(INTRFLAGS(base), INTRMASK, HACC, 0, 100); - aha1542_intr_reset(base); + if (!wait_mask(INTRFLAGS(sh->io_port), INTRMASK, HACC, 0, 100)) + goto fail; + aha1542_intr_reset(sh->io_port); if ((mbenable_result[0] & 0x08) || mbenable_result[1]) { mbenable_cmd[0] = CMD_MBENABLE; @@ -836,37 +596,34 @@ static int __init aha1542_mbenable(int base) if ((mbenable_result[0] & 0x08) && (mbenable_result[1] & 0x03)) retval = BIOS_TRANSLATION_25563; - aha1542_out(base, mbenable_cmd, 3); - WAIT(INTRFLAGS(base), INTRMASK, HACC, 0); + if (aha1542_out(sh->io_port, mbenable_cmd, 3)) + goto fail; }; while (0) { fail: - printk(KERN_ERR "aha1542_mbenable: Mailbox init failed\n"); + shost_printk(KERN_ERR, sh, "Mailbox init failed\n"); } - aha1542_intr_reset(base); + aha1542_intr_reset(sh->io_port); return retval; } /* Query the board to find out if it is a 1542 or a 1740, or whatever. 
*/ -static int __init aha1542_query(int base_io, int *transl) +static int aha1542_query(struct Scsi_Host *sh) { - unchar inquiry_cmd[] = {CMD_INQUIRY}; - unchar inquiry_result[4]; + struct aha1542_hostdata *aha1542 = shost_priv(sh); + u8 inquiry_result[4]; int i; - i = inb(STATUS(base_io)); + i = inb(STATUS(sh->io_port)); if (i & DF) { - i = inb(DATA(base_io)); + i = inb(DATA(sh->io_port)); }; - aha1542_out(base_io, inquiry_cmd, 1); - aha1542_in(base_io, inquiry_result, 4); - WAIT(INTRFLAGS(base_io), INTRMASK, HACC, 0); - while (0) { -fail: - printk(KERN_ERR "aha1542_detect: query card type\n"); - } - aha1542_intr_reset(base_io); + aha1542_outb(sh->io_port, CMD_INQUIRY); + aha1542_in(sh->io_port, inquiry_result, 4, 0); + if (!wait_mask(INTRFLAGS(sh->io_port), INTRMASK, HACC, 0, 0)) + shost_printk(KERN_ERR, sh, "error querying card type\n"); + aha1542_intr_reset(sh->io_port); - *transl = BIOS_TRANSLATION_6432; /* Default case */ + aha1542->bios_translation = BIOS_TRANSLATION_6432; /* Default case */ /* For an AHA1740 series board, we ignore the board since there is a hardware bug which can lead to wrong blocks being returned if the board @@ -875,391 +632,198 @@ fail: */ if (inquiry_result[0] == 0x43) { - printk(KERN_INFO "aha1542.c: Emulation mode not supported for AHA 174N hardware.\n"); + shost_printk(KERN_INFO, sh, "Emulation mode not supported for AHA-1740 hardware, use aha1740 driver instead.\n"); return 1; }; /* Always call this - boards that do not support extended bios translation will ignore the command, and we will set the proper default */ - *transl = aha1542_mbenable(base_io); + aha1542->bios_translation = aha1542_mbenable(sh); return 0; } -#ifndef MODULE -static char *setup_str[MAXBOARDS] __initdata; -static int setup_idx = 0; - -static void __init aha1542_setup(char *str, int *ints) +static u8 dma_speed_hw(int dma_speed) { - const char *ahausage = "aha1542: usage: aha1542=<PORTBASE>[,<BUSON>,<BUSOFF>[,<DMASPEED>]]\n"; - int setup_portbase; - - if (setup_idx >= MAXBOARDS) { - printk(KERN_ERR "aha1542: aha1542_setup called too many times! Bad LILO params ?\n"); - printk(KERN_ERR " Entryline 1: %s\n", setup_str[0]); - printk(KERN_ERR " Entryline 2: %s\n", setup_str[1]); - printk(KERN_ERR " This line: %s\n", str); - return; - } - if (ints[0] < 1 || ints[0] > 4) { - printk(KERN_ERR "aha1542: %s\n", str); - printk(ahausage); - printk(KERN_ERR "aha1542: Wrong parameters may cause system malfunction.. We try anyway..\n"); - } - setup_called[setup_idx] = ints[0]; - setup_str[setup_idx] = str; - - setup_portbase = ints[0] >= 1 ? ints[1] : 0; /* Preserve the default value.. */ - setup_buson[setup_idx] = ints[0] >= 2 ? ints[2] : 7; - setup_busoff[setup_idx] = ints[0] >= 3 ? ints[3] : 5; - if (ints[0] >= 4) - { - int atbt = -1; - switch (ints[4]) { - case 5: - atbt = 0x00; - break; - case 6: - atbt = 0x04; - break; - case 7: - atbt = 0x01; - break; - case 8: - atbt = 0x02; - break; - case 10: - atbt = 0x03; - break; - default: - printk(KERN_ERR "aha1542: %s\n", str); - printk(ahausage); - printk(KERN_ERR "aha1542: Valid values for DMASPEED are 5-8, 10 MB/s. 
Using jumper defaults.\n"); - break; - } - setup_dmaspeed[setup_idx] = atbt; + switch (dma_speed) { + case 5: + return 0x00; + case 6: + return 0x04; + case 7: + return 0x01; + case 8: + return 0x02; + case 10: + return 0x03; } - if (setup_portbase != 0) - bases[setup_idx] = setup_portbase; - ++setup_idx; + return 0xff; /* invalid */ } -static int __init do_setup(char *str) +/* Set the Bus on/off-times as not to ruin floppy performance */ +static void aha1542_set_bus_times(struct Scsi_Host *sh, int bus_on, int bus_off, int dma_speed) { - int ints[5]; + if (bus_on > 0) { + u8 oncmd[] = { CMD_BUSON_TIME, clamp(bus_on, 2, 15) }; - int count=setup_idx; + aha1542_intr_reset(sh->io_port); + if (aha1542_out(sh->io_port, oncmd, 2)) + goto fail; + } - get_options(str, ARRAY_SIZE(ints), ints); - aha1542_setup(str,ints); + if (bus_off > 0) { + u8 offcmd[] = { CMD_BUSOFF_TIME, clamp(bus_off, 1, 64) }; - return count<setup_idx; -} + aha1542_intr_reset(sh->io_port); + if (aha1542_out(sh->io_port, offcmd, 2)) + goto fail; + } -__setup("aha1542=",do_setup); -#endif + if (dma_speed_hw(dma_speed) != 0xff) { + u8 dmacmd[] = { CMD_DMASPEED, dma_speed_hw(dma_speed) }; + + aha1542_intr_reset(sh->io_port); + if (aha1542_out(sh->io_port, dmacmd, 2)) + goto fail; + } + aha1542_intr_reset(sh->io_port); + return; +fail: + shost_printk(KERN_ERR, sh, "setting bus on/off-time failed\n"); + aha1542_intr_reset(sh->io_port); +} /* return non-zero on detection */ -static int __init aha1542_detect(struct scsi_host_template * tpnt) +static struct Scsi_Host *aha1542_hw_init(struct scsi_host_template *tpnt, struct device *pdev, int indx) { - unsigned char dma_chan; - unsigned char irq_level; - unsigned char scsi_id; - unsigned long flags; - unsigned int base_io; - int trans; - struct Scsi_Host *shpnt = NULL; - int count = 0; - int indx; - - DEB(printk("aha1542_detect: \n")); - - tpnt->proc_name = "aha1542"; - -#ifdef MODULE - bases[0] = aha1542[0]; - setup_buson[0] = aha1542[1]; - setup_busoff[0] = aha1542[2]; - { - int atbt = -1; - switch (aha1542[3]) { - case 5: - atbt = 0x00; - break; - case 6: - atbt = 0x04; - break; - case 7: - atbt = 0x01; - break; - case 8: - atbt = 0x02; - break; - case 10: - atbt = 0x03; - break; - }; - setup_dmaspeed[0] = atbt; + unsigned int base_io = io[indx]; + struct Scsi_Host *sh; + struct aha1542_hostdata *aha1542; + char dma_info[] = "no DMA"; + + if (base_io == 0) + return NULL; + + if (!request_region(base_io, AHA1542_REGION_SIZE, "aha1542")) + return NULL; + + sh = scsi_host_alloc(tpnt, sizeof(struct aha1542_hostdata)); + if (!sh) + goto release; + aha1542 = shost_priv(sh); + + sh->unique_id = base_io; + sh->io_port = base_io; + sh->n_io_port = AHA1542_REGION_SIZE; + aha1542->aha1542_last_mbi_used = 2 * AHA1542_MAILBOXES - 1; + aha1542->aha1542_last_mbo_used = AHA1542_MAILBOXES - 1; + + if (!aha1542_test_port(sh)) + goto unregister; + + aha1542_set_bus_times(sh, bus_on[indx], bus_off[indx], dma_speed[indx]); + if (aha1542_query(sh)) + goto unregister; + if (aha1542_getconfig(sh) == -1) + goto unregister; + + if (sh->dma_channel != 0xFF) + snprintf(dma_info, sizeof(dma_info), "DMA %d", sh->dma_channel); + shost_printk(KERN_INFO, sh, "Adaptec AHA-1542 (SCSI-ID %d) at IO 0x%x, IRQ %d, %s\n", + sh->this_id, base_io, sh->irq, dma_info); + if (aha1542->bios_translation == BIOS_TRANSLATION_25563) + shost_printk(KERN_INFO, sh, "Using extended bios translation\n"); + + setup_mailboxes(sh); + + if (request_irq(sh->irq, aha1542_interrupt, 0, "aha1542", sh)) { + shost_printk(KERN_ERR, sh, "Unable to 
allocate IRQ.\n"); + goto unregister; } -#endif - - /* - * Hunt for ISA Plug'n'Pray Adaptecs (AHA1535) - */ - - if(isapnp) - { - struct pnp_dev *pdev = NULL; - for(indx = 0; indx < ARRAY_SIZE(bases); indx++) { - if(bases[indx]) - continue; - pdev = pnp_find_dev(NULL, ISAPNP_VENDOR('A', 'D', 'P'), - ISAPNP_FUNCTION(0x1542), pdev); - if(pdev==NULL) - break; - /* - * Activate the PnP card - */ - - if(pnp_device_attach(pdev)<0) - continue; - - if(pnp_activate_dev(pdev)<0) { - pnp_device_detach(pdev); - continue; - } - - if(!pnp_port_valid(pdev, 0)) { - pnp_device_detach(pdev); - continue; - } - - bases[indx] = pnp_port_start(pdev, 0); - - /* The card can be queried for its DMA, we have - the DMA set up that is enough */ - - printk(KERN_INFO "ISAPnP found an AHA1535 at I/O 0x%03X\n", bases[indx]); + if (sh->dma_channel != 0xFF) { + if (request_dma(sh->dma_channel, "aha1542")) { + shost_printk(KERN_ERR, sh, "Unable to allocate DMA channel.\n"); + goto free_irq; + } + if (sh->dma_channel == 0 || sh->dma_channel >= 5) { + set_dma_mode(sh->dma_channel, DMA_MODE_CASCADE); + enable_dma(sh->dma_channel); } } - for (indx = 0; indx < ARRAY_SIZE(bases); indx++) - if (bases[indx] != 0 && request_region(bases[indx], 4, "aha1542")) { - shpnt = scsi_register(tpnt, - sizeof(struct aha1542_hostdata)); - - if(shpnt==NULL) { - release_region(bases[indx], 4); - continue; - } - if (!aha1542_test_port(bases[indx], shpnt)) - goto unregister; - - base_io = bases[indx]; - - /* Set the Bus on/off-times as not to ruin floppy performance */ - { - unchar oncmd[] = {CMD_BUSON_TIME, 7}; - unchar offcmd[] = {CMD_BUSOFF_TIME, 5}; - - if (setup_called[indx]) { - oncmd[1] = setup_buson[indx]; - offcmd[1] = setup_busoff[indx]; - } - aha1542_intr_reset(base_io); - aha1542_out(base_io, oncmd, 2); - WAIT(INTRFLAGS(base_io), INTRMASK, HACC, 0); - aha1542_intr_reset(base_io); - aha1542_out(base_io, offcmd, 2); - WAIT(INTRFLAGS(base_io), INTRMASK, HACC, 0); - if (setup_dmaspeed[indx] >= 0) { - unchar dmacmd[] = {CMD_DMASPEED, 0}; - dmacmd[1] = setup_dmaspeed[indx]; - aha1542_intr_reset(base_io); - aha1542_out(base_io, dmacmd, 2); - WAIT(INTRFLAGS(base_io), INTRMASK, HACC, 0); - } - while (0) { -fail: - printk(KERN_ERR "aha1542_detect: setting bus on/off-time failed\n"); - } - aha1542_intr_reset(base_io); - } - if (aha1542_query(base_io, &trans)) - goto unregister; - - if (aha1542_getconfig(base_io, &irq_level, &dma_chan, &scsi_id) == -1) - goto unregister; - - printk(KERN_INFO "Configuring Adaptec (SCSI-ID %d) at IO:%x, IRQ %d", scsi_id, base_io, irq_level); - if (dma_chan != 0xFF) - printk(", DMA priority %d", dma_chan); - printk("\n"); - - DEB(aha1542_stat()); - setup_mailboxes(base_io, shpnt); - - DEB(aha1542_stat()); - - DEB(printk("aha1542_detect: enable interrupt channel %d\n", irq_level)); - spin_lock_irqsave(&aha1542_lock, flags); - if (request_irq(irq_level, do_aha1542_intr_handle, 0, - "aha1542", shpnt)) { - printk(KERN_ERR "Unable to allocate IRQ for adaptec controller.\n"); - spin_unlock_irqrestore(&aha1542_lock, flags); - goto unregister; - } - if (dma_chan != 0xFF) { - if (request_dma(dma_chan, "aha1542")) { - printk(KERN_ERR "Unable to allocate DMA channel for Adaptec.\n"); - free_irq(irq_level, shpnt); - spin_unlock_irqrestore(&aha1542_lock, flags); - goto unregister; - } - if (dma_chan == 0 || dma_chan >= 5) { - set_dma_mode(dma_chan, DMA_MODE_CASCADE); - enable_dma(dma_chan); - } - } - - shpnt->this_id = scsi_id; - shpnt->unique_id = base_io; - shpnt->io_port = base_io; - shpnt->n_io_port = 4; /* Number of bytes of 
I/O space used */ - shpnt->dma_channel = dma_chan; - shpnt->irq = irq_level; - HOSTDATA(shpnt)->bios_translation = trans; - if (trans == BIOS_TRANSLATION_25563) - printk(KERN_INFO "aha1542.c: Using extended bios translation\n"); - HOSTDATA(shpnt)->aha1542_last_mbi_used = (2 * AHA1542_MAILBOXES - 1); - HOSTDATA(shpnt)->aha1542_last_mbo_used = (AHA1542_MAILBOXES - 1); - memset(HOSTDATA(shpnt)->SCint, 0, sizeof(HOSTDATA(shpnt)->SCint)); - spin_unlock_irqrestore(&aha1542_lock, flags); -#if 0 - DEB(printk(" *** READ CAPACITY ***\n")); - - { - unchar buf[8]; - static unchar cmd[] = { READ_CAPACITY, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int i; - - for (i = 0; i < sizeof(buf); ++i) - buf[i] = 0x87; - for (i = 0; i < 2; ++i) - if (!aha1542_command(i, cmd, buf, sizeof(buf))) { - printk(KERN_DEBUG "aha_detect: LU %d sector_size %d device_size %d\n", - i, xscsi2int(buf + 4), xscsi2int(buf)); - } - } - DEB(printk(" *** NOW RUNNING MY OWN TEST *** \n")); + if (scsi_add_host(sh, pdev)) + goto free_dma; - for (i = 0; i < 4; ++i) { - unsigned char cmd[10]; - static buffer[512]; + scsi_scan_host(sh); - cmd[0] = READ_10; - cmd[1] = 0; - xany2scsi(cmd + 2, i); - cmd[6] = 0; - cmd[7] = 0; - cmd[8] = 1; - cmd[9] = 0; - aha1542_command(0, cmd, buffer, 512); - } -#endif - count++; - continue; + return sh; +free_dma: + if (sh->dma_channel != 0xff) + free_dma(sh->dma_channel); +free_irq: + free_irq(sh->irq, sh); unregister: - release_region(bases[indx], 4); - scsi_unregister(shpnt); - continue; + scsi_host_put(sh); +release: + release_region(base_io, AHA1542_REGION_SIZE); - }; - - return count; + return NULL; } -static int aha1542_release(struct Scsi_Host *shost) +static int aha1542_release(struct Scsi_Host *sh) { - if (shost->irq) - free_irq(shost->irq, shost); - if (shost->dma_channel != 0xff) - free_dma(shost->dma_channel); - if (shost->io_port && shost->n_io_port) - release_region(shost->io_port, shost->n_io_port); - scsi_unregister(shost); + scsi_remove_host(sh); + if (sh->dma_channel != 0xff) + free_dma(sh->dma_channel); + if (sh->irq) + free_irq(sh->irq, sh); + if (sh->io_port && sh->n_io_port) + release_region(sh->io_port, sh->n_io_port); + scsi_host_put(sh); return 0; } -static int aha1542_restart(struct Scsi_Host *shost) -{ - int i; - int count = 0; -#if 0 - unchar ahacmd = CMD_START_SCSI; -#endif - - for (i = 0; i < AHA1542_MAILBOXES; i++) - if (HOSTDATA(shost)->SCint[i] && - !(HOSTDATA(shost)->SCint[i]->device->soft_reset)) { -#if 0 - HOSTDATA(shost)->mb[i].status = 1; /* Indicate ready to restart... */ -#endif - count++; - } - printk(KERN_DEBUG "Potential to restart %d stalled commands...\n", count); -#if 0 - /* start scsi command */ - if (count) - aha1542_out(shost->io_port, &ahacmd, 1); -#endif - return 0; -} /* * This is a device reset. This is handled by sending a special command * to the device. 
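
Note: aha1542_dev_reset() below claims an outgoing mailbox the same way aha1542_queuecommand() does above: advance one past the last slot used, wrap within [0, N), and take the first slot whose status byte is clear and which has no command attached. The driver does this under host_lock and panics when nothing is free; the single-threaded model below returns -1 instead.

/*
 * Model of the outgoing-mailbox claim.  A slot is free when its
 * status byte is 0 and no command occupies it; all N slots are
 * tried, starting one past the previous claim.
 */
#include <stdio.h>
#include <stddef.h>

#define N 8                             /* AHA1542_MAILBOXES */

struct slot {
        unsigned char status;
        void *cmd;                      /* stands in for int_cmds[mbo] */
};

static int claim_outgoing(struct slot mb[N], int *last_mbo)
{
        int i, mbo = *last_mbo;

        for (i = 0; i < N; i++) {
                mbo = (mbo + 1) % N;
                if (mb[mbo].status == 0 && mb[mbo].cmd == NULL) {
                        *last_mbo = mbo;
                        return mbo;     /* claimed */
                }
        }
        return -1;                      /* every slot busy */
}

int main(void)
{
        struct slot mb[N] = { { 1, NULL } };    /* slot 0 busy */
        int last = N - 1;

        printf("claimed mbo=%d\n", claim_outgoing(mb, &last));
        return 0;
}
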
*/ -static int aha1542_dev_reset(Scsi_Cmnd * SCpnt) +static int aha1542_dev_reset(struct scsi_cmnd *cmd) { + struct Scsi_Host *sh = cmd->device->host; + struct aha1542_hostdata *aha1542 = shost_priv(sh); unsigned long flags; - struct mailbox *mb; - unchar target = SCpnt->device->id; - unchar lun = SCpnt->device->lun; + struct mailbox *mb = aha1542->mb; + u8 target = cmd->device->id; + u8 lun = cmd->device->lun; int mbo; - struct ccb *ccb; - unchar ahacmd = CMD_START_SCSI; - - ccb = HOSTDATA(SCpnt->device->host)->ccb; - mb = HOSTDATA(SCpnt->device->host)->mb; + struct ccb *ccb = aha1542->ccb; - spin_lock_irqsave(&aha1542_lock, flags); - mbo = HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used + 1; + spin_lock_irqsave(sh->host_lock, flags); + mbo = aha1542->aha1542_last_mbo_used + 1; if (mbo >= AHA1542_MAILBOXES) mbo = 0; do { - if (mb[mbo].status == 0 && HOSTDATA(SCpnt->device->host)->SCint[mbo] == NULL) + if (mb[mbo].status == 0 && aha1542->int_cmds[mbo] == NULL) break; mbo++; if (mbo >= AHA1542_MAILBOXES) mbo = 0; - } while (mbo != HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used); + } while (mbo != aha1542->aha1542_last_mbo_used); - if (mb[mbo].status || HOSTDATA(SCpnt->device->host)->SCint[mbo]) + if (mb[mbo].status || aha1542->int_cmds[mbo]) panic("Unable to find empty mailbox for aha1542.\n"); - HOSTDATA(SCpnt->device->host)->SCint[mbo] = SCpnt; /* This will effectively - prevent someone else from - screwing with this cdb. */ + aha1542->int_cmds[mbo] = cmd; /* This will effectively + prevent someone else from + screwing with this cdb. */ - HOSTDATA(SCpnt->device->host)->aha1542_last_mbo_used = mbo; - spin_unlock_irqrestore(&aha1542_lock, flags); + aha1542->aha1542_last_mbo_used = mbo; - any2scsi(mb[mbo].ccbptr, SCSI_BUF_PA(&ccb[mbo])); /* This gets trashed for some reason */ + any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */ memset(&ccb[mbo], 0, sizeof(struct ccb)); @@ -1274,141 +838,43 @@ static int aha1542_dev_reset(Scsi_Cmnd * SCpnt) * Now tell the 1542 to flush all pending commands for this * target */ - aha1542_out(SCpnt->device->host->io_port, &ahacmd, 1); + aha1542_outb(sh->io_port, CMD_START_SCSI); + spin_unlock_irqrestore(sh->host_lock, flags); - scmd_printk(KERN_WARNING, SCpnt, + scmd_printk(KERN_WARNING, cmd, "Trying device reset for target\n"); return SUCCESS; - - -#ifdef ERIC_neverdef - /* - * With the 1542 we apparently never get an interrupt to - * acknowledge a device reset being sent. Then again, Leonard - * says we are doing this wrong in the first place... - * - * Take a wait and see attitude. If we get spurious interrupts, - * then the device reset is doing something sane and useful, and - * we will wait for the interrupt to post completion. - */ - printk(KERN_WARNING "Sent BUS DEVICE RESET to target %d\n", SCpnt->target); - - /* - * Free the command block for all commands running on this - * target... 
- */ - for (i = 0; i < AHA1542_MAILBOXES; i++) { - if (HOSTDATA(SCpnt->host)->SCint[i] && - HOSTDATA(SCpnt->host)->SCint[i]->target == SCpnt->target) { - Scsi_Cmnd *SCtmp; - SCtmp = HOSTDATA(SCpnt->host)->SCint[i]; - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; - HOSTDATA(SCpnt->host)->SCint[i] = NULL; - HOSTDATA(SCpnt->host)->mb[i].status = 0; - } - } - return SUCCESS; - - return FAILED; -#endif /* ERIC_neverdef */ } -static int aha1542_bus_reset(Scsi_Cmnd * SCpnt) +static int aha1542_reset(struct scsi_cmnd *cmd, u8 reset_cmd) { + struct Scsi_Host *sh = cmd->device->host; + struct aha1542_hostdata *aha1542 = shost_priv(sh); + unsigned long flags; int i; + spin_lock_irqsave(sh->host_lock, flags); /* * This does a scsi reset for all devices on the bus. * In principle, we could also reset the 1542 - should * we do this? Try this first, and we can add that later * if it turns out to be useful. */ - outb(SCRST, CONTROL(SCpnt->device->host->io_port)); + outb(reset_cmd, CONTROL(cmd->device->host->io_port)); - /* - * Wait for the thing to settle down a bit. Unfortunately - * this is going to basically lock up the machine while we - * wait for this to complete. To be 100% correct, we need to - * check for timeout, and if we are doing something like this - * we are pretty desperate anyways. - */ - ssleep(4); - - spin_lock_irq(SCpnt->device->host->host_lock); - - WAIT(STATUS(SCpnt->device->host->io_port), - STATMASK, INIT | IDLE, STST | DIAGF | INVDCMD | DF | CDF); - - /* - * Now try to pick up the pieces. For all pending commands, - * free any internal data structures, and basically clear things - * out. We do not try and restart any commands or anything - - * the strategy handler takes care of that crap. - */ - printk(KERN_WARNING "Sent BUS RESET to scsi host %d\n", SCpnt->device->host->host_no); - - for (i = 0; i < AHA1542_MAILBOXES; i++) { - if (HOSTDATA(SCpnt->device->host)->SCint[i] != NULL) { - Scsi_Cmnd *SCtmp; - SCtmp = HOSTDATA(SCpnt->device->host)->SCint[i]; - - - if (SCtmp->device->soft_reset) { - /* - * If this device implements the soft reset option, - * then it is still holding onto the command, and - * may yet complete it. In this case, we don't - * flush the data. - */ - continue; - } - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; - HOSTDATA(SCpnt->device->host)->SCint[i] = NULL; - HOSTDATA(SCpnt->device->host)->mb[i].status = 0; - } + if (!wait_mask(STATUS(cmd->device->host->io_port), + STATMASK, IDLE, STST | DIAGF | INVDCMD | DF | CDF, 0)) { + spin_unlock_irqrestore(sh->host_lock, flags); + return FAILED; } - spin_unlock_irq(SCpnt->device->host->host_lock); - return SUCCESS; - -fail: - spin_unlock_irq(SCpnt->device->host->host_lock); - return FAILED; -} - -static int aha1542_host_reset(Scsi_Cmnd * SCpnt) -{ - int i; - - /* - * This does a scsi reset for all devices on the bus. - * In principle, we could also reset the 1542 - should - * we do this? Try this first, and we can add that later - * if it turns out to be useful. - */ - outb(HRST | SCRST, CONTROL(SCpnt->device->host->io_port)); - - /* - * Wait for the thing to settle down a bit. Unfortunately - * this is going to basically lock up the machine while we - * wait for this to complete. To be 100% correct, we need to - * check for timeout, and if we are doing something like this - * we are pretty desperate anyways. 
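
Note: the separate bus- and host-reset handlers collapse here into a single aha1542_reset() parameterised by the CONTROL-register bits, with the mailboxes re-initialised only when a hard reset (HRST) was issued, since only that wipes the adapter's mailbox setup. A toy model of that flag-driven split, register writes reduced to printf and bit values taken from aha1542.h:

/*
 * Model of the merged reset paths: one helper, behaviour selected
 * by the CONTROL-register bits it is given.
 */
#include <stdio.h>

#define HRST  0x80                      /* hard reset */
#define SCRST 0x10                      /* SCSI bus reset */

static void do_reset(unsigned char reset_cmd)
{
        printf("outb(0x%02x, CONTROL)\n", reset_cmd);
        /* the driver polls for IDLE here with wait_mask() */
        if (reset_cmd & HRST)
                printf("setup_mailboxes()\n");  /* hard reset only */
}

int main(void)
{
        do_reset(SCRST);                /* eh_bus_reset_handler */
        do_reset(HRST | SCRST);         /* eh_host_reset_handler */
        return 0;
}
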
- */ - ssleep(4); - spin_lock_irq(SCpnt->device->host->host_lock); - - WAIT(STATUS(SCpnt->device->host->io_port), - STATMASK, INIT | IDLE, STST | DIAGF | INVDCMD | DF | CDF); - /* * We need to do this too before the 1542 can interact with - * us again. + * us again after host reset. */ - setup_mailboxes(SCpnt->device->host->io_port, SCpnt->device->host); + if (reset_cmd & HRST) + setup_mailboxes(cmd->device->host); /* * Now try to pick up the pieces. For all pending commands, @@ -1416,14 +882,14 @@ static int aha1542_host_reset(Scsi_Cmnd * SCpnt) * out. We do not try and restart any commands or anything - * the strategy handler takes care of that crap. */ - printk(KERN_WARNING "Sent BUS RESET to scsi host %d\n", SCpnt->device->host->host_no); + shost_printk(KERN_WARNING, cmd->device->host, "Sent BUS RESET to scsi host %d\n", cmd->device->host->host_no); for (i = 0; i < AHA1542_MAILBOXES; i++) { - if (HOSTDATA(SCpnt->device->host)->SCint[i] != NULL) { - Scsi_Cmnd *SCtmp; - SCtmp = HOSTDATA(SCpnt->device->host)->SCint[i]; + if (aha1542->int_cmds[i] != NULL) { + struct scsi_cmnd *tmp_cmd; + tmp_cmd = aha1542->int_cmds[i]; - if (SCtmp->device->soft_reset) { + if (tmp_cmd->device->soft_reset) { /* * If this device implements the soft reset option, * then it is still holding onto the command, and @@ -1432,241 +898,51 @@ static int aha1542_host_reset(Scsi_Cmnd * SCpnt) */ continue; } - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; - HOSTDATA(SCpnt->device->host)->SCint[i] = NULL; - HOSTDATA(SCpnt->device->host)->mb[i].status = 0; + kfree(tmp_cmd->host_scribble); + tmp_cmd->host_scribble = NULL; + aha1542->int_cmds[i] = NULL; + aha1542->mb[i].status = 0; } } - spin_unlock_irq(SCpnt->device->host->host_lock); + spin_unlock_irqrestore(sh->host_lock, flags); return SUCCESS; - -fail: - spin_unlock_irq(SCpnt->device->host->host_lock); - return FAILED; } -#if 0 -/* - * These are the old error handling routines. They are only temporarily - * here while we play with the new error handling code. - */ -static int aha1542_old_abort(Scsi_Cmnd * SCpnt) +static int aha1542_bus_reset(struct scsi_cmnd *cmd) { -#if 0 - unchar ahacmd = CMD_START_SCSI; - unsigned long flags; - struct mailbox *mb; - int mbi, mbo, i; - - printk(KERN_DEBUG "In aha1542_abort: %x %x\n", - inb(STATUS(SCpnt->host->io_port)), - inb(INTRFLAGS(SCpnt->host->io_port))); - - spin_lock_irqsave(&aha1542_lock, flags); - mb = HOSTDATA(SCpnt->host)->mb; - mbi = HOSTDATA(SCpnt->host)->aha1542_last_mbi_used + 1; - if (mbi >= 2 * AHA1542_MAILBOXES) - mbi = AHA1542_MAILBOXES; - - do { - if (mb[mbi].status != 0) - break; - mbi++; - if (mbi >= 2 * AHA1542_MAILBOXES) - mbi = AHA1542_MAILBOXES; - } while (mbi != HOSTDATA(SCpnt->host)->aha1542_last_mbi_used); - spin_unlock_irqrestore(&aha1542_lock, flags); - - if (mb[mbi].status) { - printk(KERN_ERR "Lost interrupt discovered on irq %d - attempting to recover\n", - SCpnt->host->irq); - aha1542_intr_handle(SCpnt->host, NULL); - return 0; - } - /* OK, no lost interrupt. Try looking to see how many pending commands - we think we have. */ - - for (i = 0; i < AHA1542_MAILBOXES; i++) - if (HOSTDATA(SCpnt->host)->SCint[i]) { - if (HOSTDATA(SCpnt->host)->SCint[i] == SCpnt) { - printk(KERN_ERR "Timed out command pending for %s\n", - SCpnt->request->rq_disk ? - SCpnt->request->rq_disk->disk_name : "?" 
- ); - if (HOSTDATA(SCpnt->host)->mb[i].status) { - printk(KERN_ERR "OGMB still full - restarting\n"); - aha1542_out(SCpnt->host->io_port, &ahacmd, 1); - }; - } else - printk(KERN_ERR "Other pending command %s\n", - SCpnt->request->rq_disk ? - SCpnt->request->rq_disk->disk_name : "?" - ); - } -#endif - - DEB(printk("aha1542_abort\n")); -#if 0 - spin_lock_irqsave(&aha1542_lock, flags); - for (mbo = 0; mbo < AHA1542_MAILBOXES; mbo++) { - if (SCpnt == HOSTDATA(SCpnt->host)->SCint[mbo]) { - mb[mbo].status = 2; /* Abort command */ - aha1542_out(SCpnt->host->io_port, &ahacmd, 1); /* start scsi command */ - spin_unlock_irqrestore(&aha1542_lock, flags); - break; - } - } - if (AHA1542_MAILBOXES == mbo) - spin_unlock_irqrestore(&aha1542_lock, flags); -#endif - return SCSI_ABORT_SNOOZE; + return aha1542_reset(cmd, SCRST); } -/* We do not implement a reset function here, but the upper level code - assumes that it will get some kind of response for the command in - SCpnt. We must oblige, or the command will hang the scsi system. - For a first go, we assume that the 1542 notifies us with all of the - pending commands (it does implement soft reset, after all). */ - -static int aha1542_old_reset(Scsi_Cmnd * SCpnt, unsigned int reset_flags) +static int aha1542_host_reset(struct scsi_cmnd *cmd) { - unchar ahacmd = CMD_START_SCSI; - int i; - - /* - * See if a bus reset was suggested. - */ - if (reset_flags & SCSI_RESET_SUGGEST_BUS_RESET) { - /* - * This does a scsi reset for all devices on the bus. - * In principle, we could also reset the 1542 - should - * we do this? Try this first, and we can add that later - * if it turns out to be useful. - */ - outb(HRST | SCRST, CONTROL(SCpnt->host->io_port)); - - /* - * Wait for the thing to settle down a bit. Unfortunately - * this is going to basically lock up the machine while we - * wait for this to complete. To be 100% correct, we need to - * check for timeout, and if we are doing something like this - * we are pretty desperate anyways. - */ - WAIT(STATUS(SCpnt->host->io_port), - STATMASK, INIT | IDLE, STST | DIAGF | INVDCMD | DF | CDF); - - /* - * We need to do this too before the 1542 can interact with - * us again. - */ - setup_mailboxes(SCpnt->host->io_port, SCpnt->host); - - /* - * Now try to pick up the pieces. Restart all commands - * that are currently active on the bus, and reset all of - * the datastructures. We have some time to kill while - * things settle down, so print a nice message. - */ - printk(KERN_WARNING "Sent BUS RESET to scsi host %d\n", SCpnt->host->host_no); - - for (i = 0; i < AHA1542_MAILBOXES; i++) - if (HOSTDATA(SCpnt->host)->SCint[i] != NULL) { - Scsi_Cmnd *SCtmp; - SCtmp = HOSTDATA(SCpnt->host)->SCint[i]; - SCtmp->result = DID_RESET << 16; - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; - printk(KERN_WARNING "Sending DID_RESET for target %d\n", SCpnt->target); - SCtmp->scsi_done(SCpnt); - - HOSTDATA(SCpnt->host)->SCint[i] = NULL; - HOSTDATA(SCpnt->host)->mb[i].status = 0; - } - /* - * Now tell the mid-level code what we did here. Since - * we have restarted all of the outstanding commands, - * then report SUCCESS. 
- */ - return (SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET); -fail: - printk(KERN_CRIT "aha1542.c: Unable to perform hard reset.\n"); - printk(KERN_CRIT "Power cycle machine to reset\n"); - return (SCSI_RESET_ERROR | SCSI_RESET_BUS_RESET); - - - } else { - /* This does a selective reset of just the one device */ - /* First locate the ccb for this command */ - for (i = 0; i < AHA1542_MAILBOXES; i++) - if (HOSTDATA(SCpnt->host)->SCint[i] == SCpnt) { - HOSTDATA(SCpnt->host)->ccb[i].op = 0x81; /* BUS DEVICE RESET */ - /* Now tell the 1542 to flush all pending commands for this target */ - aha1542_out(SCpnt->host->io_port, &ahacmd, 1); - - /* Here is the tricky part. What to do next. Do we get an interrupt - for the commands that we aborted with the specified target, or - do we generate this on our own? Try it without first and see - what happens */ - printk(KERN_WARNING "Sent BUS DEVICE RESET to target %d\n", SCpnt->target); - - /* If the first does not work, then try the second. I think the - first option is more likely to be correct. Free the command - block for all commands running on this target... */ - for (i = 0; i < AHA1542_MAILBOXES; i++) - if (HOSTDATA(SCpnt->host)->SCint[i] && - HOSTDATA(SCpnt->host)->SCint[i]->target == SCpnt->target) { - Scsi_Cmnd *SCtmp; - SCtmp = HOSTDATA(SCpnt->host)->SCint[i]; - SCtmp->result = DID_RESET << 16; - kfree(SCtmp->host_scribble); - SCtmp->host_scribble = NULL; - printk(KERN_WARNING "Sending DID_RESET for target %d\n", SCpnt->target); - SCtmp->scsi_done(SCpnt); - - HOSTDATA(SCpnt->host)->SCint[i] = NULL; - HOSTDATA(SCpnt->host)->mb[i].status = 0; - } - return SCSI_RESET_SUCCESS; - } - } - /* No active command at this time, so this means that each time we got - some kind of response the last time through. Tell the mid-level code - to request sense information in order to decide what to do next. 
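
Note: the aha1542_biosparam() rewrite just below picks the BIOS geometry from capacity: 255 heads x 63 sectors for disks of at least 0x200000 sectors (1 GB) under the extended (255/63) translation, the classic 64 x 32 otherwise, with cylinders as the remaining factor. One caveat: sector_div() divides its first argument in place and returns the remainder, so the cylinder count is the quotient, not the macro's return value; the plain-arithmetic model below therefore takes the quotient explicitly.

/*
 * Geometry model: heads/sectors chosen by capacity and translation
 * mode, cylinders = capacity / (heads * sectors).  0x200000 sectors
 * of 512 bytes is the 1 GB threshold used in the patch.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static void bios_geometry(uint64_t capacity, bool extended, int geom[3])
{
        if (capacity >= 0x200000 && extended) {
                geom[0] = 255;          /* heads */
                geom[1] = 63;           /* sectors */
        } else {
                geom[0] = 64;
                geom[1] = 32;
        }
        geom[2] = capacity / (geom[0] * geom[1]);       /* cylinders */
}

int main(void)
{
        int g[3];

        bios_geometry(4194304, true, g);        /* 2 GB disk */
        printf("H/S/C = %d/%d/%d\n", g[0], g[1], g[2]);
        return 0;
}
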
*/ - return SCSI_RESET_PUNT; + return aha1542_reset(cmd, HRST | SCRST); } -#endif /* end of big comment block around old_abort + old_reset */ static int aha1542_biosparam(struct scsi_device *sdev, - struct block_device *bdev, sector_t capacity, int *ip) + struct block_device *bdev, sector_t capacity, int geom[]) { - int translation_algorithm; - int size = capacity; - - translation_algorithm = HOSTDATA(sdev->host)->bios_translation; + struct aha1542_hostdata *aha1542 = shost_priv(sdev->host); - if ((size >> 11) > 1024 && translation_algorithm == BIOS_TRANSLATION_25563) { + if (capacity >= 0x200000 && + aha1542->bios_translation == BIOS_TRANSLATION_25563) { /* Please verify that this is the same as what DOS returns */ - ip[0] = 255; - ip[1] = 63; - ip[2] = size / 255 / 63; + geom[0] = 255; /* heads */ + geom[1] = 63; /* sectors */ } else { - ip[0] = 64; - ip[1] = 32; - ip[2] = size >> 11; + geom[0] = 64; /* heads */ + geom[1] = 32; /* sectors */ } + geom[2] = sector_div(capacity, geom[0] * geom[1]); /* cylinders */ return 0; } MODULE_LICENSE("GPL"); - static struct scsi_host_template driver_template = { + .module = THIS_MODULE, .proc_name = "aha1542", .name = "Adaptec 1542", - .detect = aha1542_detect, - .release = aha1542_release, .queuecommand = aha1542_queuecommand, .eh_device_reset_handler= aha1542_dev_reset, .eh_bus_reset_handler = aha1542_bus_reset, @@ -1674,9 +950,124 @@ static struct scsi_host_template driver_template = { .bios_param = aha1542_biosparam, .can_queue = AHA1542_MAILBOXES, .this_id = 7, - .sg_tablesize = AHA1542_SCATTER, - .cmd_per_lun = AHA1542_CMDLUN, + .sg_tablesize = 16, + .cmd_per_lun = 1, .unchecked_isa_dma = 1, .use_clustering = ENABLE_CLUSTERING, }; -#include "scsi_module.c" + +static int aha1542_isa_match(struct device *pdev, unsigned int ndev) +{ + struct Scsi_Host *sh = aha1542_hw_init(&driver_template, pdev, ndev); + + if (!sh) + return 0; + + dev_set_drvdata(pdev, sh); + return 1; +} + +static int aha1542_isa_remove(struct device *pdev, + unsigned int ndev) +{ + aha1542_release(dev_get_drvdata(pdev)); + dev_set_drvdata(pdev, NULL); + return 0; +} + +static struct isa_driver aha1542_isa_driver = { + .match = aha1542_isa_match, + .remove = aha1542_isa_remove, + .driver = { + .name = "aha1542" + }, +}; +static int isa_registered; + +#ifdef CONFIG_PNP +static struct pnp_device_id aha1542_pnp_ids[] = { + { .id = "ADP1542" }, + { .id = "" } +}; +MODULE_DEVICE_TABLE(pnp, aha1542_pnp_ids); + +static int aha1542_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) +{ + int indx; + struct Scsi_Host *sh; + + for (indx = 0; indx < ARRAY_SIZE(io); indx++) { + if (io[indx]) + continue; + + if (pnp_activate_dev(pdev) < 0) + continue; + + io[indx] = pnp_port_start(pdev, 0); + + /* The card can be queried for its DMA, we have + the DMA set up that is enough */ + + dev_info(&pdev->dev, "ISAPnP found an AHA1535 at I/O 0x%03X", io[indx]); + } + + sh = aha1542_hw_init(&driver_template, &pdev->dev, indx); + if (!sh) + return -ENODEV; + + pnp_set_drvdata(pdev, sh); + return 0; +} + +static void aha1542_pnp_remove(struct pnp_dev *pdev) +{ + aha1542_release(pnp_get_drvdata(pdev)); + pnp_set_drvdata(pdev, NULL); +} + +static struct pnp_driver aha1542_pnp_driver = { + .name = "aha1542", + .id_table = aha1542_pnp_ids, + .probe = aha1542_pnp_probe, + .remove = aha1542_pnp_remove, +}; +static int pnp_registered; +#endif /* CONFIG_PNP */ + +static int __init aha1542_init(void) +{ + int ret = 0; + +#ifdef CONFIG_PNP + if (isapnp) { + ret = 
pnp_register_driver(&aha1542_pnp_driver); + if (!ret) + pnp_registered = 1; + } +#endif + ret = isa_register_driver(&aha1542_isa_driver, MAXBOARDS); + if (!ret) + isa_registered = 1; + +#ifdef CONFIG_PNP + if (pnp_registered) + ret = 0; +#endif + if (isa_registered) + ret = 0; + + return ret; +} + +static void __exit aha1542_exit(void) +{ +#ifdef CONFIG_PNP + if (pnp_registered) + pnp_unregister_driver(&aha1542_pnp_driver); +#endif + if (isa_registered) + isa_unregister_driver(&aha1542_isa_driver); +} + +module_init(aha1542_init); +module_exit(aha1542_exit); diff --git a/drivers/scsi/aha1542.h b/drivers/scsi/aha1542.h index b871d2b57f93..0fe9bae1b3d1 100644 --- a/drivers/scsi/aha1542.h +++ b/drivers/scsi/aha1542.h @@ -1,64 +1,35 @@ -#ifndef _AHA1542_H - -/* $Id: aha1542.h,v 1.1 1992/07/24 06:27:38 root Exp root $ - * - * Header file for the adaptec 1542 driver for Linux - * - * $Log: aha1542.h,v $ - * Revision 1.1 1992/07/24 06:27:38 root - * Initial revision - * - * Revision 1.2 1992/07/04 18:41:49 root - * Replaced distribution with current drivers - * - * Revision 1.3 1992/06/23 23:58:20 root - * Fixes. - * - * Revision 1.2 1992/05/26 22:13:23 root - * Changed bug that prevented DMA above first 2 mbytes. - * - * Revision 1.1 1992/05/22 21:00:29 root - * Initial revision - * - * Revision 1.1 1992/04/24 18:01:50 root - * Initial revision - * - * Revision 1.1 1992/04/02 03:23:13 drew - * Initial revision - * - * Revision 1.3 1992/01/27 14:46:29 tthorn - * *** empty log message *** - * - */ +#ifndef _AHA1542_H_ +#define _AHA1542_H_ #include <linux/types.h> /* I/O Port interface 4.2 */ /* READ */ #define STATUS(base) base -#define STST 0x80 /* Self Test in Progress */ -#define DIAGF 0x40 /* Internal Diagnostic Failure */ -#define INIT 0x20 /* Mailbox Initialization Required */ -#define IDLE 0x10 /* SCSI Host Adapter Idle */ -#define CDF 0x08 /* Command/Data Out Port Full */ -#define DF 0x04 /* Data In Port Full */ -#define INVDCMD 0x01 /* Invalid H A Command */ -#define STATMASK 0xfd /* 0x02 is reserved */ +#define STST BIT(7) /* Self Test in Progress */ +#define DIAGF BIT(6) /* Internal Diagnostic Failure */ +#define INIT BIT(5) /* Mailbox Initialization Required */ +#define IDLE BIT(4) /* SCSI Host Adapter Idle */ +#define CDF BIT(3) /* Command/Data Out Port Full */ +#define DF BIT(2) /* Data In Port Full */ +/* BIT(1) is reserved */ +#define INVDCMD BIT(0) /* Invalid H A Command */ +#define STATMASK (STST | DIAGF | INIT | IDLE | CDF | DF | INVDCMD) #define INTRFLAGS(base) (STATUS(base)+2) -#define ANYINTR 0x80 /* Any Interrupt */ -#define SCRD 0x08 /* SCSI Reset Detected */ -#define HACC 0x04 /* HA Command Complete */ -#define MBOA 0x02 /* MBO Empty */ -#define MBIF 0x01 /* MBI Full */ -#define INTRMASK 0x8f +#define ANYINTR BIT(7) /* Any Interrupt */ +#define SCRD BIT(3) /* SCSI Reset Detected */ +#define HACC BIT(2) /* HA Command Complete */ +#define MBOA BIT(1) /* MBO Empty */ +#define MBIF BIT(0) /* MBI Full */ +#define INTRMASK (ANYINTR | SCRD | HACC | MBOA | MBIF) /* WRITE */ #define CONTROL(base) STATUS(base) -#define HRST 0x80 /* Hard Reset */ -#define SRST 0x40 /* Soft Reset */ -#define IRST 0x20 /* Interrupt Reset */ -#define SCRST 0x10 /* SCSI Bus Reset */ +#define HRST BIT(7) /* Hard Reset */ +#define SRST BIT(6) /* Soft Reset */ +#define IRST BIT(5) /* Interrupt Reset */ +#define SCRST BIT(4) /* SCSI Bus Reset */ /* READ/WRITE */ #define DATA(base) (STATUS(base)+1) @@ -80,14 +51,14 @@ /* Mailbox Definition 5.2.1 and 5.2.2 */ struct mailbox { - unchar status; /* Command/Status 
*/ - unchar ccbptr[3]; /* msb, .., lsb */ + u8 status; /* Command/Status */ + u8 ccbptr[3]; /* msb, .., lsb */ }; /* This is used with scatter-gather */ struct chain { - unchar datalen[3]; /* Size of this part of chain */ - unchar dataptr[3]; /* Location of data */ + u8 datalen[3]; /* Size of this part of chain */ + u8 dataptr[3]; /* Location of data */ }; /* These belong in scsi.h also */ @@ -100,51 +71,32 @@ static inline void any2scsi(u8 *p, u32 v) #define scsi2int(up) ( (((long)*(up)) << 16) + (((long)(up)[1]) << 8) + ((long)(up)[2]) ) -#define xany2scsi(up, p) \ -(up)[0] = ((long)(p)) >> 24; \ -(up)[1] = ((long)(p)) >> 16; \ -(up)[2] = ((long)(p)) >> 8; \ -(up)[3] = ((long)(p)); - #define xscsi2int(up) ( (((long)(up)[0]) << 24) + (((long)(up)[1]) << 16) \ + (((long)(up)[2]) << 8) + ((long)(up)[3]) ) #define MAX_CDB 12 #define MAX_SENSE 14 -struct ccb { /* Command Control Block 5.3 */ - unchar op; /* Command Control Block Operation Code */ - unchar idlun; /* op=0,2:Target Id, op=1:Initiator Id */ - /* Outbound data transfer, length is checked*/ - /* Inbound data transfer, length is checked */ - /* Logical Unit Number */ - unchar cdblen; /* SCSI Command Length */ - unchar rsalen; /* Request Sense Allocation Length/Disable */ - unchar datalen[3]; /* Data Length (msb, .., lsb) */ - unchar dataptr[3]; /* Data Pointer */ - unchar linkptr[3]; /* Link Pointer */ - unchar commlinkid; /* Command Linking Identifier */ - unchar hastat; /* Host Adapter Status (HASTAT) */ - unchar tarstat; /* Target Device Status */ - unchar reserved[2]; - unchar cdb[MAX_CDB+MAX_SENSE];/* SCSI Command Descriptor Block */ - /* REQUEST SENSE */ +struct ccb { /* Command Control Block 5.3 */ + u8 op; /* Command Control Block Operation Code */ + u8 idlun; /* op=0,2:Target Id, op=1:Initiator Id */ + /* Outbound data transfer, length is checked*/ + /* Inbound data transfer, length is checked */ + /* Logical Unit Number */ + u8 cdblen; /* SCSI Command Length */ + u8 rsalen; /* Request Sense Allocation Length/Disable */ + u8 datalen[3]; /* Data Length (msb, .., lsb) */ + u8 dataptr[3]; /* Data Pointer */ + u8 linkptr[3]; /* Link Pointer */ + u8 commlinkid; /* Command Linking Identifier */ + u8 hastat; /* Host Adapter Status (HASTAT) */ + u8 tarstat; /* Target Device Status */ + u8 reserved[2]; + u8 cdb[MAX_CDB+MAX_SENSE]; /* SCSI Command Descriptor Block */ + /* REQUEST SENSE */ }; -static int aha1542_detect(struct scsi_host_template *); -static int aha1542_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); -static int aha1542_bus_reset(Scsi_Cmnd * SCpnt); -static int aha1542_dev_reset(Scsi_Cmnd * SCpnt); -static int aha1542_host_reset(Scsi_Cmnd * SCpnt); -#if 0 -static int aha1542_old_abort(Scsi_Cmnd * SCpnt); -static int aha1542_old_reset(Scsi_Cmnd *, unsigned int); -#endif -static int aha1542_biosparam(struct scsi_device *, struct block_device *, - sector_t, int *); - +#define AHA1542_REGION_SIZE 4 #define AHA1542_MAILBOXES 8 -#define AHA1542_SCATTER 16 -#define AHA1542_CMDLUN 1 -#endif +#endif /* _AHA1542_H_ */ diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 97f2accd3dbb..109e2c99e6c1 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -10437,14 +10437,13 @@ ahd_handle_en_lun(struct ahd_softc *ahd, struct cam_sim *sim, union ccb *ccb) return; } } - lstate = kmalloc(sizeof(*lstate), GFP_ATOMIC); + lstate = kzalloc(sizeof(*lstate), GFP_ATOMIC); if (lstate == NULL) { xpt_print_path(ccb->ccb_h.path); printk("Couldn't allocate 
lstate\n"); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; return; } - memset(lstate, 0, sizeof(*lstate)); status = xpt_create_path(&lstate->path, /*periph*/NULL, xpt_path_path_id(ccb->ccb_h.path), xpt_path_target_id(ccb->ccb_h.path), diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index d5c7b193d8d3..ce96a0be3282 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -1326,10 +1326,9 @@ int ahd_platform_alloc(struct ahd_softc *ahd, void *platform_arg) { ahd->platform_data = - kmalloc(sizeof(struct ahd_platform_data), GFP_ATOMIC); + kzalloc(sizeof(struct ahd_platform_data), GFP_ATOMIC); if (ahd->platform_data == NULL) return (ENOMEM); - memset(ahd->platform_data, 0, sizeof(struct ahd_platform_data)); ahd->platform_data->irq = AHD_LINUX_NOIRQ; ahd_lockinit(ahd); ahd->seltime = (aic79xx_seltime & 0x3) << 4; diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 10172a3af1b9..c4829d84b335 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -4464,10 +4464,9 @@ ahc_softc_init(struct ahc_softc *ahc) ahc->pause = ahc->unpause | PAUSE; /* XXX The shared scb data stuff should be deprecated */ if (ahc->scb_data == NULL) { - ahc->scb_data = kmalloc(sizeof(*ahc->scb_data), GFP_ATOMIC); + ahc->scb_data = kzalloc(sizeof(*ahc->scb_data), GFP_ATOMIC); if (ahc->scb_data == NULL) return (ENOMEM); - memset(ahc->scb_data, 0, sizeof(*ahc->scb_data)); } return (0); @@ -4780,10 +4779,10 @@ ahc_init_scbdata(struct ahc_softc *ahc) SLIST_INIT(&scb_data->sg_maps); /* Allocate SCB resources */ - scb_data->scbarray = kmalloc(sizeof(struct scb) * AHC_SCB_MAX_ALLOC, GFP_ATOMIC); + scb_data->scbarray = kzalloc(sizeof(struct scb) * AHC_SCB_MAX_ALLOC, + GFP_ATOMIC); if (scb_data->scbarray == NULL) return (ENOMEM); - memset(scb_data->scbarray, 0, sizeof(struct scb) * AHC_SCB_MAX_ALLOC); /* Determine the number of hardware SCBs and initialize them */ @@ -7558,14 +7557,13 @@ ahc_handle_en_lun(struct ahc_softc *ahc, struct cam_sim *sim, union ccb *ccb) return; } } - lstate = kmalloc(sizeof(*lstate), GFP_ATOMIC); + lstate = kzalloc(sizeof(*lstate), GFP_ATOMIC); if (lstate == NULL) { xpt_print_path(ccb->ccb_h.path); printk("Couldn't allocate lstate\n"); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; return; } - memset(lstate, 0, sizeof(*lstate)); status = xpt_create_path(&lstate->path, /*periph*/NULL, xpt_path_path_id(ccb->ccb_h.path), xpt_path_target_id(ccb->ccb_h.path), diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 88360116dbcb..a2f2c774cd6b 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -1214,10 +1214,9 @@ ahc_platform_alloc(struct ahc_softc *ahc, void *platform_arg) { ahc->platform_data = - kmalloc(sizeof(struct ahc_platform_data), GFP_ATOMIC); + kzalloc(sizeof(struct ahc_platform_data), GFP_ATOMIC); if (ahc->platform_data == NULL) return (ENOMEM); - memset(ahc->platform_data, 0, sizeof(struct ahc_platform_data)); ahc->platform_data->irq = AHC_LINUX_NOIRQ; ahc_lockinit(ahc); ahc->seltime = (aic7xxx_seltime & 0x3) << 4; diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c index a70255413e7f..db87ece6edb2 100644 --- a/drivers/scsi/atari_NCR5380.c +++ b/drivers/scsi/atari_NCR5380.c @@ -1486,7 +1486,7 @@ static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) * selection. 
*/ - timeout = jiffies + (250 * HZ / 1000); + timeout = jiffies + msecs_to_jiffies(250); /* * XXX very interesting - we're seeing a bounce where the BSY we diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index d1c37a386947..5ede3daa93dc 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -1014,7 +1014,6 @@ static struct platform_driver atari_scsi_driver = { .remove = __exit_p(atari_scsi_remove), .driver = { .name = DRV_MODULE_NAME, - .owner = THIS_MODULE, }, }; diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index f35792f7051c..f8d2478b11cc 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -57,9 +57,9 @@ */ /* settings for DTC3181E card with only Mustek scanner attached */ -#define USLEEP_POLL 1 -#define USLEEP_SLEEP 20 -#define USLEEP_WAITLONG 500 +#define USLEEP_POLL msecs_to_jiffies(10) +#define USLEEP_SLEEP msecs_to_jiffies(200) +#define USLEEP_WAITLONG msecs_to_jiffies(5000) #define AUTOPROBE_IRQ @@ -723,7 +723,7 @@ module_param(ncr_53c400a, int, 0); module_param(dtc_3181e, int, 0); MODULE_LICENSE("GPL"); -#ifndef SCSI_G_NCR5380_MEM +#if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE) static struct isapnp_device_id id_table[] = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index d9afc51af7d3..882744852aac 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -99,6 +99,7 @@ static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; static unsigned int ipr_number_of_msix = 2; +static unsigned int ipr_fast_reboot; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -221,6 +222,8 @@ MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); module_param_named(number_of_msix, ipr_number_of_msix, int, 0); MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 16). (default:2)"); +module_param_named(fast_reboot, ipr_fast_reboot, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_reboot, "Skip adapter shutdown during reboot. Set to 1 to enable. 
(default: 0)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -495,6 +498,10 @@ struct ipr_error_table_t ipr_error_table[] = { "4061: Multipath redundancy level got better"}, {0x066B9200, 0, IPR_DEFAULT_LOG_LEVEL, "4060: Multipath redundancy level got worse"}, + {0x06808100, 0, IPR_DEFAULT_LOG_LEVEL, + "9083: Device raw mode enabled"}, + {0x06808200, 0, IPR_DEFAULT_LOG_LEVEL, + "9084: Device raw mode disabled"}, {0x07270000, 0, 0, "Failure due to other device"}, {0x07278000, 0, IPR_DEFAULT_LOG_LEVEL, @@ -1462,7 +1469,8 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { - if (ioasc != IPR_IOASC_IOA_WAS_RESET) + if (ioasc != IPR_IOASC_IOA_WAS_RESET && + ioasc != IPR_IOASC_ABORTED_CMD_TERM_BY_HOST) dev_err(&ioa_cfg->pdev->dev, "Host RCB failed with IOASC: 0x%08X\n", ioasc); @@ -2566,7 +2574,8 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) ipr_handle_log_data(ioa_cfg, hostrcb); if (fd_ioasc == IPR_IOASC_NR_IOA_RESET_REQUIRED) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); - } else if (ioasc != IPR_IOASC_IOA_WAS_RESET) { + } else if (ioasc != IPR_IOASC_IOA_WAS_RESET && + ioasc != IPR_IOASC_ABORTED_CMD_TERM_BY_HOST) { dev_err(&ioa_cfg->pdev->dev, "Host RCB failed with IOASC: 0x%08X\n", ioasc); } @@ -4491,11 +4500,83 @@ static struct device_attribute ipr_resource_type_attr = { .show = ipr_show_resource_type }; +/** + * ipr_show_raw_mode - Show the adapter's raw mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_raw_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata; + struct ipr_resource_entry *res; + unsigned long lock_flags = 0; + ssize_t len; + + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + res = (struct ipr_resource_entry *)sdev->hostdata; + if (res) + len = snprintf(buf, PAGE_SIZE, "%d\n", res->raw_mode); + else + len = -ENXIO; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return len; +} + +/** + * ipr_store_raw_mode - Change the adapter's raw mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_raw_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata; + struct ipr_resource_entry *res; + unsigned long lock_flags = 0; + ssize_t len; + + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + res = (struct ipr_resource_entry *)sdev->hostdata; + if (res) { + if (ioa_cfg->sis64 && ipr_is_af_dasd_device(res)) { + res->raw_mode = simple_strtoul(buf, NULL, 10); + len = strlen(buf); + if (res->sdev) + sdev_printk(KERN_INFO, res->sdev, "raw mode is %s\n", + res->raw_mode ? 
"enabled" : "disabled"); + } else + len = -EINVAL; + } else + len = -ENXIO; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return len; +} + +static struct device_attribute ipr_raw_mode_attr = { + .attr = { + .name = "raw_mode", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_raw_mode, + .store = ipr_store_raw_mode +}; + static struct device_attribute *ipr_dev_attrs[] = { &ipr_adapter_handle_attr, &ipr_resource_path_attr, &ipr_device_id_attr, &ipr_resource_type_attr, + &ipr_raw_mode_attr, NULL, }; @@ -5379,9 +5460,6 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, if (int_reg & IPR_PCII_IOA_TRANS_TO_OPER) { /* Mask the interrupt */ writel(IPR_PCII_IOA_TRANS_TO_OPER, ioa_cfg->regs.set_interrupt_mask_reg); - - /* Clear the interrupt */ - writel(IPR_PCII_IOA_TRANS_TO_OPER, ioa_cfg->regs.clr_interrupt_reg); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); list_del(&ioa_cfg->reset_cmd->queue); @@ -6150,6 +6228,13 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, break; case IPR_IOASC_NR_INIT_CMD_REQUIRED: break; + case IPR_IOASC_IR_NON_OPTIMIZED: + if (res->raw_mode) { + res->raw_mode = 0; + scsi_cmd->result |= (DID_IMM_RETRY << 16); + } else + scsi_cmd->result |= (DID_ERROR << 16); + break; default: if (IPR_IOASC_SENSE_KEY(ioasc) > RECOVERED_ERROR) scsi_cmd->result |= (DID_ERROR << 16); @@ -6289,6 +6374,8 @@ static int ipr_queuecommand(struct Scsi_Host *shost, (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; } + if (res->raw_mode && ipr_is_af_dasd_device(res)) + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_PIPE; if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); @@ -6402,7 +6489,6 @@ static struct scsi_host_template driver_template = { .shost_attrs = ipr_ioa_attrs, .sdev_attrs = ipr_dev_attrs, .proc_name = IPR_NAME, - .no_write_same = 1, .use_blk_tags = 1, }; @@ -8318,7 +8404,6 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd) static int ipr_reset_slot_reset_done(struct ipr_cmnd *ipr_cmd) { ENTER; - pci_set_pcie_reset_state(ipr_cmd->ioa_cfg->pdev, pcie_deassert_reset); ipr_cmd->job_step = ipr_reset_bist_done; ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_BIST_TIMEOUT); LEAVE; @@ -8326,6 +8411,32 @@ static int ipr_reset_slot_reset_done(struct ipr_cmnd *ipr_cmd) } /** + * ipr_reset_reset_work - Pulse a PCIe fundamental reset + * @work: work struct + * + * Description: This pulses warm reset to a slot. + * + **/ +static void ipr_reset_reset_work(struct work_struct *work) +{ + struct ipr_cmnd *ipr_cmd = container_of(work, struct ipr_cmnd, work); + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + struct pci_dev *pdev = ioa_cfg->pdev; + unsigned long lock_flags = 0; + + ENTER; + pci_set_pcie_reset_state(pdev, pcie_warm_reset); + msleep(jiffies_to_msecs(IPR_PCI_RESET_TIMEOUT)); + pci_set_pcie_reset_state(pdev, pcie_deassert_reset); + + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->reset_cmd == ipr_cmd) + ipr_reset_ioa_job(ipr_cmd); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; +} + +/** * ipr_reset_slot_reset - Reset the PCI slot of the adapter. 
* @ipr_cmd: ipr command struct * * @@ -8337,12 +8448,11 @@ static int ipr_reset_slot_reset_done(struct ipr_cmnd *ipr_cmd) static int ipr_reset_slot_reset(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - pci_set_pcie_reset_state(pdev, pcie_warm_reset); + INIT_WORK(&ipr_cmd->work, ipr_reset_reset_work); + queue_work(ioa_cfg->reset_work_q, &ipr_cmd->work); ipr_cmd->job_step = ipr_reset_slot_reset_done; - ipr_reset_start_timer(ipr_cmd, IPR_PCI_RESET_TIMEOUT); LEAVE; return IPR_RC_JOB_RETURN; } @@ -8480,6 +8590,122 @@ static int ipr_reset_alert(struct ipr_cmnd *ipr_cmd) } /** + * ipr_reset_quiesce_done - Complete IOA disconnect + * @ipr_cmd: ipr command struct + * + * Description: Freeze the adapter to complete quiesce processing + * + * Return value: + * IPR_RC_JOB_CONTINUE + **/ +static int ipr_reset_quiesce_done(struct ipr_cmnd *ipr_cmd) +{ + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + + ENTER; + ipr_cmd->job_step = ipr_ioa_bringdown_done; + ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); + LEAVE; + return IPR_RC_JOB_CONTINUE; +} + +/** + * ipr_reset_cancel_hcam_done - Check for outstanding commands + * @ipr_cmd: ipr command struct + * + * Description: Ensure nothing is outstanding to the IOA and + * proceed with IOA disconnect. Otherwise reset the IOA. + * + * Return value: + * IPR_RC_JOB_RETURN / IPR_RC_JOB_CONTINUE + **/ +static int ipr_reset_cancel_hcam_done(struct ipr_cmnd *ipr_cmd) +{ + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + struct ipr_cmnd *loop_cmd; + struct ipr_hrr_queue *hrrq; + int rc = IPR_RC_JOB_CONTINUE; + int count = 0; + + ENTER; + ipr_cmd->job_step = ipr_reset_quiesce_done; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(loop_cmd, &hrrq->hrrq_pending_q, queue) { + count++; + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + rc = IPR_RC_JOB_RETURN; + break; + } + spin_unlock(&hrrq->_lock); + + if (count) + break; + } + + LEAVE; + return rc; +} + +/** + * ipr_reset_cancel_hcam - Cancel outstanding HCAMs + * @ipr_cmd: ipr command struct + * + * Description: Cancel any outstanding HCAMs to the IOA.
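ipr_reset_quiesce_done() and ipr_reset_cancel_hcam_done() above follow the reset engine's convention, visible throughout this file: each handler installs its successor in ipr_cmd->job_step, then returns IPR_RC_JOB_CONTINUE to have the engine invoke that successor immediately, or IPR_RC_JOB_RETURN to have it run later from an asynchronous completion. A stripped-down sketch of the idiom (the step names and the predicate are hypothetical):

    static int example_step_done(struct ipr_cmnd *ipr_cmd)
    {
    	/* ... finish up ... */
    	return IPR_RC_JOB_CONTINUE;	/* engine runs job_step next */
    }

    static int example_step(struct ipr_cmnd *ipr_cmd)
    {
    	ipr_cmd->job_step = example_step_done;	/* successor */

    	if (started_async_operation(ipr_cmd))	/* hypothetical */
    		return IPR_RC_JOB_RETURN;	/* resume on completion */

    	return IPR_RC_JOB_CONTINUE;	/* nothing to wait for */
    }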
+ * + * Return value: + * IPR_RC_JOB_CONTINUE / IPR_RC_JOB_RETURN + **/ +static int ipr_reset_cancel_hcam(struct ipr_cmnd *ipr_cmd) +{ + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int rc = IPR_RC_JOB_CONTINUE; + struct ipr_cmd_pkt *cmd_pkt; + struct ipr_cmnd *hcam_cmd; + struct ipr_hrr_queue *hrrq = &ioa_cfg->hrrq[IPR_INIT_HRRQ]; + + ENTER; + ipr_cmd->job_step = ipr_reset_cancel_hcam_done; + + if (!hrrq->ioa_is_dead) { + if (!list_empty(&ioa_cfg->hostrcb_pending_q)) { + list_for_each_entry(hcam_cmd, &hrrq->hrrq_pending_q, queue) { + if (hcam_cmd->ioarcb.cmd_pkt.cdb[0] != IPR_HOST_CONTROLLED_ASYNC) + continue; + + ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + cmd_pkt = &ipr_cmd->ioarcb.cmd_pkt; + cmd_pkt->request_type = IPR_RQTYPE_IOACMD; + cmd_pkt->cdb[0] = IPR_CANCEL_REQUEST; + cmd_pkt->cdb[1] = IPR_CANCEL_64BIT_IOARCB; + cmd_pkt->cdb[10] = ((u64) hcam_cmd->dma_addr >> 56) & 0xff; + cmd_pkt->cdb[11] = ((u64) hcam_cmd->dma_addr >> 48) & 0xff; + cmd_pkt->cdb[12] = ((u64) hcam_cmd->dma_addr >> 40) & 0xff; + cmd_pkt->cdb[13] = ((u64) hcam_cmd->dma_addr >> 32) & 0xff; + cmd_pkt->cdb[2] = ((u64) hcam_cmd->dma_addr >> 24) & 0xff; + cmd_pkt->cdb[3] = ((u64) hcam_cmd->dma_addr >> 16) & 0xff; + cmd_pkt->cdb[4] = ((u64) hcam_cmd->dma_addr >> 8) & 0xff; + cmd_pkt->cdb[5] = ((u64) hcam_cmd->dma_addr) & 0xff; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_CANCEL_TIMEOUT); + + rc = IPR_RC_JOB_RETURN; + ipr_cmd->job_step = ipr_reset_cancel_hcam; + break; + } + } + } else + ipr_cmd->job_step = ipr_reset_alert; + + LEAVE; + return rc; +} + +/** * ipr_reset_ucode_download_done - Microcode download completion * @ipr_cmd: ipr command struct * @@ -8561,7 +8787,9 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && + if (shutdown_type == IPR_SHUTDOWN_QUIESCE) + ipr_cmd->job_step = ipr_reset_cancel_hcam; + else if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; @@ -8917,13 +9145,15 @@ static void ipr_free_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) { int i; - for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { - if (ioa_cfg->ipr_cmnd_list[i]) - dma_pool_free(ioa_cfg->ipr_cmd_pool, - ioa_cfg->ipr_cmnd_list[i], - ioa_cfg->ipr_cmnd_list_dma[i]); + if (ioa_cfg->ipr_cmnd_list) { + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { + if (ioa_cfg->ipr_cmnd_list[i]) + dma_pool_free(ioa_cfg->ipr_cmd_pool, + ioa_cfg->ipr_cmnd_list[i], + ioa_cfg->ipr_cmnd_list_dma[i]); - ioa_cfg->ipr_cmnd_list[i] = NULL; + ioa_cfg->ipr_cmnd_list[i] = NULL; + } } if (ioa_cfg->ipr_cmd_pool) @@ -8973,26 +9203,25 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) } /** - * ipr_free_all_resources - Free all allocated resources for an adapter. - * @ipr_cmd: ipr command struct + * ipr_free_irqs - Free all allocated IRQs for the adapter. + * @ioa_cfg: ipr cfg struct * - * This function frees all allocated resources for the + * This function frees all allocated IRQs for the * specified adapter. 
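The cancel CDB built in ipr_reset_cancel_hcam() above scatters the 64-bit IOARCB DMA address across two byte ranges, most-significant byte first within each: cdb[10..13] receive bits 63:32 and cdb[2..5] bits 31:0. Worked through as a sketch with an arbitrary example value:

    /* for dma_addr = 0x1122334455667788ULL the bytes land as noted */
    static void pack_cancel_addr(u8 *cdb, u64 dma_addr)
    {
    	cdb[10] = (dma_addr >> 56) & 0xff;	/* 0x11 */
    	cdb[11] = (dma_addr >> 48) & 0xff;	/* 0x22 */
    	cdb[12] = (dma_addr >> 40) & 0xff;	/* 0x33 */
    	cdb[13] = (dma_addr >> 32) & 0xff;	/* 0x44 */
    	cdb[2]  = (dma_addr >> 24) & 0xff;	/* 0x55 */
    	cdb[3]  = (dma_addr >> 16) & 0xff;	/* 0x66 */
    	cdb[4]  = (dma_addr >>  8) & 0xff;	/* 0x77 */
    	cdb[5]  =  dma_addr        & 0xff;	/* 0x88 */
    }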
* * Return value: * none **/ -static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) +static void ipr_free_irqs(struct ipr_ioa_cfg *ioa_cfg) { struct pci_dev *pdev = ioa_cfg->pdev; - ENTER; if (ioa_cfg->intr_flag == IPR_USE_MSI || ioa_cfg->intr_flag == IPR_USE_MSIX) { int i; for (i = 0; i < ioa_cfg->nvectors; i++) free_irq(ioa_cfg->vectors_info[i].vec, - &ioa_cfg->hrrq[i]); + &ioa_cfg->hrrq[i]); } else free_irq(pdev->irq, &ioa_cfg->hrrq[0]); @@ -9003,7 +9232,26 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) pci_disable_msix(pdev); ioa_cfg->intr_flag &= ~IPR_USE_MSIX; } +} +/** + * ipr_free_all_resources - Free all allocated resources for an adapter. + * @ipr_cmd: ipr command struct + * + * This function frees all allocated resources for the + * specified adapter. + * + * Return value: + * none + **/ +static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) +{ + struct pci_dev *pdev = ioa_cfg->pdev; + + ENTER; + ipr_free_irqs(ioa_cfg); + if (ioa_cfg->reset_work_q) + destroy_workqueue(ioa_cfg->reset_work_q); iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -9823,6 +10071,14 @@ static int ipr_probe_ioa(struct pci_dev *pdev, (dev_id->device == PCI_DEVICE_ID_IBM_OBSIDIAN_E && !ioa_cfg->revid)) { ioa_cfg->needs_warm_reset = 1; ioa_cfg->reset = ipr_reset_slot_reset; + + ioa_cfg->reset_work_q = alloc_ordered_workqueue("ipr_reset_%d", + WQ_MEM_RECLAIM, host->host_no); + + if (!ioa_cfg->reset_work_q) { + dev_err(&pdev->dev, "Couldn't register reset workqueue\n"); + goto out_free_irq; + } } else ioa_cfg->reset = ipr_reset_start_bist; @@ -9834,6 +10090,8 @@ static int ipr_probe_ioa(struct pci_dev *pdev, out: return rc; +out_free_irq: + ipr_free_irqs(ioa_cfg); cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: @@ -9914,6 +10172,8 @@ static void __ipr_remove(struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); flush_work(&ioa_cfg->work_q); + if (ioa_cfg->reset_work_q) + flush_workqueue(ioa_cfg->reset_work_q); INIT_LIST_HEAD(&ioa_cfg->used_res_q); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); @@ -10036,6 +10296,7 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + enum ipr_shutdown_type shutdown_type = IPR_SHUTDOWN_NORMAL; int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); @@ -10051,9 +10312,16 @@ static void ipr_shutdown(struct pci_dev *pdev) spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); } - ipr_initiate_ioa_bringdown(ioa_cfg, IPR_SHUTDOWN_NORMAL); + if (ipr_fast_reboot && system_state == SYSTEM_RESTART && ioa_cfg->sis64) + shutdown_type = IPR_SHUTDOWN_QUIESCE; + + ipr_initiate_ioa_bringdown(ioa_cfg, shutdown_type); spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + if (ipr_fast_reboot && system_state == SYSTEM_RESTART && ioa_cfg->sis64) { + ipr_free_irqs(ioa_cfg); + pci_disable_device(ioa_cfg->pdev); + } } static struct pci_device_id ipr_pci_table[] = { @@ -10211,7 +10479,8 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + (ipr_fast_reboot && event == SYS_RESTART && ioa_cfg->sis64)) { 
spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index ec03b42fa2b9..47412cf4eaac 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -39,8 +39,8 @@ /* * Literals */ -#define IPR_DRIVER_VERSION "2.6.0" -#define IPR_DRIVER_DATE "(November 16, 2012)" +#define IPR_DRIVER_VERSION "2.6.1" +#define IPR_DRIVER_DATE "(March 12, 2015)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -138,6 +138,7 @@ #define IPR_IOASC_BUS_WAS_RESET 0x06290000 #define IPR_IOASC_BUS_WAS_RESET_BY_OTHER 0x06298000 #define IPR_IOASC_ABORTED_CMD_TERM_BY_HOST 0x0B5A0000 +#define IPR_IOASC_IR_NON_OPTIMIZED 0x05258200 #define IPR_FIRST_DRIVER_IOASC 0x10000000 #define IPR_IOASC_IOA_WAS_RESET 0x10000001 @@ -196,6 +197,8 @@ /* * Adapter Commands */ +#define IPR_CANCEL_REQUEST 0xC0 +#define IPR_CANCEL_64BIT_IOARCB 0x01 #define IPR_QUERY_RSRC_STATE 0xC2 #define IPR_RESET_DEVICE 0xC3 #define IPR_RESET_TYPE_SELECT 0x80 @@ -222,6 +225,7 @@ #define IPR_ABBREV_SHUTDOWN_TIMEOUT (10 * HZ) #define IPR_DUAL_IOA_ABBR_SHUTDOWN_TO (2 * 60 * HZ) #define IPR_DEVICE_RESET_TIMEOUT (ipr_fastfail ? 10 * HZ : 30 * HZ) +#define IPR_CANCEL_TIMEOUT (ipr_fastfail ? 10 * HZ : 30 * HZ) #define IPR_CANCEL_ALL_TIMEOUT (ipr_fastfail ? 10 * HZ : 30 * HZ) #define IPR_ABORT_TASK_TIMEOUT (ipr_fastfail ? 10 * HZ : 30 * HZ) #define IPR_INTERNAL_TIMEOUT (ipr_fastfail ? 10 * HZ : 30 * HZ) @@ -518,6 +522,7 @@ struct ipr_cmd_pkt { #define IPR_RQTYPE_IOACMD 0x01 #define IPR_RQTYPE_HCAM 0x02 #define IPR_RQTYPE_ATA_PASSTHRU 0x04 +#define IPR_RQTYPE_PIPE 0x05 u8 reserved2; @@ -1271,6 +1276,7 @@ struct ipr_resource_entry { u8 del_from_ml:1; u8 resetting_device:1; u8 reset_occurred:1; + u8 raw_mode:1; u32 bus; /* AKA channel */ u32 target; /* AKA id */ @@ -1402,7 +1408,8 @@ enum ipr_shutdown_type { IPR_SHUTDOWN_NORMAL = 0x00, IPR_SHUTDOWN_PREPARE_FOR_NORMAL = 0x40, IPR_SHUTDOWN_ABBREV = 0x80, - IPR_SHUTDOWN_NONE = 0x100 + IPR_SHUTDOWN_NONE = 0x100, + IPR_SHUTDOWN_QUIESCE = 0x101, }; struct ipr_trace_entry { @@ -1536,6 +1543,7 @@ struct ipr_ioa_cfg { u8 saved_mode_page_len; struct work_struct work_q; + struct workqueue_struct *reset_work_q; wait_queue_head_t reset_wait_q; wait_queue_head_t msi_wait_q; @@ -1587,6 +1595,7 @@ struct ipr_cmnd { struct ata_queued_cmd *qc; struct completion completion; struct timer_list timer; + struct work_struct work; void (*fast_done) (struct ipr_cmnd *); void (*done) (struct ipr_cmnd *); int (*job_step) (struct ipr_cmnd *); diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 434e9037908e..9b81a34d7449 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -413,6 +413,9 @@ struct lpfc_vport { uint32_t cfg_fcp_class; uint32_t cfg_use_adisc; uint32_t cfg_fdmi_on; +#define LPFC_FDMI_SUPPORT 1 /* bit 0 - FDMI supported? */ +#define LPFC_FDMI_REG_DELAY 2 /* bit 1 - 60 sec registration delay */ +#define LPFC_FDMI_ALL_ATTRIB 4 /* bit 2 - register ALL attributes? 
*/ uint32_t cfg_discovery_threads; uint32_t cfg_log_verbose; uint32_t cfg_max_luns; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 2f9b96826ac0..d65bd178d131 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -406,8 +406,13 @@ lpfc_option_rom_version_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; + char fwrev[FW_REV_STR_SIZE]; + + if (phba->sli_rev < LPFC_SLI_REV4) + return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion); - return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion); + lpfc_decode_firmware_rev(phba, fwrev, 1); + return snprintf(buf, PAGE_SIZE, "%s\n", fwrev); } /** @@ -4568,12 +4573,18 @@ LPFC_ATTR_R(multi_ring_type, FC_TYPE_IP, 1, /* # lpfc_fdmi_on: controls FDMI support. -# 0 = no FDMI support -# 1 = support FDMI without attribute of hostname -# 2 = support FDMI with attribute of hostname -# Value range [0,2]. Default value is 0. +# Set NOT Set +# bit 0 = FDMI support no FDMI support +# LPFC_FDMI_SUPPORT just turns basic support on/off +# bit 1 = Register delay no register delay (60 seconds) +# LPFC_FDMI_REG_DELAY 60 sec registration delay after FDMI login +# bit 2 = All attributes Use an attribute subset +# LPFC_FDMI_ALL_ATTRIB applies to both port and HBA attributes +# Port attributes subset: 1 thru 6 OR all: 1 thru 0xd 0x101 0x102 0x103 +# HBA attributes subset: 1 thru 0xb OR all: 1 thru 0xc +# Value range [0,7]. Default value is 0. */ -LPFC_VPORT_ATTR_RW(fdmi_on, 0, 0, 2, "Enable FDMI support"); +LPFC_VPORT_ATTR_RW(fdmi_on, 0, 0, 7, "Enable FDMI support"); /* # Specifies the maximum number of ELS cmds we can have outstanding (for diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index a7bf359aa0c6..b705068079c0 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2014 Emulex. All rights reserved. * + * Copyright (C) 2009-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex.
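With this change lpfc_fdmi_on stops being an enumeration (0, 1, 2) and becomes a bit mask assembled from the LPFC_FDMI_* flags defined in lpfc.h above; the later hunks in this series test each bit independently. A small sketch of the resulting usage (the flag values are from the patch, the surrounding logic is illustrative):

    #include <linux/types.h>

    #define LPFC_FDMI_SUPPORT	1	/* bit 0 */
    #define LPFC_FDMI_REG_DELAY	2	/* bit 1 */
    #define LPFC_FDMI_ALL_ATTRIB	4	/* bit 2 */

    static void fdmi_apply_config(uint32_t cfg_fdmi_on)
    {
    	/* e.g. fdmi_on=5 enables FDMI with the full attribute set */
    	if (!(cfg_fdmi_on & LPFC_FDMI_SUPPORT))
    		return;		/* FDMI disabled entirely */

    	if (cfg_fdmi_on & LPFC_FDMI_REG_DELAY) {
    		/* wait 60 s after FDMI login before registering */
    	}

    	if (cfg_fdmi_on & LPFC_FDMI_ALL_ATTRIB) {
    		/* register the full port/HBA attribute set */
    	} else {
    		/* register only the baseline subset */
    	}
    }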
* * www.emulex.com * * * @@ -3194,6 +3194,7 @@ lpfc_bsg_diag_loopback_run(struct fc_bsg_job *job) cmd->unsli3.rcvsli3.ox_id = 0xffff; } cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; + cmdiocbq->iocb_flag |= LPFC_IO_LOOPBACK; cmdiocbq->vport = phba->pport; cmdiocbq->iocb_cmpl = NULL; iocb_stat = lpfc_sli_issue_iocb_wait(phba, LPFC_ELS_RING, cmdiocbq, @@ -4179,6 +4180,7 @@ lpfc_bsg_handle_sli_cfg_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job, switch (opcode) { case COMN_OPCODE_GET_CNTL_ADDL_ATTRIBUTES: case COMN_OPCODE_GET_CNTL_ATTRIBUTES: + case COMN_OPCODE_GET_PROFILE_CONFIG: lpfc_printf_log(phba, KERN_INFO, LOG_LIBDFC, "3106 Handled SLI_CONFIG " "subsys_comn, opcode:x%x\n", diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h index 928ef609f363..e557bcdbcb19 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.h +++ b/drivers/scsi/lpfc/lpfc_bsg.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2010-2014 Emulex. All rights reserved. * + * Copyright (C) 2010-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -246,6 +246,7 @@ struct lpfc_sli_config_emb1_subsys { #define lpfc_emb1_subcmnd_subsys_WORD word6 /* Subsystem COMN (0x01) OpCodes */ #define SLI_CONFIG_SUBSYS_COMN 0x01 +#define COMN_OPCODE_GET_PROFILE_CONFIG 0xA4 #define COMN_OPCODE_READ_OBJECT 0xAB #define COMN_OPCODE_WRITE_OBJECT 0xAC #define COMN_OPCODE_READ_OBJECT_LIST 0xAD diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 00665a5d92fd..587e3e962f2b 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -284,6 +284,7 @@ void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *, struct lpfc_sli_ring *, uint32_t); void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *); void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); +void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, uint32_t); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); @@ -354,6 +355,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *); extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; +extern struct scsi_host_template lpfc_template_s3; extern struct scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; extern struct fc_function_template lpfc_vport_transport_functions; diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 61a32cd23f79..af129966bd11 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. 
* * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -555,7 +555,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) } } } - if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY))) + if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; Cnt -= sizeof (uint32_t); } @@ -641,7 +641,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Good status, continue checking */ CTrsp = (struct lpfc_sli_ct_request *) outp->virt; if (CTrsp->CommandResponse.bits.CmdRsp == - be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { + cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0208 NameServer Rsp Data: x%x\n", vport->fc_flag); @@ -1074,11 +1074,48 @@ lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, lpfc_decode_firmware_rev(vport->phba, fwrev, 0); - n = snprintf(symbol, size, "Emulex %s FV%s DV%s", - vport->phba->ModelName, fwrev, lpfc_release_version); + n = snprintf(symbol, size, "Emulex %s", vport->phba->ModelName); + + if (size < n) + return n; + n += snprintf(symbol + n, size - n, " FV%s", fwrev); + + if (size < n) + return n; + n += snprintf(symbol + n, size - n, " DV%s", lpfc_release_version); + + if (size < n) + return n; + n += snprintf(symbol + n, size - n, " HN:%s", init_utsname()->nodename); + + /* Note :- OS name is "Linux" */ + if (size < n) + return n; + n += snprintf(symbol + n, size - n, " OS:%s", init_utsname()->sysname); + return n; } +static uint32_t +lpfc_find_map_node(struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *next_ndlp; + struct Scsi_Host *shost; + uint32_t cnt = 0; + + shost = lpfc_shost_from_vport(vport); + spin_lock_irq(shost->host_lock); + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_type & NLP_FABRIC) + continue; + if ((ndlp->nlp_state == NLP_STE_MAPPED_NODE) || + (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)) + cnt++; + } + spin_unlock_irq(shost->host_lock); + return cnt; +} + /* * lpfc_ns_cmd * Description: @@ -1177,7 +1214,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, switch (cmdcode) { case SLI_CTNS_GID_FT: CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_GID_FT); + cpu_to_be16(SLI_CTNS_GID_FT); CtReq->un.gid.Fc4Type = SLI_CTPT_FCP; if (vport->port_state < LPFC_NS_QRY) vport->port_state = LPFC_NS_QRY; @@ -1188,7 +1225,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_GFF_ID: CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_GFF_ID); + cpu_to_be16(SLI_CTNS_GFF_ID); CtReq->un.gff.PortId = cpu_to_be32(context); cmpl = lpfc_cmpl_ct_cmd_gff_id; break; @@ -1196,7 +1233,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_RFT_ID: vport->ct_flags &= ~FC_CT_RFT_ID; CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RFT_ID); + cpu_to_be16(SLI_CTNS_RFT_ID); CtReq->un.rft.PortId = cpu_to_be32(vport->fc_myDID); CtReq->un.rft.fcpReg = 1; cmpl = lpfc_cmpl_ct_cmd_rft_id; @@ -1205,7 +1242,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_RNN_ID: vport->ct_flags &= ~FC_CT_RNN_ID; CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RNN_ID); + cpu_to_be16(SLI_CTNS_RNN_ID); CtReq->un.rnn.PortId = cpu_to_be32(vport->fc_myDID); memcpy(CtReq->un.rnn.wwnn, &vport->fc_nodename, sizeof (struct lpfc_name)); @@ -1215,7 +1252,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_RSPN_ID: vport->ct_flags &= ~FC_CT_RSPN_ID; CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RSPN_ID); + 
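The be16_to_cpu()/be32_to_cpu() to cpu_to_be16()/cpu_to_be32() flips in the lpfc_ct.c hunks here change nothing at runtime, since for a fixed-width integer both directions perform the same byte swap; the point is semantic. CT payload fields are big-endian wire data, so stores should use the cpu_to_beXX() direction and loads beXX_to_cpu(), which documents intent and keeps sparse's __be16/__be32 type annotations consistent. A minimal sketch:

    #include <linux/types.h>
    #include <asm/byteorder.h>

    static void ct_field_example(void)
    {
    	__be16 wire;		/* big-endian, as transmitted */
    	u16 host = 0x0280;	/* CPU-endian value */

    	wire = cpu_to_be16(host);	/* host -> wire: storing */
    	host = be16_to_cpu(wire);	/* wire -> host: loading */
    	(void)host;
    }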
cpu_to_be16(SLI_CTNS_RSPN_ID); CtReq->un.rspn.PortId = cpu_to_be32(vport->fc_myDID); size = sizeof(CtReq->un.rspn.symbname); CtReq->un.rspn.len = @@ -1226,7 +1263,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_RSNN_NN: vport->ct_flags &= ~FC_CT_RSNN_NN; CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RSNN_NN); + cpu_to_be16(SLI_CTNS_RSNN_NN); memcpy(CtReq->un.rsnn.wwnn, &vport->fc_nodename, sizeof (struct lpfc_name)); size = sizeof(CtReq->un.rsnn.symbname); @@ -1238,14 +1275,14 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_DA_ID: /* Implement DA_ID Nameserver request */ CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_DA_ID); + cpu_to_be16(SLI_CTNS_DA_ID); CtReq->un.da_id.port_id = cpu_to_be32(vport->fc_myDID); cmpl = lpfc_cmpl_ct_cmd_da_id; break; case SLI_CTNS_RFF_ID: vport->ct_flags &= ~FC_CT_RFF_ID; CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RFF_ID); + cpu_to_be16(SLI_CTNS_RFF_ID); CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID); CtReq->un.rff.fbits = FC4_FEATURE_INIT; CtReq->un.rff.type_code = FC_TYPE_FCP; @@ -1299,7 +1336,6 @@ lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, uint32_t latt; latt = lpfc_els_chk_latt(vport); - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, "FDMI cmpl: status:x%x/x%x latt:%d", irsp->ulpStatus, irsp->un.ulpWord[4], latt); @@ -1310,29 +1346,49 @@ lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "ulpStatus: x%x, rid x%x\n", be16_to_cpu(fdmi_cmd), latt, irsp->ulpStatus, irsp->un.ulpWord[4]); - lpfc_ct_free_iocb(phba, cmdiocb); - return; + goto fail_out; } ndlp = lpfc_findnode_did(vport, FDMI_DID); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) goto fail_out; - if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { + if (fdmi_rsp == cpu_to_be16(SLI_CT_RESPONSE_FS_RJT)) { /* FDMI rsp failed */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0220 FDMI rsp failed Data: x%x\n", be16_to_cpu(fdmi_cmd)); } +fail_out: + lpfc_ct_free_iocb(phba, cmdiocb); +} + +static void +lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct lpfc_dmabuf *inp = cmdiocb->context1; + struct lpfc_sli_ct_request *CTcmd = inp->virt; + uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp; + struct lpfc_nodelist *ndlp; + + lpfc_cmpl_ct_cmd_fdmi(phba, cmdiocb, rspiocb); + + ndlp = lpfc_findnode_did(vport, FDMI_DID); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) + return; + + /* + * Need to cycle thru FDMI registration for discovery + * DHBA -> DPRT -> RHBA -> RPA + */ switch (be16_to_cpu(fdmi_cmd)) { case SLI_MGMT_RHBA: lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RPA); break; - case SLI_MGMT_RPA: - break; - case SLI_MGMT_DHBA: lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DPRT); break; @@ -1341,12 +1397,9 @@ lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RHBA); break; } - -fail_out: - lpfc_ct_free_iocb(phba, cmdiocb); - return; } + int lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) { @@ -1355,18 +1408,28 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) struct lpfc_sli_ct_request *CtReq; struct ulp_bde64 *bpl; uint32_t size; - REG_HBA *rh; - PORT_ENTRY *pe; - REG_PORT_ATTRIBUTE *pab; - ATTRIBUTE_BLOCK *ab; - ATTRIBUTE_ENTRY *ae; + uint32_t rsp_size; + struct lpfc_fdmi_reg_hba *rh; + struct lpfc_fdmi_port_entry *pe; + struct lpfc_fdmi_reg_portattr *pab = 
NULL; + struct lpfc_fdmi_attr_block *ab = NULL; + struct lpfc_fdmi_attr_entry *ae; + struct lpfc_fdmi_attr_def *ad; void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); + if (ndlp == NULL) { + ndlp = lpfc_findnode_did(vport, FDMI_DID); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) + return 0; + cmpl = lpfc_cmpl_ct_cmd_fdmi; /* cmd interface */ + } else { + cmpl = lpfc_cmpl_ct_disc_fdmi; /* called from discovery */ + } /* fill in BDEs for command */ /* Allocate buffer for command payload */ - mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); + mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (!mp) goto fdmi_cmd_exit; @@ -1375,7 +1438,7 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) goto fdmi_cmd_free_mp; /* Allocate buffer for Buffer ptr list */ - bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); + bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (!bmp) goto fdmi_cmd_free_mpvirt; @@ -1390,205 +1453,330 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0218 FDMI Request Data: x%x x%x x%x\n", vport->fc_flag, vport->port_state, cmdcode); - CtReq = (struct lpfc_sli_ct_request *) mp->virt; + CtReq = (struct lpfc_sli_ct_request *)mp->virt; + /* First populate the CT_IU preamble */ memset(CtReq, 0, sizeof(struct lpfc_sli_ct_request)); CtReq->RevisionId.bits.Revision = SLI_CT_REVISION; CtReq->RevisionId.bits.InId = 0; CtReq->FsType = SLI_CT_MANAGEMENT_SERVICE; CtReq->FsSubType = SLI_CT_FDMI_Subtypes; + + CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(cmdcode); + rsp_size = LPFC_BPL_SIZE; size = 0; + /* Next fill in the specific FDMI cmd information */ switch (cmdcode) { + case SLI_MGMT_RHAT: case SLI_MGMT_RHBA: { lpfc_vpd_t *vp = &phba->vpd; uint32_t i, j, incr; - int len; + int len = 0; - CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_MGMT_RHBA); - CtReq->CommandResponse.bits.Size = 0; - rh = (REG_HBA *) & CtReq->un.PortID; + rh = (struct lpfc_fdmi_reg_hba *)&CtReq->un.PortID; + /* HBA Identifier */ memcpy(&rh->hi.PortName, &vport->fc_sparam.portName, - sizeof (struct lpfc_name)); - /* One entry (port) per adapter */ - rh->rpl.EntryCnt = be32_to_cpu(1); - memcpy(&rh->rpl.pe, &vport->fc_sparam.portName, - sizeof (struct lpfc_name)); - - /* point to the HBA attribute block */ - size = 2 * sizeof (struct lpfc_name) + FOURBYTES; - ab = (ATTRIBUTE_BLOCK *) ((uint8_t *) rh + size); + sizeof(struct lpfc_name)); + + if (cmdcode == SLI_MGMT_RHBA) { + /* Registered Port List */ + /* One entry (port) per adapter */ + rh->rpl.EntryCnt = cpu_to_be32(1); + memcpy(&rh->rpl.pe, &vport->fc_sparam.portName, + sizeof(struct lpfc_name)); + + /* point to the HBA attribute block */ + size = 2 * sizeof(struct lpfc_name) + + FOURBYTES; + } else { + size = sizeof(struct lpfc_name); + } + ab = (struct lpfc_fdmi_attr_block *) + ((uint8_t *)rh + size); ab->EntryCnt = 0; + size += FOURBYTES; - /* Point to the beginning of the first HBA attribute - entry */ + /* + * Point to beginning of first HBA attribute entry + */ /* #1 HBA attribute entry */ - size += FOURBYTES; - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES - + sizeof (struct lpfc_name)); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(struct lpfc_name)); + ad->AttrType = cpu_to_be16(RHBA_NODENAME); + 
ad->AttrLen = cpu_to_be16(FOURBYTES + + sizeof(struct lpfc_name)); memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName, - sizeof (struct lpfc_name)); + sizeof(struct lpfc_name)); ab->EntryCnt++; - size += FOURBYTES + sizeof (struct lpfc_name); + size += FOURBYTES + sizeof(struct lpfc_name); + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #2 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(MANUFACTURER); - strncpy(ae->un.Manufacturer, "Emulex Corporation", 64); - len = strlen(ae->un.Manufacturer); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.Manufacturer)); + ad->AttrType = cpu_to_be16(RHBA_MANUFACTURER); + strncpy(ae->un.Manufacturer, "Emulex Corporation", + sizeof(ae->un.Manufacturer)); + len = strnlen(ae->un.Manufacturer, + sizeof(ae->un.Manufacturer)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #3 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(SERIAL_NUMBER); - strncpy(ae->un.SerialNumber, phba->SerialNumber, 64); - len = strlen(ae->un.SerialNumber); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.SerialNumber)); + ad->AttrType = cpu_to_be16(RHBA_SERIAL_NUMBER); + strncpy(ae->un.SerialNumber, phba->SerialNumber, + sizeof(ae->un.SerialNumber)); + len = strnlen(ae->un.SerialNumber, + sizeof(ae->un.SerialNumber)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #4 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(MODEL); - strncpy(ae->un.Model, phba->ModelName, 256); - len = strlen(ae->un.Model); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.Model)); + ad->AttrType = cpu_to_be16(RHBA_MODEL); + strncpy(ae->un.Model, phba->ModelName, + sizeof(ae->un.Model)); + len = strnlen(ae->un.Model, sizeof(ae->un.Model)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #5 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(MODEL_DESCRIPTION); - strncpy(ae->un.ModelDescription, phba->ModelDesc, 256); - len = strlen(ae->un.ModelDescription); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.ModelDescription)); + ad->AttrType = cpu_to_be16(RHBA_MODEL_DESCRIPTION); + strncpy(ae->un.ModelDescription, phba->ModelDesc, + sizeof(ae->un.ModelDescription)); + len = strnlen(ae->un.ModelDescription, + sizeof(ae->un.ModelDescription)); len += (len & 3) ? 
(4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + 8) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #6 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(HARDWARE_VERSION); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 8); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, 8); + ad->AttrType = cpu_to_be16(RHBA_HARDWARE_VERSION); + ad->AttrLen = cpu_to_be16(FOURBYTES + 8); /* Convert JEDEC ID to ascii for hardware version */ incr = vp->rev.biuRev; for (i = 0; i < 8; i++) { j = (incr & 0xf); if (j <= 9) ae->un.HardwareVersion[7 - i] = - (char)((uint8_t) 0x30 + - (uint8_t) j); + (char)((uint8_t)0x30 + + (uint8_t)j); else ae->un.HardwareVersion[7 - i] = - (char)((uint8_t) 0x61 + - (uint8_t) (j - 10)); + (char)((uint8_t)0x61 + + (uint8_t)(j - 10)); incr = (incr >> 4); } ab->EntryCnt++; size += FOURBYTES + 8; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #7 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(DRIVER_VERSION); - strncpy(ae->un.DriverVersion, - lpfc_release_version, 256); - len = strlen(ae->un.DriverVersion); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.DriverVersion)); + ad->AttrType = cpu_to_be16(RHBA_DRIVER_VERSION); + strncpy(ae->un.DriverVersion, lpfc_release_version, + sizeof(ae->un.DriverVersion)); + len = strnlen(ae->un.DriverVersion, + sizeof(ae->un.DriverVersion)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #8 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(OPTION_ROM_VERSION); - strncpy(ae->un.OptionROMVersion, - phba->OptionROMVersion, 256); - len = strlen(ae->un.OptionROMVersion); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.OptionROMVersion)); + ad->AttrType = cpu_to_be16(RHBA_OPTION_ROM_VERSION); + strncpy(ae->un.OptionROMVersion, phba->OptionROMVersion, + sizeof(ae->un.OptionROMVersion)); + len = strnlen(ae->un.OptionROMVersion, + sizeof(ae->un.OptionROMVersion)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #9 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(FIRMWARE_VERSION); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.FirmwareVersion)); + ad->AttrType = cpu_to_be16(RHBA_FIRMWARE_VERSION); lpfc_decode_firmware_rev(phba, ae->un.FirmwareVersion, 1); - len = strlen(ae->un.FirmwareVersion); + len = strnlen(ae->un.FirmwareVersion, + sizeof(ae->un.FirmwareVersion)); len += (len & 3) ? 
(4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #10 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(OS_NAME_VERSION); - sprintf(ae->un.OsNameVersion, "%s %s %s", - init_utsname()->sysname, - init_utsname()->release, - init_utsname()->version); - len = strlen(ae->un.OsNameVersion); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.OsNameVersion)); + ad->AttrType = cpu_to_be16(RHBA_OS_NAME_VERSION); + snprintf(ae->un.OsNameVersion, + sizeof(ae->un.OsNameVersion), + "%s %s %s", + init_utsname()->sysname, + init_utsname()->release, + init_utsname()->version); + len = strnlen(ae->un.OsNameVersion, + sizeof(ae->un.OsNameVersion)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); ab->EntryCnt++; size += FOURBYTES + len; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; /* #11 HBA attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size); - ae->ad.bits.AttrType = be16_to_cpu(MAX_CT_PAYLOAD_LEN); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); - ae->un.MaxCTPayloadLen = (65 * 4096); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = + cpu_to_be16(RHBA_MAX_CT_PAYLOAD_LEN); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + ae->un.MaxCTPayloadLen = cpu_to_be32(LPFC_MAX_CT_SIZE); ab->EntryCnt++; size += FOURBYTES + 4; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto hba_out; - ab->EntryCnt = be32_to_cpu(ab->EntryCnt); + /* + * Currently switches don't seem to support the + * following extended HBA attributes. + */ + if (!(vport->cfg_fdmi_on & LPFC_FDMI_ALL_ATTRIB)) + goto hba_out; + + /* #12 HBA attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)rh + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.NodeSymName)); + ad->AttrType = cpu_to_be16(RHBA_SYM_NODENAME); + len = lpfc_vport_symbolic_node_name(vport, + ae->un.NodeSymName, sizeof(ae->un.NodeSymName)); + len += (len & 3) ? 
(4 - (len & 3)) : 4; + ad->AttrLen = cpu_to_be16(FOURBYTES + len); + ab->EntryCnt++; + size += FOURBYTES + len; +hba_out: + ab->EntryCnt = cpu_to_be32(ab->EntryCnt); /* Total size */ size = GID_REQUEST_SZ - 4 + size; } break; + case SLI_MGMT_RPRT: case SLI_MGMT_RPA: { lpfc_vpd_t *vp; struct serv_parm *hsp; - int len; + int len = 0; vp = &phba->vpd; - CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_MGMT_RPA); - CtReq->CommandResponse.bits.Size = 0; - pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID; - size = sizeof (struct lpfc_name) + FOURBYTES; - memcpy((uint8_t *) & pab->PortName, - (uint8_t *) & vport->fc_sparam.portName, - sizeof (struct lpfc_name)); + if (cmdcode == SLI_MGMT_RPRT) { + rh = (struct lpfc_fdmi_reg_hba *) + &CtReq->un.PortID; + /* HBA Identifier */ + memcpy(&rh->hi.PortName, + &vport->fc_sparam.portName, + sizeof(struct lpfc_name)); + pab = (struct lpfc_fdmi_reg_portattr *) + &rh->rpl.EntryCnt; + } else + pab = (struct lpfc_fdmi_reg_portattr *) + &CtReq->un.PortID; + size = sizeof(struct lpfc_name) + FOURBYTES; + memcpy((uint8_t *)&pab->PortName, + (uint8_t *)&vport->fc_sparam.portName, + sizeof(struct lpfc_name)); pab->ab.EntryCnt = 0; /* #1 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); - ae->ad.bits.AttrType = be16_to_cpu(SUPPORTED_FC4_TYPES); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 32); - ae->un.SupportFC4Types[2] = 1; - ae->un.SupportFC4Types[7] = 1; + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.FC4Types)); + ad->AttrType = + cpu_to_be16(RPRT_SUPPORTED_FC4_TYPES); + ad->AttrLen = cpu_to_be16(FOURBYTES + 32); + ae->un.FC4Types[0] = 0x40; /* Type 1 - ELS */ + ae->un.FC4Types[1] = 0x80; /* Type 8 - FCP */ + ae->un.FC4Types[4] = 0x80; /* Type 32 - CT */ pab->ab.EntryCnt++; size += FOURBYTES + 32; /* #2 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); - ae->ad.bits.AttrType = be16_to_cpu(SUPPORTED_SPEED); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); - + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_SUPPORTED_SPEED); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); ae->un.SupportSpeed = 0; if (phba->lmt & LMT_16Gb) ae->un.SupportSpeed |= HBA_PORTSPEED_16GBIT; @@ -1602,15 +1790,19 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) ae->un.SupportSpeed |= HBA_PORTSPEED_2GBIT; if (phba->lmt & LMT_1Gb) ae->un.SupportSpeed |= HBA_PORTSPEED_1GBIT; + ae->un.SupportSpeed = + cpu_to_be32(ae->un.SupportSpeed); pab->ab.EntryCnt++; size += FOURBYTES + 4; /* #3 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); - ae->ad.bits.AttrType = be16_to_cpu(PORT_SPEED); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); - switch(phba->fc_linkspeed) { + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_PORT_SPEED); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + switch (phba->fc_linkspeed) { case LPFC_LINK_SPEED_1GHZ: ae->un.PortSpeed = HBA_PORTSPEED_1GBIT; break; @@ -1633,93 +1825,273 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) ae->un.PortSpeed = HBA_PORTSPEED_UNKNOWN; break; } + ae->un.PortSpeed = cpu_to_be32(ae->un.PortSpeed); pab->ab.EntryCnt++; size += FOURBYTES + 4; /* #4 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) 
((uint8_t *) pab + size); - ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE); - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); - hsp = (struct serv_parm *) & vport->fc_sparam; + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_MAX_FRAME_SIZE); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + hsp = (struct serv_parm *)&vport->fc_sparam; ae->un.MaxFrameSize = - (((uint32_t) hsp->cmn. - bbRcvSizeMsb) << 8) | (uint32_t) hsp->cmn. + (((uint32_t)hsp->cmn. + bbRcvSizeMsb) << 8) | (uint32_t)hsp->cmn. bbRcvSizeLsb; + ae->un.MaxFrameSize = + cpu_to_be32(ae->un.MaxFrameSize); pab->ab.EntryCnt++; size += FOURBYTES + 4; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; /* #5 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); - ae->ad.bits.AttrType = be16_to_cpu(OS_DEVICE_NAME); - strcpy((char *)ae->un.OsDeviceName, LPFC_DRIVER_NAME); - len = strlen((char *)ae->un.OsDeviceName); + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.OsDeviceName)); + ad->AttrType = cpu_to_be16(RPRT_OS_DEVICE_NAME); + strncpy((char *)ae->un.OsDeviceName, LPFC_DRIVER_NAME, + sizeof(ae->un.OsDeviceName)); + len = strnlen((char *)ae->un.OsDeviceName, + sizeof(ae->un.OsDeviceName)); len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len); + ad->AttrLen = cpu_to_be16(FOURBYTES + len); pab->ab.EntryCnt++; size += FOURBYTES + len; + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #6 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.HostName)); + snprintf(ae->un.HostName, sizeof(ae->un.HostName), "%s", + init_utsname()->nodename); + ad->AttrType = cpu_to_be16(RPRT_HOST_NAME); + len = strnlen(ae->un.HostName, + sizeof(ae->un.HostName)); + len += (len & 3) ? (4 - (len & 3)) : 4; + ad->AttrLen = + cpu_to_be16(FOURBYTES + len); + pab->ab.EntryCnt++; + size += FOURBYTES + len; + if ((size + sizeof(struct lpfc_name)) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; - if (vport->cfg_fdmi_on == 2) { - /* #6 Port attribute entry */ - ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + - size); - ae->ad.bits.AttrType = be16_to_cpu(HOST_NAME); - sprintf(ae->un.HostName, "%s", - init_utsname()->nodename); - len = strlen(ae->un.HostName); - len += (len & 3) ? (4 - (len & 3)) : 4; - ae->ad.bits.AttrLen = - be16_to_cpu(FOURBYTES + len); - pab->ab.EntryCnt++; - size += FOURBYTES + len; - } - - pab->ab.EntryCnt = be32_to_cpu(pab->ab.EntryCnt); + /* + * Currently switches don't seem to support the + * following extended Port attributes. 
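Every variable-length attribute in this function is sized with the same two-step idiom: measure the string with an explicit bound (strnlen() against the field size, replacing the unbounded strlen()-after-strncpy() of the old code), then pad to a 4-byte boundary, always emitting at least one full word of NUL padding. The padding expression is easy to misread, so here it is as a sketch with two worked lengths (the helper name is hypothetical; the driver open-codes this inline):

    #include <linux/string.h>

    static size_t fdmi_attr_len(const char *buf, size_t bufsz)
    {
    	size_t len = strnlen(buf, bufsz); /* bounded, unlike strlen() */

    	/*
    	 * len = 5:  5 & 3 == 1, add 4 - 1 = 3  -> 8  (round up to 4)
    	 * len = 8:  8 & 3 == 0, add 4          -> 12 (aligned input
    	 *           still gains a full word of NUL padding)
    	 */
    	len += (len & 3) ? (4 - (len & 3)) : 4;
    	return len;
    }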
+ */ + if (!(vport->cfg_fdmi_on & LPFC_FDMI_ALL_ATTRIB)) + goto port_out; + + /* #7 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(struct lpfc_name)); + ad->AttrType = cpu_to_be16(RPRT_NODENAME); + ad->AttrLen = cpu_to_be16(FOURBYTES + + sizeof(struct lpfc_name)); + memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName, + sizeof(struct lpfc_name)); + pab->ab.EntryCnt++; + size += FOURBYTES + sizeof(struct lpfc_name); + if ((size + sizeof(struct lpfc_name)) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #8 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(struct lpfc_name)); + ad->AttrType = cpu_to_be16(RPRT_PORTNAME); + ad->AttrLen = cpu_to_be16(FOURBYTES + + sizeof(struct lpfc_name)); + memcpy(&ae->un.PortName, &vport->fc_sparam.portName, + sizeof(struct lpfc_name)); + pab->ab.EntryCnt++; + size += FOURBYTES + sizeof(struct lpfc_name); + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #9 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.NodeSymName)); + ad->AttrType = cpu_to_be16(RPRT_SYM_PORTNAME); + len = lpfc_vport_symbolic_port_name(vport, + ae->un.NodeSymName, sizeof(ae->un.NodeSymName)); + len += (len & 3) ? (4 - (len & 3)) : 4; + ad->AttrLen = cpu_to_be16(FOURBYTES + len); + pab->ab.EntryCnt++; + size += FOURBYTES + len; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #10 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_PORT_TYPE); + ae->un.PortState = 0; + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + pab->ab.EntryCnt++; + size += FOURBYTES + 4; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #11 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_SUPPORTED_CLASS); + ae->un.SupportClass = + cpu_to_be32(FC_COS_CLASS2 | FC_COS_CLASS3); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + pab->ab.EntryCnt++; + size += FOURBYTES + 4; + if ((size + sizeof(struct lpfc_name)) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #12 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(struct lpfc_name)); + ad->AttrType = cpu_to_be16(RPRT_FABRICNAME); + ad->AttrLen = cpu_to_be16(FOURBYTES + + sizeof(struct lpfc_name)); + memcpy(&ae->un.FabricName, &vport->fabric_nodename, + sizeof(struct lpfc_name)); + pab->ab.EntryCnt++; + size += FOURBYTES + sizeof(struct lpfc_name); + if ((size + LPFC_FDMI_MAX_AE_SIZE) > + (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #13 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + memset(ae, 0, sizeof(ae->un.FC4Types)); + ad->AttrType = + cpu_to_be16(RPRT_ACTIVE_FC4_TYPES); + ad->AttrLen = cpu_to_be16(FOURBYTES + 32); + ae->un.FC4Types[0] = 0x40; /* Type 1 - ELS */ + ae->un.FC4Types[1] = 0x80; /* Type 8 - FCP */ + 
ae->un.FC4Types[4] = 0x80; /* Type 32 - CT */ + pab->ab.EntryCnt++; + size += FOURBYTES + 32; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #257 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_PORT_STATE); + ae->un.PortState = 0; + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + pab->ab.EntryCnt++; + size += FOURBYTES + 4; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #258 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_DISC_PORT); + ae->un.PortState = lpfc_find_map_node(vport); + ae->un.PortState = cpu_to_be32(ae->un.PortState); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + pab->ab.EntryCnt++; + size += FOURBYTES + 4; + if ((size + 4) > (LPFC_BPL_SIZE - LPFC_CT_PREAMBLE)) + goto port_out; + + /* #259 Port attribute entry */ + ad = (struct lpfc_fdmi_attr_def *) + ((uint8_t *)pab + size); + ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue; + ad->AttrType = cpu_to_be16(RPRT_PORT_ID); + ae->un.PortId = cpu_to_be32(vport->fc_myDID); + ad->AttrLen = cpu_to_be16(FOURBYTES + 4); + pab->ab.EntryCnt++; + size += FOURBYTES + 4; +port_out: + pab->ab.EntryCnt = cpu_to_be32(pab->ab.EntryCnt); /* Total size */ size = GID_REQUEST_SZ - 4 + size; } break; + case SLI_MGMT_GHAT: + case SLI_MGMT_GRPL: + rsp_size = FC_MAX_NS_RSP; case SLI_MGMT_DHBA: - CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_MGMT_DHBA); - CtReq->CommandResponse.bits.Size = 0; - pe = (PORT_ENTRY *) & CtReq->un.PortID; - memcpy((uint8_t *) & pe->PortName, - (uint8_t *) & vport->fc_sparam.portName, - sizeof (struct lpfc_name)); - size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); + case SLI_MGMT_DHAT: + pe = (struct lpfc_fdmi_port_entry *)&CtReq->un.PortID; + memcpy((uint8_t *)&pe->PortName, + (uint8_t *)&vport->fc_sparam.portName, + sizeof(struct lpfc_name)); + size = GID_REQUEST_SZ - 4 + sizeof(struct lpfc_name); break; + case SLI_MGMT_GPAT: + case SLI_MGMT_GPAS: + rsp_size = FC_MAX_NS_RSP; case SLI_MGMT_DPRT: - CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_MGMT_DPRT); - CtReq->CommandResponse.bits.Size = 0; - pe = (PORT_ENTRY *) & CtReq->un.PortID; - memcpy((uint8_t *) & pe->PortName, - (uint8_t *) & vport->fc_sparam.portName, - sizeof (struct lpfc_name)); - size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); + case SLI_MGMT_DPA: + pe = (struct lpfc_fdmi_port_entry *)&CtReq->un.PortID; + memcpy((uint8_t *)&pe->PortName, + (uint8_t *)&vport->fc_sparam.portName, + sizeof(struct lpfc_name)); + size = GID_REQUEST_SZ - 4 + sizeof(struct lpfc_name); + break; + case SLI_MGMT_GRHL: + size = GID_REQUEST_SZ - 4; break; + default: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_DISCOVERY, + "0298 FDMI cmdcode x%x not supported\n", + cmdcode); + goto fdmi_cmd_free_bmpvirt; } + CtReq->CommandResponse.bits.Size = cpu_to_be16(rsp_size); - bpl = (struct ulp_bde64 *) bmp->virt; - bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); - bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); + bpl = (struct ulp_bde64 *)bmp->virt; + bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys)); + bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys)); bpl->tus.f.bdeFlags = 0; bpl->tus.f.bdeSize = size; - bpl->tus.w = le32_to_cpu(bpl->tus.w); - - cmpl = lpfc_cmpl_ct_cmd_fdmi; - /* The lpfc_ct_cmd/lpfc_get_req shall increment ndlp reference count + 
/* + * The lpfc_ct_cmd/lpfc_get_req shall increment ndlp reference count * to hold ndlp reference for the corresponding callback function. */ - if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP, 0)) + if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, rsp_size, 0)) return 0; - /* Decrement ndlp reference count to release ndlp reference held + /* + * Decrement ndlp reference count to release ndlp reference held * for the failed command's callback function. */ lpfc_nlp_put(ndlp); +fdmi_cmd_free_bmpvirt: lpfc_mbuf_free(phba, bmp->virt, bmp->phys); fdmi_cmd_free_bmp: kfree(bmp); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 5633e7dadc08..513edcb0c2da 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2007-2014 Emulex. All rights reserved. * + * Copyright (C) 2007-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index c66088d0fd2a..851e8efe364e 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -2243,8 +2243,7 @@ lpfc_adisc_done(struct lpfc_vport *vport) */ if (vport->port_state < LPFC_VPORT_READY) { /* If we get here, there is nothing to ADISC */ - if (vport->port_type == LPFC_PHYSICAL_PORT) - lpfc_issue_clear_la(phba, vport); + lpfc_issue_clear_la(phba, vport); if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { vport->num_disc_nodes = 0; /* go thru NPR list, issue ELS PLOGIs */ @@ -3338,7 +3337,11 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* FLOGI retry policy */ retry = 1; /* retry FLOGI forever */ - maxretry = 0; + if (phba->link_flag != LS_LOOPBACK_MODE) + maxretry = 0; + else + maxretry = 2; + if (cmdiocb->retry >= 100) delay = 5000; else if (cmdiocb->retry >= 32) @@ -3701,6 +3704,11 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) kfree(mp); mempool_free(pmb, phba->mbox_mem_pool); if (ndlp) { + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "0006 rpi%x DID:%x flg:%x %d map:%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); if (NLP_CHK_NODE_ACT(ndlp)) { lpfc_nlp_put(ndlp); /* This is the end of the default RPI cleanup logic for @@ -5198,7 +5206,6 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, port_state = vport->port_state; vport->fc_flag |= FC_PT2PT; vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); - vport->port_state = LPFC_FLOGI; spin_unlock_irq(shost->host_lock); lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "3311 Rcv Flogi PS x%x new PS x%x " @@ -7173,7 +7180,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) return; } - if (vport->cfg_fdmi_on) { + if (vport->cfg_fdmi_on & LPFC_FDMI_SUPPORT) { /* If this is the first time, allocate an ndlp and initialize * it. 
Otherwise, make sure the node is enabled and then do the * login. diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 5452f1f4220e..2500f15d437f 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -3439,6 +3439,11 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) pmb->context1 = NULL; pmb->context2 = NULL; + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0002 rpi:%x DID:%x flg:%x %d map:%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; @@ -3855,6 +3860,11 @@ out: ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0003 rpi:%x DID:%x flg:%x %d map%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); if (vport->port_state < LPFC_VPORT_READY) { /* Link up discovery requires Fabric registration. */ @@ -4250,8 +4260,15 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->active_rrqs_xri_bitmap = active_rrqs_xri_bitmap; spin_unlock_irqrestore(&phba->ndlp_lock, flags); - if (vport->phba->sli_rev == LPFC_SLI_REV4) + if (vport->phba->sli_rev == LPFC_SLI_REV4) { ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "0008 rpi:%x DID:%x flg:%x refcnt:%d " + "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); + } if (state != NLP_STE_UNUSED_NODE) @@ -4276,9 +4293,12 @@ lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) return; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); - if (vport->phba->sli_rev == LPFC_SLI_REV4) + if (vport->phba->sli_rev == LPFC_SLI_REV4) { lpfc_cleanup_vports_rrqs(vport, ndlp); - lpfc_nlp_put(ndlp); + lpfc_unreg_rpi(vport, ndlp); + } else { + lpfc_nlp_put(ndlp); + } return; } @@ -4515,7 +4535,17 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) mbox->context1 = ndlp; mbox->mbox_cmpl = lpfc_nlp_logo_unreg; } else { - mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + if (phba->sli_rev == LPFC_SLI_REV4 && + (!(vport->load_flag & FC_UNLOADING)) && + (bf_get(lpfc_sli_intf_if_type, + &phba->sli4_hba.sli_intf) == + LPFC_SLI_INTF_IF_TYPE_2)) { + mbox->context1 = lpfc_nlp_get(ndlp); + mbox->mbox_cmpl = + lpfc_sli4_unreg_rpi_cmpl_clr; + } else + mbox->mbox_cmpl = + lpfc_sli_def_mbox_cmpl; } rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); @@ -4741,6 +4771,11 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* For this case we need to cleanup the default rpi * allocated by the firmware. 
*/ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "0005 rpi:%x DID:%x flg:%x %d map:%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) { rc = lpfc_reg_rpi(phba, vport->vpi, ndlp->nlp_DID, @@ -5070,8 +5105,7 @@ lpfc_disc_start(struct lpfc_vport *vport) !(vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_RSCN_MODE) && (phba->sli_rev < LPFC_SLI_REV4)) { - if (vport->port_type == LPFC_PHYSICAL_PORT) - lpfc_issue_clear_la(phba, vport); + lpfc_issue_clear_la(phba, vport); lpfc_issue_reg_vpi(phba, vport); return; } @@ -5082,8 +5116,7 @@ lpfc_disc_start(struct lpfc_vport *vport) */ if (vport->port_state < LPFC_VPORT_READY && !clear_la_pending) { /* If we get here, there is nothing to ADISC */ - if (vport->port_type == LPFC_PHYSICAL_PORT) - lpfc_issue_clear_la(phba, vport); + lpfc_issue_clear_la(phba, vport); if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { vport->num_disc_nodes = 0; @@ -5484,18 +5517,22 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0004 rpi:%x DID:%x flg:%x %d map:%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); /* * Start issuing Fabric-Device Management Interface (FDMI) command to * 0xfffffa (FDMI well known port) or Delay issuing FDMI command if * fdmi-on=2 (supporting RPA/hostnmae) */ - if (vport->cfg_fdmi_on == 1) - lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA); - else + if (vport->cfg_fdmi_on & LPFC_FDMI_REG_DELAY) mod_timer(&vport->fc_fdmitmo, jiffies + msecs_to_jiffies(1000 * 60)); + else + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA); /* decrement the node reference count held for this callback * function. @@ -5650,6 +5687,13 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "0007 rpi:%x DID:%x flg:%x refcnt:%d " + "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, + atomic_read(&ndlp->kref.refcount), + ndlp->nlp_usg_map, ndlp); + ndlp->active_rrqs_xri_bitmap = mempool_alloc(vport->phba->active_rrq_pool, GFP_KERNEL); @@ -5684,9 +5728,9 @@ lpfc_nlp_release(struct kref *kref) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "0279 lpfc_nlp_release: ndlp:x%p did %x " - "usgmap:x%x refcnt:%d\n", + "usgmap:x%x refcnt:%d rpi:%x\n", (void *)ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map, - atomic_read(&ndlp->kref.refcount)); + atomic_read(&ndlp->kref.refcount), ndlp->nlp_rpi); /* remove ndlp from action. */ lpfc_nlp_remove(ndlp->vport, ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 236259252379..37beb9dc1311 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* * www.emulex.com * * * @@ -107,6 +107,7 @@ struct lpfc_sli_ct_request { uint8_t ReasonCode; uint8_t Explanation; uint8_t VendorUnique; +#define LPFC_CT_PREAMBLE 20 /* Size of CTReq + 4 up to here */ union { uint32_t PortID; @@ -170,6 +171,8 @@ struct lpfc_sli_ct_request { } un; }; +#define LPFC_MAX_CT_SIZE (60 * 4096) + #define SLI_CT_REVISION 1 #define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct gid)) @@ -1007,78 +1010,45 @@ typedef struct _ELS_PKT { /* Structure is in Big Endian format */ } un; } ELS_PKT; -/* - * FDMI - * HBA MAnagement Operations Command Codes - */ -#define SLI_MGMT_GRHL 0x100 /* Get registered HBA list */ -#define SLI_MGMT_GHAT 0x101 /* Get HBA attributes */ -#define SLI_MGMT_GRPL 0x102 /* Get registered Port list */ -#define SLI_MGMT_GPAT 0x110 /* Get Port attributes */ -#define SLI_MGMT_RHBA 0x200 /* Register HBA */ -#define SLI_MGMT_RHAT 0x201 /* Register HBA attributes */ -#define SLI_MGMT_RPRT 0x210 /* Register Port */ -#define SLI_MGMT_RPA 0x211 /* Register Port attributes */ -#define SLI_MGMT_DHBA 0x300 /* De-register HBA */ -#define SLI_MGMT_DPRT 0x310 /* De-register Port */ +/******** FDMI ********/ -/* - * Management Service Subtypes - */ -#define SLI_CT_FDMI_Subtypes 0x10 +/* lpfc_sli_ct_request defines the CT_IU preamble for FDMI commands */ +#define SLI_CT_FDMI_Subtypes 0x10 /* Management Service Subtype */ /* - * HBA Management Service Reject Code + * Registered Port List Format */ -#define REJECT_CODE 0x9 /* Unable to perform command request */ +struct lpfc_fdmi_reg_port_list { + uint32_t EntryCnt; + uint32_t pe; /* Variable-length array */ +}; -/* - * HBA Management Service Reject Reason Code - * Please refer to the Reason Codes above - */ -/* - * HBA Attribute Types - */ -#define NODE_NAME 0x1 -#define MANUFACTURER 0x2 -#define SERIAL_NUMBER 0x3 -#define MODEL 0x4 -#define MODEL_DESCRIPTION 0x5 -#define HARDWARE_VERSION 0x6 -#define DRIVER_VERSION 0x7 -#define OPTION_ROM_VERSION 0x8 -#define FIRMWARE_VERSION 0x9 -#define OS_NAME_VERSION 0xa -#define MAX_CT_PAYLOAD_LEN 0xb +/* Definitions for HBA / Port attribute entries */ -/* - * Port Attrubute Types - */ -#define SUPPORTED_FC4_TYPES 0x1 -#define SUPPORTED_SPEED 0x2 -#define PORT_SPEED 0x3 -#define MAX_FRAME_SIZE 0x4 -#define OS_DEVICE_NAME 0x5 -#define HOST_NAME 0x6 - -union AttributesDef { +struct lpfc_fdmi_attr_def { /* Defined in TLV format */ /* Structure is in Big Endian format */ - struct { - uint32_t AttrType:16; - uint32_t AttrLen:16; - } bits; - uint32_t word; + uint32_t AttrType:16; + uint32_t AttrLen:16; + uint32_t AttrValue; /* Marks start of Value (ATTRIBUTE_ENTRY) */ }; -/* - * HBA Attribute Entry (8 - 260 bytes) - */ -typedef struct { - union AttributesDef ad; +/* Attribute Entry */ +struct lpfc_fdmi_attr_entry { union { uint32_t VendorSpecific; + uint32_t SupportClass; + uint32_t SupportSpeed; + uint32_t PortSpeed; + uint32_t MaxFrameSize; + uint32_t MaxCTPayloadLen; + uint32_t PortState; + uint32_t PortId; + struct lpfc_name NodeName; + struct lpfc_name PortName; + struct lpfc_name FabricName; + uint8_t FC4Types[32]; uint8_t Manufacturer[64]; uint8_t SerialNumber[64]; uint8_t Model[256]; @@ -1087,97 +1057,115 @@ typedef struct { uint8_t DriverVersion[256]; uint8_t OptionROMVersion[256]; uint8_t FirmwareVersion[256]; - struct lpfc_name NodeName; - uint8_t SupportFC4Types[32]; - uint32_t SupportSpeed; - uint32_t PortSpeed; - uint32_t MaxFrameSize; + uint8_t OsHostName[256]; + uint8_t NodeSymName[256]; uint8_t OsDeviceName[256]; uint8_t 
OsNameVersion[256]; - uint32_t MaxCTPayloadLen; uint8_t HostName[256]; } un; -} ATTRIBUTE_ENTRY; +}; + +#define LPFC_FDMI_MAX_AE_SIZE sizeof(struct lpfc_fdmi_attr_entry) /* * HBA Attribute Block */ -typedef struct { - uint32_t EntryCnt; /* Number of HBA attribute entries */ - ATTRIBUTE_ENTRY Entry; /* Variable-length array */ -} ATTRIBUTE_BLOCK; +struct lpfc_fdmi_attr_block { + uint32_t EntryCnt; /* Number of HBA attribute entries */ + struct lpfc_fdmi_attr_entry Entry; /* Variable-length array */ +}; /* * Port Entry */ -typedef struct { +struct lpfc_fdmi_port_entry { struct lpfc_name PortName; -} PORT_ENTRY; +}; /* * HBA Identifier */ -typedef struct { +struct lpfc_fdmi_hba_ident { struct lpfc_name PortName; -} HBA_IDENTIFIER; - -/* - * Registered Port List Format - */ -typedef struct { - uint32_t EntryCnt; - PORT_ENTRY pe; /* Variable-length array */ -} REG_PORT_LIST; +}; /* * Register HBA(RHBA) */ -typedef struct { - HBA_IDENTIFIER hi; - REG_PORT_LIST rpl; /* variable-length array */ -/* ATTRIBUTE_BLOCK ab; */ -} REG_HBA; +struct lpfc_fdmi_reg_hba { + struct lpfc_fdmi_hba_ident hi; + struct lpfc_fdmi_reg_port_list rpl; /* variable-length array */ +/* struct lpfc_fdmi_attr_block ab; */ +}; /* * Register HBA Attributes (RHAT) */ -typedef struct { +struct lpfc_fdmi_reg_hbaattr { struct lpfc_name HBA_PortName; - ATTRIBUTE_BLOCK ab; -} REG_HBA_ATTRIBUTE; + struct lpfc_fdmi_attr_block ab; +}; /* * Register Port Attributes (RPA) */ -typedef struct { +struct lpfc_fdmi_reg_portattr { struct lpfc_name PortName; - ATTRIBUTE_BLOCK ab; -} REG_PORT_ATTRIBUTE; + struct lpfc_fdmi_attr_block ab; +}; /* - * Get Registered HBA List (GRHL) Accept Payload Format + * HBA Management Operations Command Codes */ -typedef struct { - uint32_t HBA__Entry_Cnt; /* Number of Registered HBA Identifiers */ - struct lpfc_name HBA_PortName; /* Variable-length array */ -} GRHL_ACC_PAYLOAD; +#define SLI_MGMT_GRHL 0x100 /* Get registered HBA list */ +#define SLI_MGMT_GHAT 0x101 /* Get HBA attributes */ +#define SLI_MGMT_GRPL 0x102 /* Get registered Port list */ +#define SLI_MGMT_GPAT 0x110 /* Get Port attributes */ +#define SLI_MGMT_GPAS 0x120 /* Get Port Statistics */ +#define SLI_MGMT_RHBA 0x200 /* Register HBA */ +#define SLI_MGMT_RHAT 0x201 /* Register HBA attributes */ +#define SLI_MGMT_RPRT 0x210 /* Register Port */ +#define SLI_MGMT_RPA 0x211 /* Register Port attributes */ +#define SLI_MGMT_DHBA 0x300 /* De-register HBA */ +#define SLI_MGMT_DHAT 0x301 /* De-register HBA attributes */ +#define SLI_MGMT_DPRT 0x310 /* De-register Port */ +#define SLI_MGMT_DPA 0x311 /* De-register Port attributes */ /* - * Get Registered Port List (GRPL) Accept Payload Format + * HBA Attribute Types */ -typedef struct { - uint32_t RPL_Entry_Cnt; /* Number of Registered Port Entries */ - PORT_ENTRY Reg_Port_Entry[1]; /* Variable-length array */ -} GRPL_ACC_PAYLOAD; +#define RHBA_NODENAME 0x1 /* 8 byte WWNN */ +#define RHBA_MANUFACTURER 0x2 /* 4 to 64 byte ASCII string */ +#define RHBA_SERIAL_NUMBER 0x3 /* 4 to 64 byte ASCII string */ +#define RHBA_MODEL 0x4 /* 4 to 256 byte ASCII string */ +#define RHBA_MODEL_DESCRIPTION 0x5 /* 4 to 256 byte ASCII string */ +#define RHBA_HARDWARE_VERSION 0x6 /* 4 to 256 byte ASCII string */ +#define RHBA_DRIVER_VERSION 0x7 /* 4 to 256 byte ASCII string */ +#define RHBA_OPTION_ROM_VERSION 0x8 /* 4 to 256 byte ASCII string */ +#define RHBA_FIRMWARE_VERSION 0x9 /* 4 to 256 byte ASCII string */ +#define RHBA_OS_NAME_VERSION 0xa /* 4 to 256 byte ASCII string */ +#define RHBA_MAX_CT_PAYLOAD_LEN 0xb /* 32-bit unsigned int */ +#define RHBA_SYM_NODENAME 0xc /* 4 to 256 byte ASCII string */
/* - * Get Port Attributes (GPAT) Accept Payload Format + * Port Attribute Types */ - -typedef struct { - ATTRIBUTE_BLOCK pab; -} GPAT_ACC_PAYLOAD; - +#define RPRT_SUPPORTED_FC4_TYPES 0x1 /* 32 byte binary array */ +#define RPRT_SUPPORTED_SPEED 0x2 /* 32-bit unsigned int */ +#define RPRT_PORT_SPEED 0x3 /* 32-bit unsigned int */ +#define RPRT_MAX_FRAME_SIZE 0x4 /* 32-bit unsigned int */ +#define RPRT_OS_DEVICE_NAME 0x5 /* 4 to 256 byte ASCII string */ +#define RPRT_HOST_NAME 0x6 /* 4 to 256 byte ASCII string */ +#define RPRT_NODENAME 0x7 /* 8 byte WWNN */ +#define RPRT_PORTNAME 0x8 /* 8 byte WWPN */ +#define RPRT_SYM_PORTNAME 0x9 /* 4 to 256 byte ASCII string */ +#define RPRT_PORT_TYPE 0xa /* 32-bit unsigned int */ +#define RPRT_SUPPORTED_CLASS 0xb /* 32-bit unsigned int */ +#define RPRT_FABRICNAME 0xc /* 8 byte Fabric WWNN */ +#define RPRT_ACTIVE_FC4_TYPES 0xd /* 32 byte binary array */ +#define RPRT_PORT_STATE 0x101 /* 32-bit unsigned int */ +#define RPRT_DISC_PORT 0x102 /* 32-bit unsigned int */ +#define RPRT_PORT_ID 0x103 /* 32-bit unsigned int */ /* * Begin HBA configuration parameters. diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index f432ec180cf8..1813c45946f4 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2014 Emulex. All rights reserved. * + * Copyright (C) 2009-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -3085,6 +3085,9 @@ struct lpfc_acqe_link { #define LPFC_ASYNC_LINK_SPEED_100MBPS 0x2 #define LPFC_ASYNC_LINK_SPEED_1GBPS 0x3 #define LPFC_ASYNC_LINK_SPEED_10GBPS 0x4 +#define LPFC_ASYNC_LINK_SPEED_20GBPS 0x5 +#define LPFC_ASYNC_LINK_SPEED_25GBPS 0x6 +#define LPFC_ASYNC_LINK_SPEED_40GBPS 0x7 #define lpfc_acqe_link_duplex_SHIFT 16 #define lpfc_acqe_link_duplex_MASK 0x000000FF #define lpfc_acqe_link_duplex_WORD word0 @@ -3166,7 +3169,7 @@ struct lpfc_acqe_fc_la { #define lpfc_acqe_fc_la_speed_SHIFT 24 #define lpfc_acqe_fc_la_speed_MASK 0x000000FF #define lpfc_acqe_fc_la_speed_WORD word0 -#define LPFC_FC_LA_SPEED_UNKOWN 0x0 +#define LPFC_FC_LA_SPEED_UNKNOWN 0x0 #define LPFC_FC_LA_SPEED_1G 0x1 #define LPFC_FC_LA_SPEED_2G 0x2 #define LPFC_FC_LA_SPEED_4G 0x4 @@ -3244,6 +3247,7 @@ struct lpfc_acqe_sli { #define LPFC_SLI_EVENT_TYPE_NVLOG_POST 0x4 #define LPFC_SLI_EVENT_TYPE_DIAG_DUMP 0x5 #define LPFC_SLI_EVENT_TYPE_MISCONFIGURED 0x9 +#define LPFC_SLI_EVENT_TYPE_REMOTE_DPORT 0xA }; /* diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0b2c53af85c7..e8c8c1ecc1f5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex.
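Because lpfc_fdmi_attr_entry makes the value a union, the AttrType code decides which overlay is meaningful. A hypothetical stand-alone reader sketch, using simplified structs and two of the RPRT_* codes defined above (ntohs/ntohl model the big-endian wire fields):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct attr_hdr {	/* big-endian on the wire */
	uint16_t type;
	uint16_t len;	/* value length plus the 4-byte header */
};

static void print_port_attr(const struct attr_hdr *hdr, const uint8_t *val)
{
	uint32_t v;

	switch (ntohs(hdr->type)) {
	case 0x3:	/* RPRT_PORT_SPEED: 32-bit unsigned int */
		memcpy(&v, val, sizeof(v));	/* avoid unaligned access */
		printf("port speed code: %u\n", ntohl(v));
		break;
	case 0x6:	/* RPRT_HOST_NAME: 4 to 256 byte ASCII string */
		printf("host name: %.*s\n", ntohs(hdr->len) - 4,
		       (const char *)val);
		break;
	default:
		printf("attr type 0x%x, %d value bytes\n",
		       ntohs(hdr->type), ntohs(hdr->len) - 4);
	}
}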
* * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -1330,13 +1330,14 @@ lpfc_offline_eratt(struct lpfc_hba *phba) void lpfc_sli4_offline_eratt(struct lpfc_hba *phba) { + spin_lock_irq(&phba->hbalock); + phba->link_state = LPFC_HBA_ERROR; + spin_unlock_irq(&phba->hbalock); + lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); lpfc_offline(phba); - lpfc_sli4_brdreset(phba); lpfc_hba_down_post(phba); - lpfc_sli4_post_status_check(phba); lpfc_unblock_mgmt_io(phba); - phba->link_state = LPFC_HBA_ERROR; } /** @@ -1629,6 +1630,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) uint32_t uerrlo_reg, uemasklo_reg; uint32_t pci_rd_rc1, pci_rd_rc2; bool en_rn_msg = true; + struct temp_event temp_event_data; int rc; /* If the pci channel is offline, ignore possible errors, since @@ -1636,9 +1638,6 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) */ if (pci_channel_offline(phba->pcidev)) return; - /* If resets are disabled then leave the HBA alone and return */ - if (!phba->cfg_enable_hba_reset) - return; if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf); switch (if_type) { @@ -1654,6 +1653,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) return; lpfc_sli4_offline_eratt(phba); break; + case LPFC_SLI_INTF_IF_TYPE_2: pci_rd_rc1 = lpfc_readl( phba->sli4_hba.u.if_type2.STATUSregaddr, @@ -1668,15 +1668,27 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) reg_err1 = readl(phba->sli4_hba.u.if_type2.ERR1regaddr); reg_err2 = readl(phba->sli4_hba.u.if_type2.ERR2regaddr); if (bf_get(lpfc_sliport_status_oti, &portstat_reg)) { - /* TODO: Register for Overtemp async events. */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2889 Port Overtemperature event, " - "taking port offline\n"); + "taking port offline Data: x%x x%x\n", + reg_err1, reg_err2); + + temp_event_data.event_type = FC_REG_TEMPERATURE_EVENT; + temp_event_data.event_code = LPFC_CRIT_TEMP; + temp_event_data.data = 0xFFFFFFFF; + + shost = lpfc_shost_from_vport(phba->pport); + fc_host_post_vendor_event(shost, fc_get_event_number(), + sizeof(temp_event_data), + (char *)&temp_event_data, + SCSI_NL_VID_TYPE_PCI + | PCI_VENDOR_ID_EMULEX); + spin_lock_irq(&phba->hbalock); phba->over_temp_state = HBA_OVER_TEMP; spin_unlock_irq(&phba->hbalock); lpfc_sli4_offline_eratt(phba); - break; + return; } if (reg_err1 == SLIPORT_ERR1_REG_ERR_CODE_2 && reg_err2 == SLIPORT_ERR2_REG_FW_RESTART) { @@ -1693,6 +1705,10 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3145 Port Down: Provisioning\n"); + /* If resets are disabled then leave the HBA alone and return */ + if (!phba->cfg_enable_hba_reset) + return; + /* Check port status register for function reset */ rc = lpfc_sli4_port_sta_fn_reset(phba, LPFC_MBX_NO_WAIT, en_rn_msg); @@ -2759,9 +2775,19 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) list_for_each_entry_safe(ndlp, next_ndlp, &vports[i]->fc_nodes, nlp_listp) { - if (NLP_CHK_NODE_ACT(ndlp)) + if (NLP_CHK_NODE_ACT(ndlp)) { ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(phba); + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE, + "0009 rpi:%x DID:%x " + "flg:%x map:%x %p\n", + ndlp->nlp_rpi, + ndlp->nlp_DID, + ndlp->nlp_flag, + ndlp->nlp_usg_map, + ndlp); + } } } } @@ -2925,8 +2951,18 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) * RPI. Get a new RPI when the adapter port * comes back online. 
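Both this error-attention path and the async SLI handler further down report temperature the same way: fill a small driver-defined payload and hand it to the FC transport, which forwards it to user space over netlink. A condensed sketch of the pattern (kernel context; the payload layout and event codes are lpfc's own and are shown schematically here):

#include <linux/pci_ids.h>
#include <scsi/scsi_netlink.h>
#include <scsi/scsi_transport_fc.h>

/* Simplified stand-in for the driver's temp_event payload. */
struct temp_event_sketch {
	u32 event_type;		/* e.g. lpfc's FC_REG_TEMPERATURE_EVENT */
	u32 event_code;		/* threshold / critical / normal-range */
	u32 data;		/* temperature in degrees Celsius */
};

static void post_temp_event_sketch(struct Scsi_Host *shost,
				   u32 type, u32 code, u32 celsius)
{
	struct temp_event_sketch ev = {
		.event_type = type,
		.event_code = code,
		.data = celsius,
	};

	/* The FC transport queues this on its netlink event channel,
	 * tagged as a PCI-vendor-specific event. */
	fc_host_post_vendor_event(shost, fc_get_event_number(),
				  sizeof(ev), (char *)&ev,
				  SCSI_NL_VID_TYPE_PCI |
				  PCI_VENDOR_ID_EMULEX);
}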
*/ - if (phba->sli_rev == LPFC_SLI_REV4) + if (phba->sli_rev == LPFC_SLI_REV4) { + lpfc_printf_vlog(ndlp->vport, + KERN_INFO, LOG_NODE, + "0011 lpfc_offline: " + "ndlp:x%p did %x " + "usgmap:x%x rpi:%x\n", + ndlp, ndlp->nlp_DID, + ndlp->nlp_usg_map, + ndlp->nlp_rpi); + lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + } lpfc_unreg_rpi(vports[i], ndlp); } } @@ -3241,12 +3277,17 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) struct Scsi_Host *shost; int error = 0; - if (dev != &phba->pcidev->dev) + if (dev != &phba->pcidev->dev) { shost = scsi_host_alloc(&lpfc_vport_template, sizeof(struct lpfc_vport)); - else - shost = scsi_host_alloc(&lpfc_template, + } else { + if (phba->sli_rev == LPFC_SLI_REV4) + shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); + else + shost = scsi_host_alloc(&lpfc_template_s3, + sizeof(struct lpfc_vport)); + } if (!shost) goto out; @@ -3685,6 +3726,11 @@ lpfc_sli4_parse_latt_link_speed(struct lpfc_hba *phba, case LPFC_ASYNC_LINK_SPEED_10GBPS: link_speed = LPFC_LINK_SPEED_10GHZ; break; + case LPFC_ASYNC_LINK_SPEED_20GBPS: + case LPFC_ASYNC_LINK_SPEED_25GBPS: + case LPFC_ASYNC_LINK_SPEED_40GBPS: + link_speed = LPFC_LINK_SPEED_UNKNOWN; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0483 Invalid link-attention link speed: x%x\n", @@ -3756,46 +3802,55 @@ lpfc_sli4_port_speed_parse(struct lpfc_hba *phba, uint32_t evt_code, switch (evt_code) { case LPFC_TRAILER_CODE_LINK: switch (speed_code) { - case LPFC_EVT_CODE_LINK_NO_LINK: + case LPFC_ASYNC_LINK_SPEED_ZERO: port_speed = 0; break; - case LPFC_EVT_CODE_LINK_10_MBIT: + case LPFC_ASYNC_LINK_SPEED_10MBPS: port_speed = 10; break; - case LPFC_EVT_CODE_LINK_100_MBIT: + case LPFC_ASYNC_LINK_SPEED_100MBPS: port_speed = 100; break; - case LPFC_EVT_CODE_LINK_1_GBIT: + case LPFC_ASYNC_LINK_SPEED_1GBPS: port_speed = 1000; break; - case LPFC_EVT_CODE_LINK_10_GBIT: + case LPFC_ASYNC_LINK_SPEED_10GBPS: port_speed = 10000; break; + case LPFC_ASYNC_LINK_SPEED_20GBPS: + port_speed = 20000; + break; + case LPFC_ASYNC_LINK_SPEED_25GBPS: + port_speed = 25000; + break; + case LPFC_ASYNC_LINK_SPEED_40GBPS: + port_speed = 40000; + break; default: port_speed = 0; } break; case LPFC_TRAILER_CODE_FC: switch (speed_code) { - case LPFC_EVT_CODE_FC_NO_LINK: + case LPFC_FC_LA_SPEED_UNKNOWN: port_speed = 0; break; - case LPFC_EVT_CODE_FC_1_GBAUD: + case LPFC_FC_LA_SPEED_1G: port_speed = 1000; break; - case LPFC_EVT_CODE_FC_2_GBAUD: + case LPFC_FC_LA_SPEED_2G: port_speed = 2000; break; - case LPFC_EVT_CODE_FC_4_GBAUD: + case LPFC_FC_LA_SPEED_4G: port_speed = 4000; break; - case LPFC_EVT_CODE_FC_8_GBAUD: + case LPFC_FC_LA_SPEED_8G: port_speed = 8000; break; - case LPFC_EVT_CODE_FC_10_GBAUD: + case LPFC_FC_LA_SPEED_10G: port_speed = 10000; break; - case LPFC_EVT_CODE_FC_16_GBAUD: + case LPFC_FC_LA_SPEED_16G: port_speed = 16000; break; default: @@ -4044,18 +4099,21 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) char port_name; char message[128]; uint8_t status; + uint8_t evt_type; + struct temp_event temp_event_data; struct lpfc_acqe_misconfigured_event *misconfigured; + struct Scsi_Host *shost; + + evt_type = bf_get(lpfc_trailer_type, acqe_sli); - /* special case misconfigured event as it contains data for all ports */ - if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != - LPFC_SLI_INTF_IF_TYPE_2) || - (bf_get(lpfc_trailer_type, acqe_sli) != - LPFC_SLI_EVENT_TYPE_MISCONFIGURED)) { + /* Special case Lancer */ + if (bf_get(lpfc_sli_intf_if_type, 
&phba->sli4_hba.sli_intf) != + LPFC_SLI_INTF_IF_TYPE_2) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2901 Async SLI event - Event Data1:x%08x Event Data2:" "x%08x SLI Event Type:%d\n", acqe_sli->event_data1, acqe_sli->event_data2, - bf_get(lpfc_trailer_type, acqe_sli)); + evt_type); return; } @@ -4063,58 +4121,107 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) if (port_name == 0x00) port_name = '?'; /* get port name is empty */ - misconfigured = (struct lpfc_acqe_misconfigured_event *) + switch (evt_type) { + case LPFC_SLI_EVENT_TYPE_OVER_TEMP: + temp_event_data.event_type = FC_REG_TEMPERATURE_EVENT; + temp_event_data.event_code = LPFC_THRESHOLD_TEMP; + temp_event_data.data = (uint32_t)acqe_sli->event_data1; + + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "3190 Over Temperature:%d Celsius- Port Name %c\n", + acqe_sli->event_data1, port_name); + + shost = lpfc_shost_from_vport(phba->pport); + fc_host_post_vendor_event(shost, fc_get_event_number(), + sizeof(temp_event_data), + (char *)&temp_event_data, + SCSI_NL_VID_TYPE_PCI + | PCI_VENDOR_ID_EMULEX); + break; + case LPFC_SLI_EVENT_TYPE_NORM_TEMP: + temp_event_data.event_type = FC_REG_TEMPERATURE_EVENT; + temp_event_data.event_code = LPFC_NORMAL_TEMP; + temp_event_data.data = (uint32_t)acqe_sli->event_data1; + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "3191 Normal Temperature:%d Celsius - Port Name %c\n", + acqe_sli->event_data1, port_name); + + shost = lpfc_shost_from_vport(phba->pport); + fc_host_post_vendor_event(shost, fc_get_event_number(), + sizeof(temp_event_data), + (char *)&temp_event_data, + SCSI_NL_VID_TYPE_PCI + | PCI_VENDOR_ID_EMULEX); + break; + case LPFC_SLI_EVENT_TYPE_MISCONFIGURED: + misconfigured = (struct lpfc_acqe_misconfigured_event *) &acqe_sli->event_data1; - /* fetch the status for this port */ - switch (phba->sli4_hba.lnk_info.lnk_no) { - case LPFC_LINK_NUMBER_0: - status = bf_get(lpfc_sli_misconfigured_port0, + /* fetch the status for this port */ + switch (phba->sli4_hba.lnk_info.lnk_no) { + case LPFC_LINK_NUMBER_0: + status = bf_get(lpfc_sli_misconfigured_port0, &misconfigured->theEvent); - break; - case LPFC_LINK_NUMBER_1: - status = bf_get(lpfc_sli_misconfigured_port1, + break; + case LPFC_LINK_NUMBER_1: + status = bf_get(lpfc_sli_misconfigured_port1, &misconfigured->theEvent); - break; - case LPFC_LINK_NUMBER_2: - status = bf_get(lpfc_sli_misconfigured_port2, + break; + case LPFC_LINK_NUMBER_2: + status = bf_get(lpfc_sli_misconfigured_port2, &misconfigured->theEvent); - break; - case LPFC_LINK_NUMBER_3: - status = bf_get(lpfc_sli_misconfigured_port3, + break; + case LPFC_LINK_NUMBER_3: + status = bf_get(lpfc_sli_misconfigured_port3, &misconfigured->theEvent); - break; - default: - status = ~LPFC_SLI_EVENT_STATUS_VALID; - break; - } + break; + default: + status = ~LPFC_SLI_EVENT_STATUS_VALID; + break; + } - switch (status) { - case LPFC_SLI_EVENT_STATUS_VALID: - return; /* no message if the sfp is okay */ - case LPFC_SLI_EVENT_STATUS_NOT_PRESENT: - sprintf(message, "Optics faulted/incorrectly installed/not " \ - "installed - Reseat optics, if issue not " - "resolved, replace."); - break; - case LPFC_SLI_EVENT_STATUS_WRONG_TYPE: - sprintf(message, - "Optics of two types installed - Remove one optic or " \ - "install matching pair of optics."); - break; - case LPFC_SLI_EVENT_STATUS_UNSUPPORTED: - sprintf(message, "Incompatible optics - Replace with " \ + switch (status) { + case LPFC_SLI_EVENT_STATUS_VALID: + return; /* no message if the sfp is okay */ + case 
LPFC_SLI_EVENT_STATUS_NOT_PRESENT: + sprintf(message, "Optics faulted/incorrectly " + "installed/not installed - Reseat optics, " + "if issue not resolved, replace."); + break; + case LPFC_SLI_EVENT_STATUS_WRONG_TYPE: + sprintf(message, + "Optics of two types installed - Remove one " + "optic or install matching pair of optics."); + break; + case LPFC_SLI_EVENT_STATUS_UNSUPPORTED: + sprintf(message, "Incompatible optics - Replace with " "compatible optics for card to function."); + break; + default: + /* firmware is reporting a status we don't know about */ + sprintf(message, "Unknown event status x%02x", status); + break; + } + + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3176 Misconfigured Physical Port - " + "Port Name %c %s\n", port_name, message); + break; + case LPFC_SLI_EVENT_TYPE_REMOTE_DPORT: + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "3192 Remote DPort Test Initiated - " + "Event Data1:x%08x Event Data2: x%08x\n", + acqe_sli->event_data1, acqe_sli->event_data2); break; default: - /* firmware is reporting a status we don't know about */ - sprintf(message, "Unknown event status x%02x", status); + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "3193 Async SLI event - Event Data1:x%08x Event Data2:" + "x%08x SLI Event Type:%d\n", + acqe_sli->event_data1, acqe_sli->event_data2, + evt_type); break; } - - lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "3176 Misconfigured Physical Port - " - "Port Name %c %s\n", port_name, message); } /** @@ -5183,6 +5290,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) rc = lpfc_pci_function_reset(phba); if (unlikely(rc)) return -ENODEV; + phba->temp_sensor_support = 1; } /* Create the bootstrap mailbox command */ @@ -7647,6 +7755,14 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) goto out_destroy_els_rq; } } + + /* + * Configure EQ delay multiplier for interrupt coalescing using + * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. + */ + for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; + fcp_eqidx += LPFC_MAX_EQ_DELAY) + lpfc_modify_fcp_eq_delay(phba, fcp_eqidx); return 0; out_destroy_els_rq: @@ -7953,7 +8069,7 @@ wait: * up to 30 seconds. If the port doesn't respond, treat * it as an error. */ - for (rdy_chk = 0; rdy_chk < 3000; rdy_chk++) { + for (rdy_chk = 0; rdy_chk < 1500; rdy_chk++) { if (lpfc_readl(phba->sli4_hba.u.if_type2. STATUSregaddr, &reg_data.word0)) { rc = -ENODEV; diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index 06241f590c1e..816f596cda60 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 5cc1103d811e..4cb9882af157 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex.
* * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -276,6 +276,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; + uint64_t nlp_portwwn = 0; uint32_t *lp; IOCB_t *icmd; struct serv_parm *sp; @@ -332,6 +333,8 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, NULL); return 0; } + + nlp_portwwn = wwn_to_u64(ndlp->nlp_portname.u.wwn); if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3, 0) == 0)) { /* Reject this request because invalid parameters */ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; @@ -367,7 +370,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_maxframe = ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; - /* no need to reg_login if we are already in one of these states */ + /* if already logged in, do implicit logout */ switch (ndlp->nlp_state) { case NLP_STE_NPR_NODE: if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) @@ -376,8 +379,26 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, case NLP_STE_PRLI_ISSUE: case NLP_STE_UNMAPPED_NODE: case NLP_STE_MAPPED_NODE: - lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL); - return 1; + /* lpfc_plogi_confirm_nport skips fabric did, handle it here */ + if (!(ndlp->nlp_type & NLP_FABRIC)) { + lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, + ndlp, NULL); + return 1; + } + if (nlp_portwwn != 0 && + nlp_portwwn != wwn_to_u64(sp->portName.u.wwn)) + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "0143 PLOGI recv'd from DID: x%x " + "WWPN changed: old %llx new %llx\n", + ndlp->nlp_DID, + (unsigned long long)nlp_portwwn, + (unsigned long long) + wwn_to_u64(sp->portName.u.wwn)); + + ndlp->nlp_prev_state = ndlp->nlp_state; + /* rport needs to be unregistered first */ + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + break; } /* Check for Nport to NPort pt2pt protocol */ diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4f9222eb2266..cb73cf9e9ba5 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -1130,6 +1130,25 @@ lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb) } /** + * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB + * @data: A pointer to the immediate command data portion of the IOCB. + * @fcp_cmnd: The FCP Command that is provided by the SCSI layer. + * + * The routine copies the entire FCP command from @fcp_cmnd to @data while + * byte swapping the data to big endian format for transmission on the wire. + **/ +static void +lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd) +{ + int i, j; + + for (i = 0, j = 0; i < sizeof(struct fcp_cmnd); + i += sizeof(uint32_t), j++) { + ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]); + } +} + +/** * lpfc_scsi_prep_dma_buf_s3 - DMA mapping for scsi buffer to SLI3 IF spec * @phba: The Hba for which this call is being executed. * @lpfc_cmd: The scsi buffer which is going to be mapped. 
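The relocated lpfc_fcpcmd_to_iocb() above is a word-wise endian-converting copy: every 32-bit word of the FCP command is byte-swapped to big endian as it is written into the IOCB's immediate-data area. The same logic in stand-alone form, with htonl standing in for cpu_to_be32 and the 32-byte payload size treated as an assumption:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl models cpu_to_be32 */

#define PAYLOAD_BYTES 32	/* assumed sizeof(struct fcp_cmnd) */

static void copy_be32_words(uint8_t *dst, const uint8_t *src)
{
	uint32_t w;
	size_t off;

	for (off = 0; off < PAYLOAD_BYTES; off += sizeof(w)) {
		memcpy(&w, src + off, sizeof(w));	/* no unaligned loads */
		w = htonl(w);				/* no-op on BE hosts */
		memcpy(dst + off, &w, sizeof(w));
	}
}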
@@ -1264,6 +1283,7 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) * we need to set word 4 of IOCB here */ iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd); + lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd); return 0; } @@ -4127,24 +4147,6 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, } /** - * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB - * @data: A pointer to the immediate command data portion of the IOCB. - * @fcp_cmnd: The FCP Command that is provided by the SCSI layer. - * - * The routine copies the entire FCP command from @fcp_cmnd to @data while - * byte swapping the data to big endian format for transmission on the wire. - **/ -static void -lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd) -{ - int i, j; - for (i = 0, j = 0; i < sizeof(struct fcp_cmnd); - i += sizeof(uint32_t), j++) { - ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]); - } -} - -/** * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit * @vport: The virtual port for which this call is being executed. * @lpfc_cmd: The scsi command which needs to send. @@ -4223,9 +4225,6 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, fcp_cmnd->fcpCntl3 = 0; phba->fc4ControlRequests++; } - if (phba->sli_rev == 3 && - !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) - lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd); /* * Finish initializing those IOCB fields that are independent * of the scsi_cmnd request_buffer @@ -5118,9 +5117,10 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd) int status; rdata = lpfc_rport_data_from_scsi_device(cmnd->device); - if (!rdata) { + if (!rdata || !rdata->pnode) { lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP, - "0798 Device Reset rport failure: rdata x%p\n", rdata); + "0798 Device Reset rport failure: rdata x%p\n", + rdata); return FAILED; } pnode = rdata->pnode; @@ -5202,10 +5202,12 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) if (status == FAILED) { lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP, "0722 Target Reset rport failure: rdata x%p\n", rdata); - spin_lock_irq(shost->host_lock); - pnode->nlp_flag &= ~NLP_NPR_ADISC; - pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; - spin_unlock_irq(shost->host_lock); + if (pnode) { + spin_lock_irq(shost->host_lock); + pnode->nlp_flag &= ~NLP_NPR_ADISC; + pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; + spin_unlock_irq(shost->host_lock); + } lpfc_reset_flush_io_context(vport, tgt_id, lun_id, LPFC_CTX_TGT); return FAST_IO_FAIL; @@ -5857,6 +5859,31 @@ lpfc_disable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, return false; } +struct scsi_host_template lpfc_template_s3 = { + .module = THIS_MODULE, + .name = LPFC_DRIVER_NAME, + .info = lpfc_info, + .queuecommand = lpfc_queuecommand, + .eh_abort_handler = lpfc_abort_handler, + .eh_device_reset_handler = lpfc_device_reset_handler, + .eh_target_reset_handler = lpfc_target_reset_handler, + .eh_bus_reset_handler = lpfc_bus_reset_handler, + .slave_alloc = lpfc_slave_alloc, + .slave_configure = lpfc_slave_configure, + .slave_destroy = lpfc_slave_destroy, + .scan_finished = lpfc_scan_finished, + .this_id = -1, + .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, + .cmd_per_lun = LPFC_CMD_PER_LUN, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = lpfc_hba_attrs, + .max_sectors = 0xFFFF, + .vendor_id = LPFC_NL_VENDOR_ID, + .change_queue_depth = scsi_change_queue_depth, + .use_blk_tags = 1, + .track_queue_depth = 1, +}; + struct 
scsi_host_template lpfc_template = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 0389ac1e7b83..474e30cdee6e 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 207a43d952fa..56f73682d4bd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -918,12 +918,16 @@ __lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) lpfc_cmd = (struct lpfc_scsi_buf *) piocbq->context1; ndlp = lpfc_cmd->rdata->pnode; } else if ((piocbq->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) && - !(piocbq->iocb_flag & LPFC_IO_LIBDFC)) { ndlp = piocbq->context_un.ndlp; - else if (piocbq->iocb_flag & LPFC_IO_LIBDFC) - ndlp = piocbq->context_un.ndlp; - else + } else if (piocbq->iocb_flag & LPFC_IO_LIBDFC) { + if (piocbq->iocb_flag & LPFC_IO_LOOPBACK) + ndlp = NULL; + else + ndlp = piocbq->context_un.ndlp; + } else { ndlp = piocbq->context1; + } list_remove_head(lpfc_sgl_list, sglq, struct lpfc_sglq, list); start_sglq = sglq; @@ -2213,6 +2217,46 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) else mempool_free(pmb, phba->mbox_mem_pool); } + /** + * lpfc_sli4_unreg_rpi_cmpl_clr - mailbox completion handler + * @phba: Pointer to HBA context object. + * @pmb: Pointer to mailbox object. + * + * This function is the unreg rpi mailbox completion handler. It + * frees the memory resources associated with the completed mailbox + * command. An additional reference is put on the ndlp to prevent + * lpfc_nlp_release from freeing the rpi bit in the bitmask before + * the unreg mailbox command completes; this routine puts the + * reference back. + * + **/ +void +lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) +{ + struct lpfc_vport *vport = pmb->vport; + struct lpfc_nodelist *ndlp; + + ndlp = pmb->context1; + if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) { + if (phba->sli_rev == LPFC_SLI_REV4 && + (bf_get(lpfc_sli_intf_if_type, + &phba->sli4_hba.sli_intf) == + LPFC_SLI_INTF_IF_TYPE_2)) { + if (ndlp) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0010 UNREG_LOGIN vpi:%x " + "rpi:%x DID:%x map:%x %p\n", + vport->vpi, ndlp->nlp_rpi, + ndlp->nlp_DID, + ndlp->nlp_usg_map, ndlp); + + lpfc_nlp_put(ndlp); + } + } + } + + mempool_free(pmb, phba->mbox_mem_pool); +} /** * lpfc_sli_handle_mb_event - Handle mailbox completions from firmware @@ -12842,7 +12886,7 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * fails this function will return -ENXIO.
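The new completion handler pairs with the lpfc_nlp_get() taken in lpfc_unreg_rpi() earlier in this patch: the submit path pins the node with an extra reference and the completion releases it, so the ndlp (and its rpi bit) cannot be freed while the UNREG_LOGIN mailbox command is in flight. The generic shape of that hold-across-async-completion pattern, with illustrative names (kernel context):

#include <linux/kref.h>
#include <linux/slab.h>

struct node {
	struct kref ref;
	/* ... payload ... */
};

static void node_release(struct kref *kref)
{
	kfree(container_of(kref, struct node, ref));
}

/* Submit path: the pending completion owns one extra reference. */
static void submit_async(struct node *n)
{
	kref_get(&n->ref);
	/* ... issue the command; the completion fires later ... */
}

/* Completion path: give back the reference the submit path took. */
static void on_complete(struct node *n)
{
	kref_put(&n->ref, node_release);
}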
**/ int -lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint16_t startq) +lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) { struct lpfc_mbx_modify_eq_delay *eq_delay; LPFC_MBOXQ_t *mbox; @@ -12959,11 +13003,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) bf_set(lpfc_eq_context_size, &eq_create->u.request.context, LPFC_EQE_SIZE); bf_set(lpfc_eq_context_valid, &eq_create->u.request.context, 1); - /* Calculate delay multiper from maximum interrupt per second */ - if (imax > LPFC_DMULT_CONST) - dmult = 0; - else - dmult = LPFC_DMULT_CONST/imax - 1; + /* don't setup delay multiplier using EQ_CREATE */ + dmult = 0; bf_set(lpfc_eq_context_delay_multi, &eq_create->u.request.context, dmult); switch (eq->entry_count) { @@ -15662,14 +15703,14 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) struct lpfc_rpi_hdr *rpi_hdr; unsigned long iflag; - max_rpi = phba->sli4_hba.max_cfg_param.max_rpi; - rpi_limit = phba->sli4_hba.next_rpi; - /* * Fetch the next logical rpi. Because this index is logical, * the driver starts at 0 each time. */ spin_lock_irqsave(&phba->hbalock, iflag); + max_rpi = phba->sli4_hba.max_cfg_param.max_rpi; + rpi_limit = phba->sli4_hba.next_rpi; + rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0); if (rpi >= rpi_limit) rpi = LPFC_RPI_ALLOC_ERROR; @@ -15678,6 +15719,9 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.rpi_used++; phba->sli4_hba.rpi_count++; } + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "0001 rpi:%x max:%x lim:%x\n", + (int) rpi, max_rpi, rpi_limit); /* * Don't try to allocate more rpi header regions if the device limit diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 4a01452415cf..7fe99ff80846 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -80,6 +80,7 @@ struct lpfc_iocbq { #define LPFC_IO_OAS 0x10000 /* OAS FCP IO */ #define LPFC_IO_FOF 0x20000 /* FOF FCP IO */ +#define LPFC_IO_LOOPBACK 0x40000 /* Loopback IO */ uint32_t drvrTimeout; /* driver timeout in seconds */ uint32_t fcp_wqidx; /* index to FCP work queue */ diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 22ceb2b05ba1..6eca3b8124d3 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2014 Emulex. All rights reserved. * + * Copyright (C) 2009-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
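The lpfc_sli4_alloc_rpi() hunk above also shows why the max_rpi/rpi_limit reads moved under hbalock: the free-bit search and the claim must see one consistent view of the bitmap. A minimal sketch of the same allocate-from-bitmap idiom (kernel context; the range size and error sentinel are illustrative):

#include <linux/bitmap.h>
#include <linux/spinlock.h>

#define ID_RANGE	256
#define ID_ALLOC_ERROR	0xFFFF

static unsigned long id_bmask[BITS_TO_LONGS(ID_RANGE)];
static unsigned int id_limit = ID_RANGE;
static DEFINE_SPINLOCK(id_lock);

static unsigned int id_alloc(void)
{
	unsigned long flags;
	unsigned int id;

	spin_lock_irqsave(&id_lock, flags);
	id = find_next_zero_bit(id_bmask, id_limit, 0);
	if (id < id_limit)
		set_bit(id, id_bmask);	/* claim while still holding the lock */
	else
		id = ID_ALLOC_ERROR;
	spin_unlock_irqrestore(&id_lock, flags);

	return id;
}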
* * www.emulex.com * * * @@ -671,7 +671,7 @@ struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t, uint32_t); void lpfc_sli4_queue_free(struct lpfc_queue *); int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); -int lpfc_modify_fcp_eq_delay(struct lpfc_hba *, uint16_t); +int lpfc_modify_fcp_eq_delay(struct lpfc_hba *, uint32_t); int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t, uint32_t); int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *, diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 89413add2252..c37bb9f91c3b 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2014 Emulex. All rights reserved. * + * Copyright (C) 2004-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "10.4.8000.0." +#define LPFC_DRIVER_VERSION "10.5.0.0." #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ @@ -30,4 +30,4 @@ #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \ LPFC_DRIVER_VERSION -#define LPFC_COPYRIGHT "Copyright(c) 2004-2014 Emulex. All rights reserved." +#define LPFC_COPYRIGHT "Copyright(c) 2004-2015 Emulex. All rights reserved." diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c index e5cd8d8d4ce7..0adb2e015597 100644 --- a/drivers/scsi/mac53c94.c +++ b/drivers/scsi/mac53c94.c @@ -382,16 +382,16 @@ static void set_dma_cmds(struct fsc_state *state, struct scsi_cmnd *cmd) if (dma_len > 0xffff) panic("mac53c94: scatterlist element >= 64k"); total += dma_len; - st_le16(&dcmds->req_count, dma_len); - st_le16(&dcmds->command, dma_cmd); - st_le32(&dcmds->phy_addr, dma_addr); + dcmds->req_count = cpu_to_le16(dma_len); + dcmds->command = cpu_to_le16(dma_cmd); + dcmds->phy_addr = cpu_to_le32(dma_addr); dcmds->xfer_status = 0; ++dcmds; } dma_cmd += OUTPUT_LAST - OUTPUT_MORE; - st_le16(&dcmds[-1].command, dma_cmd); - st_le16(&dcmds->command, DBDMA_STOP); + dcmds[-1].command = cpu_to_le16(dma_cmd); + dcmds->command = cpu_to_le16(DBDMA_STOP); cmd->SCp.this_residual = total; } diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index 1e85c07e3b62..d64a769b8155 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -483,7 +483,6 @@ static struct platform_driver mac_scsi_driver = { .remove = __exit_p(mac_scsi_remove), .driver = { .name = DRV_MODULE_NAME, - .owner = THIS_MODULE, }, }; diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index 57a95e2c3442..555367f00228 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1287,9 +1287,9 @@ static void set_dma_cmds(struct mesh_state *ms, struct scsi_cmnd *cmd) } if (dma_len > 0xffff) panic("mesh: scatterlist element >= 64k"); - st_le16(&dcmds->req_count, dma_len - off); - st_le16(&dcmds->command, dma_cmd); - st_le32(&dcmds->phy_addr, dma_addr + off); + dcmds->req_count = cpu_to_le16(dma_len - off); + dcmds->command = cpu_to_le16(dma_cmd); + dcmds->phy_addr = cpu_to_le32(dma_addr + off); dcmds->xfer_status = 0; ++dcmds; dtot += dma_len - off; @@ -1303,15 +1303,15 @@ static void set_dma_cmds(struct mesh_state *ms, struct scsi_cmnd *cmd) static 
char mesh_extra_buf[64]; dtot = sizeof(mesh_extra_buf); - st_le16(&dcmds->req_count, dtot); - st_le32(&dcmds->phy_addr, virt_to_phys(mesh_extra_buf)); + dcmds->req_count = cpu_to_le16(dtot); + dcmds->phy_addr = cpu_to_le32(virt_to_phys(mesh_extra_buf)); dcmds->xfer_status = 0; ++dcmds; } dma_cmd += OUTPUT_LAST - OUTPUT_MORE; - st_le16(&dcmds[-1].command, dma_cmd); + dcmds[-1].command = cpu_to_le16(dma_cmd); memset(dcmds, 0, sizeof(*dcmds)); - st_le16(&dcmds->command, DBDMA_STOP); + dcmds->command = cpu_to_le16(DBDMA_STOP); ms->dma_count = dtot; } diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 113e6c9826a1..33f60c92e20e 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -18,6 +18,9 @@ config SCSI_QLA_FC 2322, 6322 ql2322_fw.bin 24xx, 54xx ql2400_fw.bin 25xx ql2500_fw.bin + 2031 ql2600_fw.bin + 8031 ql8300_fw.bin + 27xx ql2700_fw.bin Upon request, the driver caches the firmware image until the driver is unloaded. diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index d77fe43793b6..0e6ee3ca30e6 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -11,9 +11,9 @@ * ---------------------------------------------------------------------- * | Level | Last Value Used | Holes | * ---------------------------------------------------------------------- - * | Module Init and Probe | 0x017d | 0x0144,0x0146 | + * | Module Init and Probe | 0x017f | 0x0146 | * | | | 0x015b-0x0160 | - * | | | 0x016e-0x0170 | + * | | | 0x016e-0x0170 | * | Mailbox commands | 0x118d | 0x1115-0x1116 | * | | | 0x111a-0x111b | * | Device Discovery | 0x2016 | 0x2020-0x2022, | @@ -60,7 +60,7 @@ * | | | 0xb13c-0xb140 | * | | | 0xb149 | * | MultiQ | 0xc00c | | - * | Misc | 0xd213 | 0xd011-0xd017 | + * | Misc | 0xd300 | 0xd016-0xd017 | * | | | 0xd021,0xd024 | * | | | 0xd025,0xd029 | * | | | 0xd02a,0xd02e | diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 5f6b2960cccb..e86201d3b8c6 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2163,7 +2163,7 @@ struct ct_fdmi_hba_attr { uint8_t node_name[WWN_SIZE]; uint8_t manufacturer[64]; uint8_t serial_num[32]; - uint8_t model[16]; + uint8_t model[16+1]; uint8_t model_desc[80]; uint8_t hw_version[32]; uint8_t driver_version[32]; @@ -2184,9 +2184,9 @@ struct ct_fdmiv2_hba_attr { uint16_t len; union { uint8_t node_name[WWN_SIZE]; - uint8_t manufacturer[32]; + uint8_t manufacturer[64]; uint8_t serial_num[32]; - uint8_t model[16]; + uint8_t model[16+1]; uint8_t model_desc[80]; uint8_t hw_version[16]; uint8_t driver_version[32]; @@ -2252,7 +2252,7 @@ struct ct_fdmiv2_port_attr { uint32_t cur_speed; uint32_t max_frame_size; uint8_t os_dev_name[32]; - uint8_t host_name[32]; + uint8_t host_name[256]; uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; uint8_t port_sym_name[128]; @@ -2283,7 +2283,7 @@ struct ct_fdmi_port_attr { uint32_t cur_speed; uint32_t max_frame_size; uint8_t os_dev_name[32]; - uint8_t host_name[32]; + uint8_t host_name[256]; } a; }; @@ -3132,7 +3132,8 @@ struct qla_hw_data { IS_QLA25XX(ha) || IS_QLA81XX(ha) || \ IS_QLA82XX(ha) || IS_QLA83XX(ha) || \ IS_QLA8044(ha) || IS_QLA27XX(ha)) -#define IS_MSIX_NACK_CAPABLE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) +#define IS_MSIX_NACK_CAPABLE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha) || \ + IS_QLA27XX(ha)) #define IS_NOPOLLING_TYPE(ha) (IS_QLA81XX(ha) && (ha)->flags.msix_enabled) #define IS_FAC_REQUIRED(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha) || \ 
IS_QLA27XX(ha)) @@ -3300,6 +3301,8 @@ struct qla_hw_data { #define RISC_RDY_AFT_RESET 3 #define RISC_SRAM_DUMP_CMPL 4 #define RISC_EXT_MEM_DUMP_CMPL 5 +#define ISP_MBX_RDY 6 +#define ISP_SOFT_RESET_CMPL 7 int fw_dump_reading; int prev_minidump_failed; dma_addr_t eft_dma; @@ -3587,6 +3590,7 @@ typedef struct scsi_qla_host { #define VP_BIND_NEEDED 2 #define VP_DELETE_NEEDED 3 #define VP_SCR_NEEDED 4 /* State Change Request registration */ +#define VP_CONFIG_OK 5 /* Flag to cfg VP, if FW is ready */ atomic_t vp_state; #define VP_OFFLINE 0 #define VP_ACTIVE 1 diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5bb57c5282c9..285cb204f300 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1121,7 +1121,7 @@ qla81xx_reset_mpi(scsi_qla_host_t *vha) * * Returns 0 on success. */ -static inline void +static inline int qla24xx_reset_risc(scsi_qla_host_t *vha) { unsigned long flags = 0; @@ -1130,6 +1130,7 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) uint32_t cnt, d2; uint16_t wd; static int abts_cnt; /* ISP abort retry counts */ + int rval = QLA_SUCCESS; spin_lock_irqsave(&ha->hardware_lock, flags); @@ -1142,26 +1143,57 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) udelay(10); } + if (!(RD_REG_DWORD(&reg->ctrl_status) & CSRX_DMA_ACTIVE)) + set_bit(DMA_SHUTDOWN_CMPL, &ha->fw_dump_cap_flags); + + ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x017e, + "HCCR: 0x%x, Control Status %x, DMA active status:0x%x\n", + RD_REG_DWORD(&reg->hccr), + RD_REG_DWORD(&reg->ctrl_status), + (RD_REG_DWORD(&reg->ctrl_status) & CSRX_DMA_ACTIVE)); + WRT_REG_DWORD(&reg->ctrl_status, CSRX_ISP_SOFT_RESET|CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); pci_read_config_word(ha->pdev, PCI_COMMAND, &wd); udelay(100); + /* Wait for firmware to complete NVRAM accesses. */ d2 = (uint32_t) RD_REG_WORD(&reg->mailbox0); - for (cnt = 10000 ; cnt && d2; cnt--) { - udelay(5); - d2 = (uint32_t) RD_REG_WORD(&reg->mailbox0); + for (cnt = 10000; RD_REG_WORD(&reg->mailbox0) != 0 && + rval == QLA_SUCCESS; cnt--) { barrier(); + if (cnt) + udelay(5); + else + rval = QLA_FUNCTION_TIMEOUT; } + if (rval == QLA_SUCCESS) + set_bit(ISP_MBX_RDY, &ha->fw_dump_cap_flags); + + ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x017f, + "HCCR: 0x%x, MailBox0 Status 0x%x\n", + RD_REG_DWORD(&reg->hccr), + RD_REG_DWORD(&reg->mailbox0)); + /* Wait for soft-reset to complete.
 	d2 = RD_REG_DWORD(&reg->ctrl_status);
-	for (cnt = 6000000 ; cnt && (d2 & CSRX_ISP_SOFT_RESET); cnt--) {
-		udelay(5);
-		d2 = RD_REG_DWORD(&reg->ctrl_status);
+	for (cnt = 0; cnt < 6000000; cnt++) {
 		barrier();
+		if ((RD_REG_DWORD(&reg->ctrl_status) &
+		    CSRX_ISP_SOFT_RESET) == 0)
+			break;
+
+		udelay(5);
 	}
 
+	if (!(RD_REG_DWORD(&reg->ctrl_status) & CSRX_ISP_SOFT_RESET))
+		set_bit(ISP_SOFT_RESET_CMPL, &ha->fw_dump_cap_flags);
+
+	ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x015d,
+	    "HCCR: 0x%x, Soft Reset status: 0x%x\n",
+	    RD_REG_DWORD(&reg->hccr),
+	    RD_REG_DWORD(&reg->ctrl_status));
 
 	/* If required, do an MPI FW reset now */
 	if (test_and_clear_bit(MPI_RESET_NEEDED, &vha->dpc_flags)) {
@@ -1190,16 +1222,32 @@ qla24xx_reset_risc(scsi_qla_host_t *vha)
 	RD_REG_DWORD(&reg->hccr);
 
 	d2 = (uint32_t) RD_REG_WORD(&reg->mailbox0);
-	for (cnt = 6000000 ; cnt && d2; cnt--) {
-		udelay(5);
-		d2 = (uint32_t) RD_REG_WORD(&reg->mailbox0);
+	for (cnt = 6000000; RD_REG_WORD(&reg->mailbox0) != 0 &&
+	    rval == QLA_SUCCESS; cnt--) {
 		barrier();
+		if (cnt)
+			udelay(5);
+		else
+			rval = QLA_FUNCTION_TIMEOUT;
 	}
 
+	if (rval == QLA_SUCCESS)
+		set_bit(RISC_RDY_AFT_RESET, &ha->fw_dump_cap_flags);
+
+	ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x015e,
+	    "Host Risc 0x%x, mailbox0 0x%x\n",
+	    RD_REG_DWORD(&reg->hccr),
+	    RD_REG_WORD(&reg->mailbox0));
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
+	ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x015f,
+	    "Driver in %s mode\n",
+	    IS_NOPOLLING_TYPE(ha) ? "Interrupt" : "Polling");
+
 	if (IS_NOPOLLING_TYPE(ha))
 		ha->isp_ops->enable_intrs(ha);
+
+	return rval;
 }
 
 static void
@@ -2243,8 +2291,11 @@ qla2x00_fw_ready(scsi_qla_host_t *vha)
 
 	rval = QLA_SUCCESS;
 
-	/* 20 seconds for loop down. */
-	min_wait = 20;
+	/* Time to wait for loop down */
+	if (IS_P3P_TYPE(ha))
+		min_wait = 30;
+	else
+		min_wait = 20;
 
 	/*
 	 * Firmware should take at most one RATOV to login, plus 5 seconds for
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index a04a1b1f7f32..6dc14cd782b2 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -756,11 +756,21 @@ skip_rio:
 		/*
 		 * In case of loop down, restore WWPN from
 		 * NVRAM in case of FA-WWPN capable ISP
+		 * Restore for Physical Port only
 		 */
-		if (ha->flags.fawwpn_enabled) {
-			void *wwpn = ha->init_cb->port_name;
+		if (!vha->vp_idx) {
+			if (ha->flags.fawwpn_enabled) {
+				void *wwpn = ha->init_cb->port_name;
+				memcpy(vha->port_name, wwpn, WWN_SIZE);
+				fc_host_port_name(vha->host) =
+				    wwn_to_u64(vha->port_name);
+				ql_dbg(ql_dbg_init + ql_dbg_verbose,
+				    vha, 0x0144, "LOOP DOWN detected,"
+				    "restore WWPN %016llx\n",
+				    wwn_to_u64(vha->port_name));
+			}
 
-			memcpy(vha->port_name, wwpn, WWN_SIZE);
+			clear_bit(VP_CONFIG_OK, &vha->vp_flags);
 		}
 
 		vha->device_flags |= DFLG_NO_CABLE;
@@ -947,6 +957,7 @@ skip_rio:
 
 		set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 		set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+		set_bit(VP_CONFIG_OK, &vha->vp_flags);
 
 		qlt_async_event(mb[0], vha, mb);
 		break;
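A note on the reworked wait loops in qla_init.c above: the mailbox-ready waits and the soft-reset wait now share one bounded-poll shape, reading the register, leaving as soon as the condition clears, pacing the reads, and turning loop exhaustion into an explicit error plus a fw_dump_cap_flags bit. A minimal standalone sketch of that pattern, illustrative only, with read_status() as a hypothetical stand-in for the RD_REG_WORD()/RD_REG_DWORD() accessors:

	#include <linux/delay.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	/* Poll until (status & mask) clears, or give up after max_tries reads. */
	static int poll_until_clear(u32 (*read_status)(void), u32 mask,
				    unsigned int max_tries)
	{
		unsigned int cnt;

		for (cnt = 0; cnt < max_tries; cnt++) {
			if (!(read_status() & mask))
				return 0;	/* condition cleared in time */
			udelay(5);		/* pace successive reads */
		}
		return -ETIMEDOUT;	/* caller records the failed step */
	}

The payoff is that qla24xx_reset_risc() can now return QLA_FUNCTION_TIMEOUT instead of silently falling through, and the per-step completion bits make a later firmware dump self-describing.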
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 72971daa2552..02b1c1c5355b 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -33,7 +33,7 @@ static int
 qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 {
-	int rval;
+	int rval, i;
 	unsigned long flags = 0;
 	device_reg_t *reg;
 	uint8_t abort_active;
@@ -43,10 +43,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 	uint16_t __iomem *optr;
 	uint32_t cnt;
 	uint32_t mboxes;
+	uint16_t __iomem *mbx_reg;
 	unsigned long wait_time;
 	struct qla_hw_data *ha = vha->hw;
 	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+
 	ql_dbg(ql_dbg_mbx, vha, 0x1000, "Entered %s.\n", __func__);
 
 	if (ha->pdev->error_state > pci_channel_io_frozen) {
@@ -376,6 +378,18 @@ mbx_done:
 		ql_dbg(ql_dbg_disc, base_vha, 0x1020,
 		    "**** Failed mbx[0]=%x, mb[1]=%x, mb[2]=%x, mb[3]=%x, cmd=%x ****.\n",
 		    mcp->mb[0], mcp->mb[1], mcp->mb[2], mcp->mb[3], command);
+
+		ql_dbg(ql_dbg_disc, vha, 0x1115,
+		    "host status: 0x%x, flags:0x%lx, intr ctrl reg:0x%x, intr status:0x%x\n",
+		    RD_REG_DWORD(&reg->isp24.host_status),
+		    ha->fw_dump_cap_flags,
+		    RD_REG_DWORD(&reg->isp24.ictrl),
+		    RD_REG_DWORD(&reg->isp24.istatus));
+
+		mbx_reg = &reg->isp24.mailbox0;
+		for (i = 0; i < 6; i++)
+			ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x1116,
+			    "mbox[%d] 0x%04x\n", i, RD_REG_WORD(mbx_reg++));
 	} else {
 		ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__);
 	}
@@ -2838,7 +2852,7 @@ qla2x00_write_serdes_word(scsi_qla_host_t *vha, uint16_t addr, uint16_t data)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	if (!IS_QLA2031(vha->hw))
+	if (!IS_QLA2031(vha->hw) && !IS_QLA27XX(vha->hw))
 		return QLA_FUNCTION_FAILED;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1182,
@@ -2846,7 +2860,11 @@ qla2x00_write_serdes_word(scsi_qla_host_t *vha, uint16_t addr, uint16_t data)
 
 	mcp->mb[0] = MBC_WRITE_SERDES;
 	mcp->mb[1] = addr;
-	mcp->mb[2] = data & 0xff;
+	if (IS_QLA2031(vha->hw))
+		mcp->mb[2] = data & 0xff;
+	else
+		mcp->mb[2] = data;
+
 	mcp->mb[3] = 0;
 	mcp->out_mb = MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->in_mb = MBX_0;
@@ -2872,7 +2890,7 @@ qla2x00_read_serdes_word(scsi_qla_host_t *vha, uint16_t addr, uint16_t *data)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	if (!IS_QLA2031(vha->hw))
+	if (!IS_QLA2031(vha->hw) && !IS_QLA27XX(vha->hw))
 		return QLA_FUNCTION_FAILED;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1185,
@@ -2887,7 +2905,10 @@ qla2x00_read_serdes_word(scsi_qla_host_t *vha, uint16_t addr, uint16_t *data)
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
 
-	*data = mcp->mb[1] & 0xff;
+	if (IS_QLA2031(vha->hw))
+		*data = mcp->mb[1] & 0xff;
+	else
+		*data = mcp->mb[1];
 
 	if (rval != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_mbx, vha, 0x1186,
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index ca3804e34833..cc94192511cf 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -306,19 +306,25 @@ qla2x00_vp_abort_isp(scsi_qla_host_t *vha)
 static int
 qla2x00_do_dpc_vp(scsi_qla_host_t *vha)
 {
+	struct qla_hw_data *ha = vha->hw;
+	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+
 	ql_dbg(ql_dbg_dpc + ql_dbg_verbose, vha, 0x4012,
 	    "Entering %s vp_flags: 0x%lx.\n", __func__, vha->vp_flags);
 
 	qla2x00_do_work(vha);
 
-	if (test_and_clear_bit(VP_IDX_ACQUIRED, &vha->vp_flags)) {
-		/* VP acquired. complete port configuration */
-		ql_dbg(ql_dbg_dpc, vha, 0x4014,
-		    "Configure VP scheduled.\n");
-		qla24xx_configure_vp(vha);
-		ql_dbg(ql_dbg_dpc, vha, 0x4015,
-		    "Configure VP end.\n");
-		return 0;
+	/* Check if Fw is ready to configure VP first */
+	if (test_bit(VP_CONFIG_OK, &base_vha->vp_flags)) {
+		if (test_and_clear_bit(VP_IDX_ACQUIRED, &vha->vp_flags)) {
+			/* VP acquired.
complete port configuration */ + ql_dbg(ql_dbg_dpc, vha, 0x4014, + "Configure VP scheduled.\n"); + qla24xx_configure_vp(vha); + ql_dbg(ql_dbg_dpc, vha, 0x4015, + "Configure VP end.\n"); + return 0; + } } if (test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5319b3cb219e..7462dd70b150 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5834,3 +5834,6 @@ MODULE_FIRMWARE(FW_FILE_ISP2300); MODULE_FIRMWARE(FW_FILE_ISP2322); MODULE_FIRMWARE(FW_FILE_ISP24XX); MODULE_FIRMWARE(FW_FILE_ISP25XX); +MODULE_FIRMWARE(FW_FILE_ISP2031); +MODULE_FIRMWARE(FW_FILE_ISP8031); +MODULE_FIRMWARE(FW_FILE_ISP27XX); diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index b656a05613e8..028e8c8a7de9 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -1718,13 +1718,16 @@ qla83xx_beacon_blink(struct scsi_qla_host *vha) uint16_t orig_led_cfg[6]; uint32_t led_10_value, led_43_value; - if (!IS_QLA83XX(ha) && !IS_QLA81XX(ha)) + if (!IS_QLA83XX(ha) && !IS_QLA81XX(ha) && !IS_QLA27XX(ha)) return; if (!ha->beacon_blink_led) return; - if (IS_QLA2031(ha)) { + if (IS_QLA27XX(ha)) { + qla2x00_write_ram_word(vha, 0x1003, 0x40000230); + qla2x00_write_ram_word(vha, 0x1004, 0x40000230); + } else if (IS_QLA2031(ha)) { led_select_value = qla83xx_select_led_port(ha); qla83xx_wr_reg(vha, led_select_value, 0x40000230); @@ -1811,7 +1814,7 @@ qla24xx_beacon_on(struct scsi_qla_host *vha) return QLA_FUNCTION_FAILED; } - if (IS_QLA2031(ha)) + if (IS_QLA2031(ha) || IS_QLA27XX(ha)) goto skip_gpio; spin_lock_irqsave(&ha->hardware_lock, flags); @@ -1848,7 +1851,7 @@ qla24xx_beacon_off(struct scsi_qla_host *vha) ha->beacon_blink_led = 0; - if (IS_QLA2031(ha)) + if (IS_QLA2031(ha) || IS_QLA27XX(ha)) goto set_fw_options; if (IS_QLA8031(ha) || IS_QLA81XX(ha)) diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index a8c0c7362e48..962cb89fe0ae 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -190,7 +190,7 @@ static inline void qla27xx_write_reg(__iomem struct device_reg_24xx *reg, uint offset, uint32_t data, void *buf) { - __iomem void *window = reg + offset; + __iomem void *window = (void __iomem *)reg + offset; if (buf) { WRT_REG_DWORD(window, data); @@ -219,6 +219,8 @@ qla27xx_skip_entry(struct qla27xx_fwdt_entry *ent, void *buf) { if (buf) ent->hdr.driver_flags |= DRIVER_FLAG_SKIP_ENTRY; + ql_dbg(ql_dbg_misc + ql_dbg_verbose, NULL, 0xd011, + "Skipping entry %d\n", ent->hdr.entry_type); } static int @@ -784,6 +786,13 @@ qla27xx_walk_template(struct scsi_qla_host *vha, ql_dbg(ql_dbg_misc, vha, 0xd01b, "%s: len=%lx\n", __func__, *len); + + if (buf) { + ql_log(ql_log_warn, vha, 0xd015, + "Firmware dump saved to temp buffer (%ld/%p)\n", + vha->host_no, vha->hw->fw_dump); + qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); + } } static void @@ -938,6 +947,10 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) ql_log(ql_log_warn, vha, 0xd01e, "fwdump buffer missing.\n"); else if (!vha->hw->fw_dump_template) ql_log(ql_log_warn, vha, 0xd01f, "fwdump template missing.\n"); + else if (vha->hw->fw_dumped) + ql_log(ql_log_warn, vha, 0xd300, + "Firmware has been previously dumped (%p)," + " -- ignoring request\n", vha->hw->fw_dump); else qla27xx_execute_fwdt_template(vha); diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index d88b86214ec5..2ed9ab90a455 100644 --- 
a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.07.00.16-k" +#define QLA2XXX_VERSION "8.07.00.18-k" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 7 diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index c9c3b579eece..3833bf59fb66 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -972,18 +972,24 @@ EXPORT_SYMBOL(scsi_report_opcode); * Description: Gets a reference to the scsi_device and increments the use count * of the underlying LLDD module. You must hold host_lock of the * parent Scsi_Host or already have a reference when calling this. + * + * This will fail if a device is deleted or cancelled, or when the LLD module + * is in the process of being unloaded. */ int scsi_device_get(struct scsi_device *sdev) { - if (sdev->sdev_state == SDEV_DEL) - return -ENXIO; + if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL) + goto fail; if (!get_device(&sdev->sdev_gendev)) - return -ENXIO; - /* We can fail try_module_get if we're doing SCSI operations - * from module exit (like cache flush) */ - __module_get(sdev->host->hostt->module); - + goto fail; + if (!try_module_get(sdev->host->hostt->module)) + goto fail_put_device; return 0; + +fail_put_device: + put_device(&sdev->sdev_gendev); +fail: + return -ENXIO; } EXPORT_SYMBOL(scsi_device_get); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 9c0a520d933c..60aae01caa89 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1570,16 +1570,15 @@ EXPORT_SYMBOL(scsi_add_device); void scsi_rescan_device(struct device *dev) { - if (!dev->driver) - return; - - if (try_module_get(dev->driver->owner)) { + device_lock(dev); + if (dev->driver && try_module_get(dev->driver->owner)) { struct scsi_driver *drv = to_scsi_driver(dev->driver); if (drv->rescan) drv->rescan(dev); module_put(dev->driver->owner); } + device_unlock(dev); } EXPORT_SYMBOL(scsi_rescan_device); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 5d6f348eb3d8..24eaaf66af71 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -265,6 +265,7 @@ static const struct { { FC_PORTSPEED_40GBIT, "40 Gbit" }, { FC_PORTSPEED_50GBIT, "50 Gbit" }, { FC_PORTSPEED_100GBIT, "100 Gbit" }, + { FC_PORTSPEED_25GBIT, "25 Gbit" }, { FC_PORTSPEED_NOT_NEGOTIATED, "Not Negotiated" }, }; fc_bitfield_name_search(port_speed, fc_port_speed_names) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6b78476d04bb..dcc42446f58a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -564,10 +564,12 @@ static int sd_major(int major_idx) } } -static struct scsi_disk *__scsi_disk_get(struct gendisk *disk) +static struct scsi_disk *scsi_disk_get(struct gendisk *disk) { struct scsi_disk *sdkp = NULL; + mutex_lock(&sd_ref_mutex); + if (disk->private_data) { sdkp = scsi_disk(disk); if (scsi_device_get(sdkp->device) == 0) @@ -575,27 +577,6 @@ static struct scsi_disk *__scsi_disk_get(struct gendisk *disk) else sdkp = NULL; } - return sdkp; -} - -static struct scsi_disk *scsi_disk_get(struct gendisk *disk) -{ - struct scsi_disk *sdkp; - - mutex_lock(&sd_ref_mutex); - sdkp = __scsi_disk_get(disk); - mutex_unlock(&sd_ref_mutex); - return sdkp; -} - -static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev) -{ - struct scsi_disk *sdkp; - - mutex_lock(&sd_ref_mutex); - sdkp = dev_get_drvdata(dev); - if (sdkp) - sdkp = __scsi_disk_get(sdkp->disk); mutex_unlock(&sd_ref_mutex); 
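A note on the scsi_device_get() rework in scsi.c above: it is now a strict acquire-in-order, release-in-reverse sequence. Condensed sketch of the shape (same calls as the patch; the wrapper name is invented for illustration):

	static int sdev_get_refs(struct scsi_device *sdev)
	{
		if (!get_device(&sdev->sdev_gendev))
			goto fail;
		if (!try_module_get(sdev->host->hostt->module))
			goto fail_put_device;
		return 0;		/* holds both device and module references */

	fail_put_device:
		put_device(&sdev->sdev_gendev);	/* undo in reverse order */
	fail:
		return -ENXIO;
	}

The behavioral change is that a failing try_module_get() is now a hard error; the old __module_get() could pin a module that was already on its way out.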
return sdkp; } @@ -610,8 +591,6 @@ static void scsi_disk_put(struct scsi_disk *sdkp) mutex_unlock(&sd_ref_mutex); } - - static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, unsigned int dix, unsigned int dif) { @@ -1525,12 +1504,9 @@ static int sd_sync_cache(struct scsi_disk *sdkp) static void sd_rescan(struct device *dev) { - struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); + struct scsi_disk *sdkp = dev_get_drvdata(dev); - if (sdkp) { - revalidate_disk(sdkp->disk); - scsi_disk_put(sdkp); - } + revalidate_disk(sdkp->disk); } @@ -2235,11 +2211,11 @@ got_data: { char cap_str_2[10], cap_str_10[10]; - u64 sz = (u64)sdkp->capacity << ilog2(sector_size); - string_get_size(sz, STRING_UNITS_2, cap_str_2, - sizeof(cap_str_2)); - string_get_size(sz, STRING_UNITS_10, cap_str_10, + string_get_size(sdkp->capacity, sector_size, + STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(sdkp->capacity, sector_size, + STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); if (sdkp->first_scan || old_capacity != sdkp->capacity) { @@ -3149,13 +3125,13 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) */ static void sd_shutdown(struct device *dev) { - struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); + struct scsi_disk *sdkp = dev_get_drvdata(dev); if (!sdkp) return; /* this can happen */ if (pm_runtime_suspended(dev)) - goto exit; + return; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); @@ -3166,14 +3142,11 @@ static void sd_shutdown(struct device *dev) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } - -exit: - scsi_disk_put(sdkp); } static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) { - struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); + struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; if (!sdkp) @@ -3199,7 +3172,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } done: - scsi_disk_put(sdkp); return ret; } @@ -3215,18 +3187,13 @@ static int sd_suspend_runtime(struct device *dev) static int sd_resume(struct device *dev) { - struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); - int ret = 0; + struct scsi_disk *sdkp = dev_get_drvdata(dev); if (!sdkp->device->manage_start_stop) - goto done; + return 0; sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - ret = sd_start_stop_device(sdkp, 1); - -done: - scsi_disk_put(sdkp); - return ret; + return sd_start_stop_device(sdkp, 1); } /** diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index efc6e446b6c8..d9dad90344d5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -308,11 +308,16 @@ enum storvsc_request_type { * This is the end of Protocol specific defines. */ -static int storvsc_ringbuffer_size = (20 * PAGE_SIZE); +static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); +static u32 max_outstanding_req_per_channel; + +static int storvsc_vcpus_per_sub_channel = 4; module_param(storvsc_ringbuffer_size, int, S_IRUGO); MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)"); +module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO); +MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels"); /* * Timeout in seconds for all devices managed by this driver. 
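On the capacity printing change in sd.c above: string_get_size() now takes the size in device blocks plus the block size, so the caller no longer pre-scales to bytes. Hedged usage sketch (buffer name illustrative):

	char buf[10];

	/* capacity in sectors plus sector size; renders as e.g. "500 GB" */
	string_get_size(sdkp->capacity, sector_size, STRING_UNITS_10,
			buf, sizeof(buf));

This drops the old u64 sz = capacity << ilog2(sector_size) intermediate, which duplicated the scaling at every call site and was only correct for power-of-two block sizes.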
*/ @@ -320,7 +325,6 @@ static int storvsc_timeout = 180; static int msft_blist_flags = BLIST_TRY_VPD_PAGES; -#define STORVSC_MAX_IO_REQUESTS 200 static void storvsc_on_channel_callback(void *context); @@ -347,7 +351,10 @@ struct storvsc_cmd_request { /* Synchronize the request/response if needed */ struct completion wait_event; - struct hv_multipage_buffer data_buffer; + struct vmbus_channel_packet_multipage_buffer mpb; + struct vmbus_packet_mpb_array *payload; + u32 payload_sz; + struct vstor_packet vstor_packet; }; @@ -373,6 +380,10 @@ struct storvsc_device { unsigned char path_id; unsigned char target_id; + /* + * Max I/O, the device can support. + */ + u32 max_transfer_bytes; /* Used for vsc/vsp channel reset process */ struct storvsc_cmd_request init_request; struct storvsc_cmd_request reset_request; @@ -618,19 +629,6 @@ cleanup: return NULL; } -/* Disgusting wrapper functions */ -static inline unsigned long sg_kmap_atomic(struct scatterlist *sgl, int idx) -{ - void *addr = kmap_atomic(sg_page(sgl + idx)); - return (unsigned long)addr; -} - -static inline void sg_kunmap_atomic(unsigned long addr) -{ - kunmap_atomic((void *)addr); -} - - /* Assume the original sgl has enough room */ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl, struct scatterlist *bounce_sgl, @@ -645,32 +643,38 @@ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl, unsigned long bounce_addr = 0; unsigned long dest_addr = 0; unsigned long flags; + struct scatterlist *cur_dest_sgl; + struct scatterlist *cur_src_sgl; local_irq_save(flags); - + cur_dest_sgl = orig_sgl; + cur_src_sgl = bounce_sgl; for (i = 0; i < orig_sgl_count; i++) { - dest_addr = sg_kmap_atomic(orig_sgl,i) + orig_sgl[i].offset; + dest_addr = (unsigned long) + kmap_atomic(sg_page(cur_dest_sgl)) + + cur_dest_sgl->offset; dest = dest_addr; - destlen = orig_sgl[i].length; + destlen = cur_dest_sgl->length; if (bounce_addr == 0) - bounce_addr = sg_kmap_atomic(bounce_sgl,j); + bounce_addr = (unsigned long)kmap_atomic( + sg_page(cur_src_sgl)); while (destlen) { - src = bounce_addr + bounce_sgl[j].offset; - srclen = bounce_sgl[j].length - bounce_sgl[j].offset; + src = bounce_addr + cur_src_sgl->offset; + srclen = cur_src_sgl->length - cur_src_sgl->offset; copylen = min(srclen, destlen); memcpy((void *)dest, (void *)src, copylen); total_copied += copylen; - bounce_sgl[j].offset += copylen; + cur_src_sgl->offset += copylen; destlen -= copylen; dest += copylen; - if (bounce_sgl[j].offset == bounce_sgl[j].length) { + if (cur_src_sgl->offset == cur_src_sgl->length) { /* full */ - sg_kunmap_atomic(bounce_addr); + kunmap_atomic((void *)bounce_addr); j++; /* @@ -684,21 +688,27 @@ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl, /* * We are done; cleanup and return. 
*/ - sg_kunmap_atomic(dest_addr - orig_sgl[i].offset); + kunmap_atomic((void *)(dest_addr - + cur_dest_sgl->offset)); local_irq_restore(flags); return total_copied; } /* if we need to use another bounce buffer */ - if (destlen || i != orig_sgl_count - 1) - bounce_addr = sg_kmap_atomic(bounce_sgl,j); + if (destlen || i != orig_sgl_count - 1) { + cur_src_sgl = sg_next(cur_src_sgl); + bounce_addr = (unsigned long) + kmap_atomic( + sg_page(cur_src_sgl)); + } } else if (destlen == 0 && i == orig_sgl_count - 1) { /* unmap the last bounce that is < PAGE_SIZE */ - sg_kunmap_atomic(bounce_addr); + kunmap_atomic((void *)bounce_addr); } } - sg_kunmap_atomic(dest_addr - orig_sgl[i].offset); + kunmap_atomic((void *)(dest_addr - cur_dest_sgl->offset)); + cur_dest_sgl = sg_next(cur_dest_sgl); } local_irq_restore(flags); @@ -719,48 +729,62 @@ static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl, unsigned long bounce_addr = 0; unsigned long src_addr = 0; unsigned long flags; + struct scatterlist *cur_src_sgl; + struct scatterlist *cur_dest_sgl; local_irq_save(flags); + cur_src_sgl = orig_sgl; + cur_dest_sgl = bounce_sgl; + for (i = 0; i < orig_sgl_count; i++) { - src_addr = sg_kmap_atomic(orig_sgl,i) + orig_sgl[i].offset; + src_addr = (unsigned long) + kmap_atomic(sg_page(cur_src_sgl)) + + cur_src_sgl->offset; src = src_addr; - srclen = orig_sgl[i].length; + srclen = cur_src_sgl->length; if (bounce_addr == 0) - bounce_addr = sg_kmap_atomic(bounce_sgl,j); + bounce_addr = (unsigned long) + kmap_atomic(sg_page(cur_dest_sgl)); while (srclen) { /* assume bounce offset always == 0 */ - dest = bounce_addr + bounce_sgl[j].length; - destlen = PAGE_SIZE - bounce_sgl[j].length; + dest = bounce_addr + cur_dest_sgl->length; + destlen = PAGE_SIZE - cur_dest_sgl->length; copylen = min(srclen, destlen); memcpy((void *)dest, (void *)src, copylen); total_copied += copylen; - bounce_sgl[j].length += copylen; + cur_dest_sgl->length += copylen; srclen -= copylen; src += copylen; - if (bounce_sgl[j].length == PAGE_SIZE) { + if (cur_dest_sgl->length == PAGE_SIZE) { /* full..move to next entry */ - sg_kunmap_atomic(bounce_addr); + kunmap_atomic((void *)bounce_addr); + bounce_addr = 0; j++; + } - /* if we need to use another bounce buffer */ - if (srclen || i != orig_sgl_count - 1) - bounce_addr = sg_kmap_atomic(bounce_sgl,j); - - } else if (srclen == 0 && i == orig_sgl_count - 1) { - /* unmap the last bounce that is < PAGE_SIZE */ - sg_kunmap_atomic(bounce_addr); + /* if we need to use another bounce buffer */ + if (srclen && bounce_addr == 0) { + cur_dest_sgl = sg_next(cur_dest_sgl); + bounce_addr = (unsigned long) + kmap_atomic( + sg_page(cur_dest_sgl)); } + } - sg_kunmap_atomic(src_addr - orig_sgl[i].offset); + kunmap_atomic((void *)(src_addr - cur_src_sgl->offset)); + cur_src_sgl = sg_next(cur_src_sgl); } + if (bounce_addr) + kunmap_atomic((void *)bounce_addr); + local_irq_restore(flags); return total_copied; @@ -970,6 +994,8 @@ static int storvsc_channel_init(struct hv_device *device) STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL) process_sub_channels = true; } + stor_device->max_transfer_bytes = + vstor_packet->storage_channel_properties.max_transfer_bytes; memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_END_INITIALIZATION; @@ -1080,6 +1106,8 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) struct Scsi_Host *host; struct storvsc_device *stor_dev; struct hv_device *dev = host_dev->dev; + u32 payload_sz = cmd_request->payload_sz; + 
void *payload = cmd_request->payload; stor_dev = get_in_stor_device(dev); host = stor_dev->host; @@ -1109,10 +1137,14 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) sense_hdr.ascq); scsi_set_resid(scmnd, - cmd_request->data_buffer.len - + cmd_request->payload->range.len - vm_srb->data_transfer_length); scmnd->scsi_done(scmnd); + + if (payload_sz > + sizeof(struct vmbus_channel_packet_multipage_buffer)) + kfree(payload); } static void storvsc_on_io_completion(struct hv_device *device, @@ -1314,7 +1346,7 @@ static int storvsc_dev_remove(struct hv_device *device) } static int storvsc_do_io(struct hv_device *device, - struct storvsc_cmd_request *request) + struct storvsc_cmd_request *request) { struct storvsc_device *stor_device; struct vstor_packet *vstor_packet; @@ -1346,19 +1378,20 @@ static int storvsc_do_io(struct hv_device *device, vstor_packet->vm_srb.data_transfer_length = - request->data_buffer.len; + request->payload->range.len; vstor_packet->operation = VSTOR_OPERATION_EXECUTE_SRB; - if (request->data_buffer.len) { - ret = vmbus_sendpacket_multipagebuffer(outgoing_channel, - &request->data_buffer, + if (request->payload->range.len) { + + ret = vmbus_sendpacket_mpb_desc(outgoing_channel, + request->payload, request->payload_sz, vstor_packet, (sizeof(struct vstor_packet) - vmscsi_size_delta), (unsigned long)request); } else { - ret = vmbus_sendpacket(device->channel, vstor_packet, + ret = vmbus_sendpacket(outgoing_channel, vstor_packet, (sizeof(struct vstor_packet) - vmscsi_size_delta), (unsigned long)request, @@ -1376,7 +1409,6 @@ static int storvsc_do_io(struct hv_device *device, static int storvsc_device_configure(struct scsi_device *sdevice) { - scsi_change_queue_depth(sdevice, STORVSC_MAX_IO_REQUESTS); blk_queue_max_segment_size(sdevice->request_queue, PAGE_SIZE); @@ -1526,6 +1558,10 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) struct scatterlist *sgl; unsigned int sg_count = 0; struct vmscsi_request *vm_srb; + struct scatterlist *cur_sgl; + struct vmbus_packet_mpb_array *payload; + u32 payload_sz; + u32 length; if (vmstor_current_major <= VMSTOR_WIN8_MAJOR) { /* @@ -1579,46 +1615,71 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) memcpy(vm_srb->cdb, scmnd->cmnd, vm_srb->cdb_length); - cmd_request->data_buffer.len = scsi_bufflen(scmnd); - if (scsi_sg_count(scmnd)) { - sgl = (struct scatterlist *)scsi_sglist(scmnd); - sg_count = scsi_sg_count(scmnd); + sgl = (struct scatterlist *)scsi_sglist(scmnd); + sg_count = scsi_sg_count(scmnd); + + length = scsi_bufflen(scmnd); + payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; + payload_sz = sizeof(cmd_request->mpb); + if (sg_count) { /* check if we need to bounce the sgl */ if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) { cmd_request->bounce_sgl = - create_bounce_buffer(sgl, scsi_sg_count(scmnd), - scsi_bufflen(scmnd), + create_bounce_buffer(sgl, sg_count, + length, vm_srb->data_in); if (!cmd_request->bounce_sgl) return SCSI_MLQUEUE_HOST_BUSY; cmd_request->bounce_sgl_count = - ALIGN(scsi_bufflen(scmnd), PAGE_SIZE) >> - PAGE_SHIFT; + ALIGN(length, PAGE_SIZE) >> PAGE_SHIFT; if (vm_srb->data_in == WRITE_TYPE) copy_to_bounce_buffer(sgl, - cmd_request->bounce_sgl, - scsi_sg_count(scmnd)); + cmd_request->bounce_sgl, sg_count); sgl = cmd_request->bounce_sgl; sg_count = cmd_request->bounce_sgl_count; } - cmd_request->data_buffer.offset = sgl[0].offset; - for (i = 0; i < sg_count; i++) - 
cmd_request->data_buffer.pfn_array[i] = - page_to_pfn(sg_page((&sgl[i]))); + if (sg_count > MAX_PAGE_BUFFER_COUNT) { + + payload_sz = (sg_count * sizeof(void *) + + sizeof(struct vmbus_packet_mpb_array)); + payload = kmalloc(payload_sz, GFP_ATOMIC); + if (!payload) { + if (cmd_request->bounce_sgl_count) + destroy_bounce_buffer( + cmd_request->bounce_sgl, + cmd_request->bounce_sgl_count); + + return SCSI_MLQUEUE_DEVICE_BUSY; + } + } + + payload->range.len = length; + payload->range.offset = sgl[0].offset; + + cur_sgl = sgl; + for (i = 0; i < sg_count; i++) { + payload->range.pfn_array[i] = + page_to_pfn(sg_page((cur_sgl))); + cur_sgl = sg_next(cur_sgl); + } } else if (scsi_sglist(scmnd)) { - cmd_request->data_buffer.offset = + payload->range.len = length; + payload->range.offset = virt_to_phys(scsi_sglist(scmnd)) & (PAGE_SIZE-1); - cmd_request->data_buffer.pfn_array[0] = + payload->range.pfn_array[0] = virt_to_phys(scsi_sglist(scmnd)) >> PAGE_SHIFT; } + cmd_request->payload = payload; + cmd_request->payload_sz = payload_sz; + /* Invokes the vsc to start an IO */ ret = storvsc_do_io(dev, cmd_request); @@ -1646,12 +1707,8 @@ static struct scsi_host_template scsi_driver = { .eh_timed_out = storvsc_eh_timed_out, .slave_configure = storvsc_device_configure, .cmd_per_lun = 255, - .can_queue = STORVSC_MAX_IO_REQUESTS*STORVSC_MAX_TARGETS, .this_id = -1, - /* no use setting to 0 since ll_blk_rw reset it to 1 */ - /* currently 32 */ - .sg_tablesize = MAX_MULTIPAGE_BUFFER_COUNT, - .use_clustering = DISABLE_CLUSTERING, + .use_clustering = ENABLE_CLUSTERING, /* Make sure we dont get a sg segment crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, .no_write_same = 1, @@ -1686,6 +1743,7 @@ static int storvsc_probe(struct hv_device *device, const struct hv_vmbus_device_id *dev_id) { int ret; + int num_cpus = num_online_cpus(); struct Scsi_Host *host; struct hv_host_device *host_dev; bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false); @@ -1694,6 +1752,7 @@ static int storvsc_probe(struct hv_device *device, int max_luns_per_target; int max_targets; int max_channels; + int max_sub_channels = 0; /* * Based on the windows host we are running on, @@ -1719,12 +1778,18 @@ static int storvsc_probe(struct hv_device *device, max_luns_per_target = STORVSC_MAX_LUNS_PER_TARGET; max_targets = STORVSC_MAX_TARGETS; max_channels = STORVSC_MAX_CHANNELS; + /* + * On Windows8 and above, we support sub-channels for storage. + * The number of sub-channels offerred is based on the number of + * VCPUs in the guest. + */ + max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel); break; } - if (dev_id->driver_data == SFC_GUID) - scsi_driver.can_queue = (STORVSC_MAX_IO_REQUESTS * - STORVSC_FC_MAX_TARGETS); + scsi_driver.can_queue = (max_outstanding_req_per_channel * + (max_sub_channels + 1)); + host = scsi_host_alloc(&scsi_driver, sizeof(struct hv_host_device)); if (!host) @@ -1780,6 +1845,12 @@ static int storvsc_probe(struct hv_device *device, /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; + /* + * set the table size based on the info we got + * from the host. 
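On the storvsc payload handling above: when the scatterlist exceeds MAX_PAGE_BUFFER_COUNT pages, the driver sizes a variable-length multipage descriptor and fills its PFN array by walking the list with sg_next(), since chained scatterlists are not a flat array. Condensed sketch; note the patch sizes the array with sizeof(void *), written here as sizeof(u64) because the pfn_array entries are u64 (identical on 64-bit):

	payload_sz = sizeof(struct vmbus_packet_mpb_array) +
		     sg_count * sizeof(u64);
	payload = kmalloc(payload_sz, GFP_ATOMIC);	/* queuecommand runs in atomic context */
	if (!payload)
		return SCSI_MLQUEUE_DEVICE_BUSY;

	payload->range.len = length;
	payload->range.offset = sgl[0].offset;
	for (i = 0, cur_sgl = sgl; i < sg_count; i++, cur_sgl = sg_next(cur_sgl))
		payload->range.pfn_array[i] = page_to_pfn(sg_page(cur_sgl));

The completion path then frees the buffer only when payload_sz exceeds the inline mpb member, matching the allocation condition.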
+ */ + host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); + /* Register the HBA and start the scsi bus scan */ ret = scsi_add_host(host, &device->device); if (ret != 0) @@ -1837,7 +1908,6 @@ static struct hv_driver storvsc_drv = { static int __init storvsc_drv_init(void) { - u32 max_outstanding_req_per_channel; /* * Divide the ring buffer data size (which is 1 page less @@ -1852,10 +1922,6 @@ static int __init storvsc_drv_init(void) vmscsi_size_delta, sizeof(u64))); - if (max_outstanding_req_per_channel < - STORVSC_MAX_IO_REQUESTS) - return -EINVAL; - return vmbus_driver_register(&storvsc_drv); } diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 2a906d1d34ba..22a42836d193 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -676,7 +676,6 @@ static struct platform_driver sun3_scsi_driver = { .remove = __exit_p(sun3_scsi_remove), .driver = { .name = DRV_MODULE_NAME, - .owner = THIS_MODULE, }, }; diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 9217af9bf734..6652a8171de6 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -214,8 +214,6 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba) struct ufs_qcom_host *host = hba->priv; struct phy *phy = host->generic_phy; int ret = 0; - u8 major; - u16 minor, step; bool is_rate_B = (UFS_QCOM_LIMIT_HS_RATE == PA_HS_MODE_B) ? true : false; @@ -224,8 +222,6 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba) /* provide 1ms delay to let the reset pulse propagate */ usleep_range(1000, 1100); - ufs_qcom_get_controller_revision(hba, &major, &minor, &step); - ufs_qcom_phy_save_controller_version(phy, major, minor, step); ret = ufs_qcom_phy_calibrate_phy(phy, is_rate_B); if (ret) { dev_err(hba->dev, "%s: ufs_qcom_phy_calibrate_phy() failed, ret = %d\n", @@ -698,16 +694,24 @@ out: */ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) { - u8 major; - u16 minor, step; + struct ufs_qcom_host *host = hba->priv; - ufs_qcom_get_controller_revision(hba, &major, &minor, &step); + if (host->hw_ver.major == 0x1) + hba->quirks |= UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS; - /* - * TBD - * here we should be advertising controller quirks according to - * controller version. 
- */ + if (host->hw_ver.major >= 0x2) { + if (!ufs_qcom_cap_qunipro(host)) + /* Legacy UniPro mode still need following quirks */ + hba->quirks |= UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS; + } +} + +static void ufs_qcom_set_caps(struct ufs_hba *hba) +{ + struct ufs_qcom_host *host = hba->priv; + + if (host->hw_ver.major >= 0x2) + host->caps = UFS_QCOM_CAP_QUNIPRO; } static int ufs_qcom_get_bus_vote(struct ufs_qcom_host *host, @@ -929,6 +933,13 @@ static int ufs_qcom_init(struct ufs_hba *hba) if (err) goto out_host_free; + ufs_qcom_get_controller_revision(hba, &host->hw_ver.major, + &host->hw_ver.minor, &host->hw_ver.step); + + /* update phy revision information before calling phy_init() */ + ufs_qcom_phy_save_controller_version(host->generic_phy, + host->hw_ver.major, host->hw_ver.minor, host->hw_ver.step); + phy_init(host->generic_phy); err = phy_power_on(host->generic_phy); if (err) @@ -938,6 +949,7 @@ static int ufs_qcom_init(struct ufs_hba *hba) if (err) goto out_disable_phy; + ufs_qcom_set_caps(hba); ufs_qcom_advertise_quirks(hba); hba->caps |= UFSHCD_CAP_CLK_GATING | UFSHCD_CAP_CLK_SCALING; diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h index 9a6febd007df..db2c0a00e846 100644 --- a/drivers/scsi/ufs/ufs-qcom.h +++ b/drivers/scsi/ufs/ufs-qcom.h @@ -151,7 +151,23 @@ struct ufs_qcom_bus_vote { struct device_attribute max_bus_bw; }; +/* Host controller hardware version: major.minor.step */ +struct ufs_hw_version { + u16 step; + u16 minor; + u8 major; +}; struct ufs_qcom_host { + + /* + * Set this capability if host controller supports the QUniPro mode + * and if driver wants the Host controller to operate in QUniPro mode. + * Note: By default this capability will be kept enabled if host + * controller supports the QUniPro mode. + */ + #define UFS_QCOM_CAP_QUNIPRO UFS_BIT(0) + u32 caps; + struct phy *generic_phy; struct ufs_hba *hba; struct ufs_qcom_bus_vote bus_vote; @@ -161,10 +177,20 @@ struct ufs_qcom_host { struct clk *rx_l1_sync_clk; struct clk *tx_l1_sync_clk; bool is_lane_clks_enabled; + + struct ufs_hw_version hw_ver; }; #define ufs_qcom_is_link_off(hba) ufshcd_is_link_off(hba) #define ufs_qcom_is_link_active(hba) ufshcd_is_link_active(hba) #define ufs_qcom_is_link_hibern8(hba) ufshcd_is_link_hibern8(hba) +static inline bool ufs_qcom_cap_qunipro(struct ufs_qcom_host *host) +{ + if (host->caps & UFS_QCOM_CAP_QUNIPRO) + return true; + else + return false; +} + #endif /* UFS_QCOM_H_ */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 2aa85e398f76..648a44675880 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -183,6 +183,7 @@ static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on, static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on); static int ufshcd_uic_hibern8_exit(struct ufs_hba *hba); static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba); +static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba); static int ufshcd_host_reset_and_restore(struct ufs_hba *hba); static irqreturn_t ufshcd_intr(int irq, void *__hba); static int ufshcd_config_pwr_mode(struct ufs_hba *hba, @@ -972,6 +973,8 @@ ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) ufshcd_hold(hba, false); mutex_lock(&hba->uic_cmd_mutex); + ufshcd_add_delay_before_dme_cmd(hba); + spin_lock_irqsave(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, uic_cmd); spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -2058,6 +2061,37 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba) return ret; 
} +static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) +{ + #define MIN_DELAY_BEFORE_DME_CMDS_US 1000 + unsigned long min_sleep_time_us; + + if (!(hba->quirks & UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS)) + return; + + /* + * last_dme_cmd_tstamp will be 0 only for 1st call to + * this function + */ + if (unlikely(!ktime_to_us(hba->last_dme_cmd_tstamp))) { + min_sleep_time_us = MIN_DELAY_BEFORE_DME_CMDS_US; + } else { + unsigned long delta = + (unsigned long) ktime_to_us( + ktime_sub(ktime_get(), + hba->last_dme_cmd_tstamp)); + + if (delta < MIN_DELAY_BEFORE_DME_CMDS_US) + min_sleep_time_us = + MIN_DELAY_BEFORE_DME_CMDS_US - delta; + else + return; /* no more delay required */ + } + + /* allow sleep for extra 50us if needed */ + usleep_range(min_sleep_time_us, min_sleep_time_us + 50); +} + /** * ufshcd_dme_set_attr - UIC command for DME_SET, DME_PEER_SET * @hba: per adapter instance @@ -2157,6 +2191,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) mutex_lock(&hba->uic_cmd_mutex); init_completion(&uic_async_done); + ufshcd_add_delay_before_dme_cmd(hba); spin_lock_irqsave(hba->host->host_lock, flags); hba->uic_async_done = &uic_async_done; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 4a574aa45855..b47ff07698e8 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -366,6 +366,7 @@ struct ufs_init_prefetch { * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask * @dev_cmd: ufs device management command information + * @last_dme_cmd_tstamp: time stamp of the last completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device * @vreg_info: UFS device voltage regulator information * @clk_list_head: UFS host controller clocks list node head @@ -416,6 +417,13 @@ struct ufs_hba { unsigned int irq; bool is_irq_enabled; + /* + * delay before each dme command is required as the unipro + * layer has shown instabilities + */ + #define UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS UFS_BIT(0) + + unsigned int quirks; /* Deviations from standard UFSHCI spec. */ wait_queue_head_t tm_wq; wait_queue_head_t tm_tag_wq; @@ -446,6 +454,7 @@ struct ufs_hba { /* Device management request data */ struct ufs_dev_cmd dev_cmd; + ktime_t last_dme_cmd_tstamp; /* Keeps information of the UFS device connected to this host */ struct ufs_dev_info dev_info; diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 34199d206ba6..fad22caf0eff 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -63,6 +63,7 @@ #define VSCSIFRONT_OP_ADD_LUN 1 #define VSCSIFRONT_OP_DEL_LUN 2 +#define VSCSIFRONT_OP_READD_LUN 3 /* Tuning point. */ #define VSCSIIF_DEFAULT_CMD_PER_LUN 10 @@ -113,8 +114,13 @@ struct vscsifrnt_info { DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS); struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS]; + /* Following items are protected by the host lock. 
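On ufshcd_add_delay_before_dme_cmd() above: the quirk enforces a minimum gap since the last completed DME command and sleeps only for the remainder. Minimal sketch of the ktime arithmetic (names invented; MIN_GAP_US mirrors MIN_DELAY_BEFORE_DME_CMDS_US):

	#include <linux/delay.h>
	#include <linux/ktime.h>

	#define MIN_GAP_US 1000

	static void wait_min_gap(ktime_t last_cmd)
	{
		unsigned long elapsed_us = (unsigned long)
			ktime_to_us(ktime_sub(ktime_get(), last_cmd));

		if (elapsed_us >= MIN_GAP_US)
			return;		/* enough time already passed */

		/* sleep for the remainder, plus a little slack */
		usleep_range(MIN_GAP_US - elapsed_us,
			     MIN_GAP_US - elapsed_us + 50);
	}

The driver additionally treats a zero last_dme_cmd_tstamp (first command after init) as requiring the full minimum delay.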
*/ wait_queue_head_t wq_sync; + wait_queue_head_t wq_pause; unsigned int wait_ring_available:1; + unsigned int waiting_pause:1; + unsigned int pause:1; + unsigned callers; char dev_state_path[64]; struct task_struct *curr; @@ -274,31 +280,31 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info, wake_up(&shadow->wq_reset); } -static int scsifront_cmd_done(struct vscsifrnt_info *info) +static void scsifront_do_response(struct vscsifrnt_info *info, + struct vscsiif_response *ring_rsp) +{ + if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || + test_bit(ring_rsp->rqid, info->shadow_free_bitmap), + "illegal rqid %u returned by backend!\n", ring_rsp->rqid)) + return; + + if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) + scsifront_cdb_cmd_done(info, ring_rsp); + else + scsifront_sync_cmd_done(info, ring_rsp); +} + +static int scsifront_ring_drain(struct vscsifrnt_info *info) { struct vscsiif_response *ring_rsp; RING_IDX i, rp; int more_to_do = 0; - unsigned long flags; - - spin_lock_irqsave(info->host->host_lock, flags); rp = info->ring.sring->rsp_prod; rmb(); /* ordering required respective to dom0 */ for (i = info->ring.rsp_cons; i != rp; i++) { - ring_rsp = RING_GET_RESPONSE(&info->ring, i); - - if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || - test_bit(ring_rsp->rqid, info->shadow_free_bitmap), - "illegal rqid %u returned by backend!\n", - ring_rsp->rqid)) - continue; - - if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) - scsifront_cdb_cmd_done(info, ring_rsp); - else - scsifront_sync_cmd_done(info, ring_rsp); + scsifront_do_response(info, ring_rsp); } info->ring.rsp_cons = i; @@ -308,6 +314,18 @@ static int scsifront_cmd_done(struct vscsifrnt_info *info) else info->ring.sring->rsp_event = i + 1; + return more_to_do; +} + +static int scsifront_cmd_done(struct vscsifrnt_info *info) +{ + int more_to_do; + unsigned long flags; + + spin_lock_irqsave(info->host->host_lock, flags); + + more_to_do = scsifront_ring_drain(info); + info->wait_ring_available = 0; spin_unlock_irqrestore(info->host->host_lock, flags); @@ -328,6 +346,24 @@ static irqreturn_t scsifront_irq_fn(int irq, void *dev_id) return IRQ_HANDLED; } +static void scsifront_finish_all(struct vscsifrnt_info *info) +{ + unsigned i; + struct vscsiif_response resp; + + scsifront_ring_drain(info); + + for (i = 0; i < VSCSIIF_MAX_REQS; i++) { + if (test_bit(i, info->shadow_free_bitmap)) + continue; + resp.rqid = i; + resp.sense_len = 0; + resp.rslt = DID_RESET << 16; + resp.residual_len = 0; + scsifront_do_response(info, &resp); + } +} + static int map_data_for_request(struct vscsifrnt_info *info, struct scsi_cmnd *sc, struct vscsiif_request *ring_req, @@ -475,6 +511,27 @@ static struct vscsiif_request *scsifront_command2ring( return ring_req; } +static int scsifront_enter(struct vscsifrnt_info *info) +{ + if (info->pause) + return 1; + info->callers++; + return 0; +} + +static void scsifront_return(struct vscsifrnt_info *info) +{ + info->callers--; + if (info->callers) + return; + + if (!info->waiting_pause) + return; + + info->waiting_pause = 0; + wake_up(&info->wq_pause); +} + static int scsifront_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) { @@ -486,6 +543,10 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, uint16_t rqid; spin_lock_irqsave(shost->host_lock, flags); + if (scsifront_enter(info)) { + spin_unlock_irqrestore(shost->host_lock, flags); + return SCSI_MLQUEUE_HOST_BUSY; + } if (RING_FULL(&info->ring)) goto busy; @@ -505,6 +566,7 @@ static int 
scsifront_queuecommand(struct Scsi_Host *shost, if (err < 0) { pr_debug("%s: err %d\n", __func__, err); scsifront_put_rqid(info, rqid); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); if (err == -ENOMEM) return SCSI_MLQUEUE_HOST_BUSY; @@ -514,11 +576,13 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, } scsifront_do_request(info); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); return 0; busy: + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); pr_debug("%s: busy\n", __func__); return SCSI_MLQUEUE_HOST_BUSY; @@ -549,7 +613,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) if (ring_req) break; } - if (err) { + if (err || info->pause) { spin_unlock_irq(host->host_lock); kfree(shadow); return FAILED; @@ -561,6 +625,11 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) spin_lock_irq(host->host_lock); } + if (scsifront_enter(info)) { + spin_unlock_irq(host->host_lock); + return FAILED; + } + ring_req->act = act; ring_req->ref_rqid = s->rqid; @@ -587,6 +656,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) err = FAILED; } + scsifront_return(info); spin_unlock_irq(host->host_lock); return err; } @@ -644,6 +714,7 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info) { struct xenbus_device *dev = info->dev; struct vscsiif_sring *sring; + grant_ref_t gref; int err = -ENOMEM; /***** Frontend to Backend ring start *****/ @@ -656,14 +727,14 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info) SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(sring)); + err = xenbus_grant_ring(dev, sring, 1, &gref); if (err < 0) { free_page((unsigned long)sring); xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)"); return err; } - info->ring_ref = err; + info->ring_ref = gref; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) { @@ -698,6 +769,13 @@ free_gnttab: return err; } +static void scsifront_free_ring(struct vscsifrnt_info *info) +{ + unbind_from_irqhandler(info->irq, info); + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); +} + static int scsifront_init_ring(struct vscsifrnt_info *info) { struct xenbus_device *dev = info->dev; @@ -744,9 +822,7 @@ again: fail: xenbus_transaction_end(xbt, 1); free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); return err; } @@ -779,6 +855,7 @@ static int scsifront_probe(struct xenbus_device *dev, } init_waitqueue_head(&info->wq_sync); + init_waitqueue_head(&info->wq_pause); spin_lock_init(&info->shadow_lock); snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no); @@ -802,13 +879,60 @@ static int scsifront_probe(struct xenbus_device *dev, return 0; free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); scsi_host_put(host); return err; } +static int scsifront_resume(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err; + + spin_lock_irq(host->host_lock); + + /* Finish all still pending commands. */ + scsifront_finish_all(info); + + spin_unlock_irq(host->host_lock); + + /* Reconnect to dom0. 
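On the xen-scsifront suspend support above: scsifront_enter()/scsifront_return() implement a small quiesce handshake, with a pause flag refusing new entrants and a caller count that the suspender drains. Compressed sketch, with struct and names invented for illustration; the driver keeps these fields in vscsifrnt_info under the host lock and waits with wait_event_interruptible():

	struct quiesce {
		spinlock_t *lock;
		wait_queue_head_t wq;
		unsigned int callers;	/* requests currently inside */
		bool pause;		/* set: reject new entrants */
		bool waiting;		/* suspender is blocked on wq */
	};

	static void quiesce_wait(struct quiesce *q)
	{
		spin_lock_irq(q->lock);
		q->pause = true;
		while (q->callers) {
			q->waiting = true;
			spin_unlock_irq(q->lock);
			wait_event(q->wq, !q->waiting);	/* last caller clears and wakes */
			spin_lock_irq(q->lock);
		}
		spin_unlock_irq(q->lock);
	}

Rejected entrants surface as SCSI_MLQUEUE_HOST_BUSY, so the midlayer simply requeues commands until the device resumes.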
*/ + scsifront_free_ring(info); + err = scsifront_init_ring(info); + if (err) { + dev_err(&dev->dev, "fail to resume %d\n", err); + scsi_host_put(host); + return err; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; +} + +static int scsifront_suspend(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err = 0; + + /* No new commands for the backend. */ + spin_lock_irq(host->host_lock); + info->pause = 1; + while (info->callers && !err) { + info->waiting_pause = 1; + info->wait_ring_available = 0; + spin_unlock_irq(host->host_lock); + wake_up(&info->wq_sync); + err = wait_event_interruptible(info->wq_pause, + !info->waiting_pause); + spin_lock_irq(host->host_lock); + } + spin_unlock_irq(host->host_lock); + return err; +} + static int scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -823,10 +947,7 @@ static int scsifront_remove(struct xenbus_device *dev) } mutex_unlock(&scsifront_mutex); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - unbind_from_irqhandler(info->irq, info); - + scsifront_free_ring(info); scsi_host_put(info->host); return 0; @@ -919,6 +1040,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) scsi_device_put(sdev); } break; + case VSCSIFRONT_OP_READD_LUN: + if (device_state == XenbusStateConnected) + xenbus_printf(XBT_NIL, dev->nodename, + info->dev_state_path, + "%d", XenbusStateConnected); + break; default: break; } @@ -932,21 +1059,29 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) static void scsifront_read_backend_params(struct xenbus_device *dev, struct vscsifrnt_info *info) { - unsigned int sg_grant; + unsigned int sg_grant, nr_segs; int ret; struct Scsi_Host *host = info->host; ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u", &sg_grant); - if (ret == 1 && sg_grant) { - sg_grant = min_t(unsigned int, sg_grant, SG_ALL); - sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE); - host->sg_tablesize = min_t(unsigned int, sg_grant, + if (ret != 1) + sg_grant = 0; + nr_segs = min_t(unsigned int, sg_grant, SG_ALL); + nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE); + nr_segs = min_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE * PAGE_SIZE / sizeof(struct scsiif_request_segment)); - host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512; - } - dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize); + + if (!info->pause && sg_grant) + dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs); + else if (info->pause && nr_segs < host->sg_tablesize) + dev_warn(&dev->dev, + "SG entries decreased from %d to %u - device may not work properly anymore\n", + host->sg_tablesize, nr_segs); + + host->sg_tablesize = nr_segs; + host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512; } static void scsifront_backend_changed(struct xenbus_device *dev, @@ -965,6 +1100,14 @@ static void scsifront_backend_changed(struct xenbus_device *dev, case XenbusStateConnected: scsifront_read_backend_params(dev, info); + + if (info->pause) { + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN); + xenbus_switch_state(dev, XenbusStateConnected); + info->pause = 0; + return; + } + if (xenbus_read_driver_state(dev->nodename) == XenbusStateInitialised) scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); @@ -1002,6 +1145,8 @@ static struct xenbus_driver scsifront_driver = { .ids = 
scsifront_ids, .probe = scsifront_probe, .remove = scsifront_remove, + .resume = scsifront_resume, + .suspend = scsifront_suspend, .otherend_changed = scsifront_backend_changed, }; diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 91442fab5725..c6c824356464 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -359,8 +359,8 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */ #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \ ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, + loff_t file_offset) { struct lu_env *env; struct cl_io *io; @@ -399,7 +399,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, * size changing by concurrent truncates and writes. * 1. Need inode mutex to operate transient pages. */ - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_lock(&inode->i_mutex); LASSERT(obj->cob_transient_pages == 0); @@ -408,7 +408,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, size_t offs; count = min_t(size_t, iov_iter_count(iter), size); - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { if (file_offset >= i_size_read(inode)) break; if (file_offset + count > i_size_read(inode)) @@ -418,11 +418,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, result = iov_iter_get_pages_alloc(iter, &pages, count, &offs); if (likely(result > 0)) { int n = DIV_ROUND_UP(result + offs, PAGE_SIZE); - result = ll_direct_IO_26_seg(env, io, rw, inode, - file->f_mapping, - result, file_offset, - pages, n); - ll_free_user_pages(pages, n, rw==READ); + result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter), + inode, file->f_mapping, + result, file_offset, pages, + n); + ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ); } if (unlikely(result <= 0)) { /* If we can't allocate a large enough buffer @@ -449,11 +449,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, } out: LASSERT(obj->cob_transient_pages == 0); - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if (tot_bytes > 0) { - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { struct lov_stripe_md *lsm; lsm = ccc_inode_lsm_get(inode); diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index b3dd417b4719..109462303087 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -479,7 +479,7 @@ config FB_ATARI config FB_OF bool "Open Firmware frame buffer device support" - depends on (FB = y) && (PPC64 || PPC_OF) && (!PPC_PSERIES || PCI) + depends on (FB = y) && PPC && (!PPC_PSERIES || PCI) select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT @@ -1333,7 +1333,7 @@ config FB_RADEON select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT - select FB_MACMODES if PPC_OF + select FB_MACMODES if PPC help Choose this option if you want to use an ATI Radeon graphics card as a framebuffer device. There are both PCI and AGP versions. 
You diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index aedf2fbf9bf6..0156954bf340 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -965,7 +965,7 @@ static void __iomem *aty128_find_mem_vbios(struct aty128fb_par *par) /* fill in known card constants if pll_block is not available */ static void aty128_timings(struct aty128fb_par *par) { -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC /* instead of a table lookup, assume OF has properly * setup the PLL registers and use their values * to set the XCLK values and reference divider values */ @@ -979,7 +979,7 @@ static void aty128_timings(struct aty128fb_par *par) if (!par->constants.ref_clk) par->constants.ref_clk = 2950; -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC x_mpll_ref_fb_div = aty_ld_pll(X_MPLL_REF_FB_DIV); xclk_cntl = aty_ld_pll(XCLK_CNTL) & 0x7; Nx = (x_mpll_ref_fb_div & 0x00ff00) >> 8; diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 26d80a4486fb..01237c8fcdc6 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -74,7 +74,7 @@ #include <asm/io.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC #include <asm/pci-bridge.h> #include "../macmodes.h" @@ -83,7 +83,7 @@ #include <asm/btext.h> #endif -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ #ifdef CONFIG_MTRR #include <asm/mtrr.h> @@ -418,7 +418,7 @@ static int radeon_find_mem_vbios(struct radeonfb_info *rinfo) } #endif -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) /* * Read XTAL (ref clock), SCLK and MCLK from Open Firmware device * tree. Hopefully, ATI OF driver is kind enough to fill these @@ -448,7 +448,7 @@ static int radeon_read_xtal_OF(struct radeonfb_info *rinfo) return 0; } -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#endif /* CONFIG_PPC || CONFIG_SPARC */ /* * Read PLL infos from chip registers @@ -653,7 +653,7 @@ static void radeon_get_pllinfo(struct radeonfb_info *rinfo) rinfo->pll.ref_div = INPLL(PPLL_REF_DIV) & PPLL_REF_DIV_MASK; -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) /* * Retrieve PLL infos from Open Firmware first */ @@ -661,7 +661,7 @@ static void radeon_get_pllinfo(struct radeonfb_info *rinfo) printk(KERN_INFO "radeonfb: Retrieved PLL infos from Open Firmware\n"); goto found; } -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#endif /* CONFIG_PPC || CONFIG_SPARC */ /* * Check out if we have an X86 which gave us some PLL informations @@ -1910,7 +1910,7 @@ static int radeon_set_fbinfo(struct radeonfb_info *rinfo) * I put the card's memory at 0 in card space and AGP at some random high * local (0xe0000000 for now) that will be changed by XFree/DRI anyway */ -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC #undef SET_MC_FB_FROM_APERTURE static void fixup_memory_mappings(struct radeonfb_info *rinfo) { @@ -1984,7 +1984,7 @@ static void fixup_memory_mappings(struct radeonfb_info *rinfo) ((aper_base + aper_size - 1) & 0xffff0000) | (aper_base >> 16), 0xffff0000 | (agp_base >> 16)); } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ static void radeon_identify_vram(struct radeonfb_info *rinfo) @@ -2236,7 +2236,7 @@ static int radeonfb_pci_register(struct pci_dev *pdev, rinfo->family == CHIP_FAMILY_RS200) rinfo->errata |= CHIP_ERRATA_PLL_DELAY; -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) /* On PPC, we obtain the OF 
device-node pointer to the firmware * data for this chip */ @@ -2245,14 +2245,14 @@ static int radeonfb_pci_register(struct pci_dev *pdev, printk(KERN_WARNING "radeonfb (%s): Cannot match card to OF node !\n", pci_name(rinfo->pdev)); -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ -#ifdef CONFIG_PPC_OF +#endif /* CONFIG_PPC || CONFIG_SPARC */ +#ifdef CONFIG_PPC /* On PPC, the firmware sets up a memory mapping that tends * to cause lockups when enabling the engine. We reconfigure * the card internal memory mappings properly */ fixup_memory_mappings(rinfo); -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ /* Get VRAM size and type */ radeon_identify_vram(rinfo); diff --git a/drivers/video/fbdev/aty/radeon_monitor.c b/drivers/video/fbdev/aty/radeon_monitor.c index bc078d50d8f1..f1ce229de78d 100644 --- a/drivers/video/fbdev/aty/radeon_monitor.c +++ b/drivers/video/fbdev/aty/radeon_monitor.c @@ -55,7 +55,7 @@ static char *radeon_get_mon_name(int type) } -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) /* * Try to find monitor informations & EDID data out of the Open Firmware * device-tree. This also contains some "hacks" to work around a few machine @@ -160,7 +160,7 @@ static int radeon_probe_OF_head(struct radeonfb_info *rinfo, int head_no, } return MT_NONE; } -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#endif /* CONFIG_PPC || CONFIG_SPARC */ static int radeon_get_panel_info_BIOS(struct radeonfb_info *rinfo) @@ -499,11 +499,11 @@ void radeon_probe_screens(struct radeonfb_info *rinfo, * Old single head cards */ if (!rinfo->has_CRTC2) { -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0, &rinfo->mon1_EDID); -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#endif /* CONFIG_PPC || CONFIG_SPARC */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = @@ -548,11 +548,11 @@ void radeon_probe_screens(struct radeonfb_info *rinfo, /* * Probe primary head (DVI or laptop internal panel) */ -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0, &rinfo->mon1_EDID); -#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#endif /* CONFIG_PPC || CONFIG_SPARC */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_i2c_connector(rinfo, ddc_dvi, @@ -576,11 +576,11 @@ void radeon_probe_screens(struct radeonfb_info *rinfo, /* * Probe secondary head (mostly VGA, can be DVI) */ -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) if (rinfo->mon2_type == MT_NONE) rinfo->mon2_type = radeon_probe_OF_head(rinfo, 1, &rinfo->mon2_EDID); -#endif /* CONFIG_PPC_OF || defined(CONFIG_SPARC) */ +#endif /* CONFIG_PPC || defined(CONFIG_SPARC) */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon2_type == MT_NONE) rinfo->mon2_type = radeon_probe_i2c_connector(rinfo, ddc_vga, @@ -653,7 +653,7 @@ void radeon_probe_screens(struct radeonfb_info *rinfo, */ static void radeon_fixup_panel_info(struct radeonfb_info *rinfo) { -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC /* * LCD Flat panels should use fixed dividers, we enfore that on * PPC only for now... 
@@ -676,7 +676,7 @@ static void radeon_fixup_panel_info(struct radeonfb_info *rinfo) (rinfo->panel_info.post_divider << 16), ppll_div_sel); } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ } diff --git a/drivers/video/fbdev/aty/radeon_pm.c b/drivers/video/fbdev/aty/radeon_pm.c index 46a12f1a93c3..1417542738fc 100644 --- a/drivers/video/fbdev/aty/radeon_pm.c +++ b/drivers/video/fbdev/aty/radeon_pm.c @@ -523,7 +523,7 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) OUTPLL(pllVCLK_ECP_CNTL, tmp); /* X doesn't do that ... hrm, we do on mobility && Macs */ -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC if (rinfo->is_mobility) { tmp = INPLL(pllMCLK_CNTL); tmp &= ~(MCLK_CNTL__FORCE_MCLKA | @@ -541,7 +541,7 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) OUTPLL(pllMCLK_MISC, tmp); radeon_msleep(15); } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ } #ifdef CONFIG_PM @@ -1288,7 +1288,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo) radeon_pm_enable_dll_m10(rinfo); radeon_pm_yclk_mclk_sync_m10(rinfo); -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC if (rinfo->of_node != NULL) { int size; @@ -1298,7 +1298,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo) else mrtable = default_mrtable; } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ /* Program the SDRAM */ sdram_mode_reg = mrtable[0]; @@ -1943,7 +1943,7 @@ static void radeon_reinitialize_M10(struct radeonfb_info *rinfo) } #endif -#ifdef CONFIG_PPC_OF +#ifdef CONFIG_PPC #ifdef CONFIG_PPC_PMAC static void radeon_pm_m9p_reconfigure_mc(struct radeonfb_info *rinfo) { @@ -2512,7 +2512,7 @@ static void radeon_reinitialize_QW(struct radeonfb_info *rinfo) } #endif /* 0 */ -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ static void radeonfb_whack_power_state(struct radeonfb_info *rinfo, pci_power_t state) { @@ -2793,7 +2793,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev) return rc; } -#ifdef CONFIG_PPC_OF__disabled +#ifdef CONFIG_PPC__disabled static void radeonfb_early_resume(void *data) { struct radeonfb_info *rinfo = data; @@ -2803,7 +2803,7 @@ static void radeonfb_early_resume(void *data) radeonfb_pci_resume(rinfo->pdev); rinfo->no_schedule = 0; } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC */ #endif /* CONFIG_PM */ diff --git a/drivers/video/fbdev/aty/radeonfb.h b/drivers/video/fbdev/aty/radeonfb.h index cb846044f57c..039def41c920 100644 --- a/drivers/video/fbdev/aty/radeonfb.h +++ b/drivers/video/fbdev/aty/radeonfb.h @@ -20,7 +20,7 @@ #include <asm/io.h> -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) #include <asm/prom.h> #endif @@ -301,7 +301,7 @@ struct radeonfb_info { unsigned long fb_local_base; struct pci_dev *pdev; -#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) +#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) struct device_node *of_node; #endif diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 080fdd2a70f3..8d14b29aafea 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -315,7 +315,7 @@ static int controlfb_blank(int blank_mode, struct fb_info *info) container_of(info, struct fb_info_control, info); unsigned ctrl; - ctrl = ld_le32(CNTRL_REG(p,ctrl)); + ctrl = le32_to_cpup(CNTRL_REG(p,ctrl)); if (blank_mode > 0) switch (blank_mode) { case FB_BLANK_VSYNC_SUSPEND: diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 868facdec638..01ef1b953390 100644 --- 
a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -33,10 +33,6 @@ #include <video/edid.h> #include <video/of_videomode.h> #include <video/videomode.h> -#ifdef CONFIG_PPC_OF -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#endif #include "../edid.h" /* diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index aae10ce74f14..9b167f7ef6c6 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1470,15 +1470,13 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) unsigned long addr, size; struct imstt_par *par; struct fb_info *info; -#ifdef CONFIG_PPC_OF struct device_node *dp; dp = pci_device_to_OF_node(pdev); if(dp) printk(KERN_INFO "%s: OF name %s\n",__func__, dp->name); - else + else if (IS_ENABLED(CONFIG_OF)) printk(KERN_ERR "imsttfb: no OF node for pci device\n"); -#endif /* CONFIG_PPC_OF */ info = framebuffer_alloc(sizeof(struct imstt_par), &pdev->dev); @@ -1501,11 +1499,9 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) switch (pdev->device) { case PCI_DEVICE_ID_IMS_TT128: /* IMS,tt128mbA */ par->ramdac = IBM; -#ifdef CONFIG_PPC_OF if (dp && ((strcmp(dp->name, "IMS,tt128mb8") == 0) || (strcmp(dp->name, "IMS,tt128mb8A") == 0))) par->ramdac = TVP; -#endif /* CONFIG_PPC_OF */ break; case PCI_DEVICE_ID_IMS_TT3D: /* IMS,tt3d */ par->ramdac = TVP; diff --git a/drivers/video/fbdev/nvidia/Makefile b/drivers/video/fbdev/nvidia/Makefile index ca47432113e0..917d3eb05feb 100644 --- a/drivers/video/fbdev/nvidia/Makefile +++ b/drivers/video/fbdev/nvidia/Makefile @@ -5,9 +5,8 @@ obj-$(CONFIG_FB_NVIDIA) += nvidiafb.o nvidiafb-y := nvidia.o nv_hw.o nv_setup.o \ - nv_accel.o + nv_accel.o nv_of.o nvidiafb-$(CONFIG_FB_NVIDIA_I2C) += nv_i2c.o nvidiafb-$(CONFIG_FB_NVIDIA_BACKLIGHT) += nv_backlight.o -nvidiafb-$(CONFIG_PPC_OF) += nv_of.o nvidiafb-objs := $(nvidiafb-y) diff --git a/drivers/video/fbdev/nvidia/nv_of.c b/drivers/video/fbdev/nvidia/nv_of.c index 3bc13df4b120..5f3e5179c25a 100644 --- a/drivers/video/fbdev/nvidia/nv_of.c +++ b/drivers/video/fbdev/nvidia/nv_of.c @@ -19,9 +19,6 @@ #include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> - #include "nv_type.h" #include "nv_local.h" #include "nv_proto.h" diff --git a/drivers/video/fbdev/nvidia/nv_proto.h b/drivers/video/fbdev/nvidia/nv_proto.h index ff5c410355ea..878a5ce02299 100644 --- a/drivers/video/fbdev/nvidia/nv_proto.h +++ b/drivers/video/fbdev/nvidia/nv_proto.h @@ -42,16 +42,8 @@ int nvidia_probe_i2c_connector(struct fb_info *info, int conn, #define nvidia_probe_i2c_connector(p, c, edid) (-1) #endif -#ifdef CONFIG_PPC_OF int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 ** out_edid); -#else -static inline int nvidia_probe_of_connector(struct fb_info *info, int conn, - u8 ** out_edid) -{ - return -1; -} -#endif /* in nv_accel.c */ extern void NVResetGraphics(struct fb_info *info); diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c index def041204676..4273c6ee8cf6 100644 --- a/drivers/video/fbdev/nvidia/nvidia.c +++ b/drivers/video/fbdev/nvidia/nvidia.c @@ -24,10 +24,6 @@ #ifdef CONFIG_MTRR #include <asm/mtrr.h> #endif -#ifdef CONFIG_PPC_OF -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#endif #ifdef CONFIG_BOOTX_TEXT #include <asm/btext.h> #endif diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c index 518d1fd38a81..377d3399a3ad 100644 --- a/drivers/video/fbdev/platinumfb.c +++ 
b/drivers/video/fbdev/platinumfb.c @@ -168,7 +168,7 @@ static int platinumfb_blank(int blank, struct fb_info *fb) struct fb_info_platinum *info = (struct fb_info_platinum *) fb; int ctrl; - ctrl = ld_le32(&info->platinum_regs->ctrl.r) | 0x33; + ctrl = le32_to_cpup(&info->platinum_regs->ctrl.r) | 0x33; if (blank) --blank_mode; if (blank & VESA_VSYNC_SUSPEND) diff --git a/drivers/video/fbdev/riva/fbdev.c b/drivers/video/fbdev/riva/fbdev.c index be73727c7227..294a80908c8c 100644 --- a/drivers/video/fbdev/riva/fbdev.c +++ b/drivers/video/fbdev/riva/fbdev.c @@ -44,10 +44,6 @@ #ifdef CONFIG_MTRR #include <asm/mtrr.h> #endif -#ifdef CONFIG_PPC_OF -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#endif #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/machdep.h> #include <asm/backlight.h> @@ -1735,7 +1731,6 @@ static int riva_set_fbinfo(struct fb_info *info) return (rivafb_check_var(&info->var, info)); } -#ifdef CONFIG_PPC_OF static int riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) { struct riva_par *par = info->par; @@ -1766,9 +1761,8 @@ static int riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) NVTRACE_LEAVE(); return 0; } -#endif /* CONFIG_PPC_OF */ -#if defined(CONFIG_FB_RIVA_I2C) && !defined(CONFIG_PPC_OF) +#if defined(CONFIG_FB_RIVA_I2C) static int riva_get_EDID_i2c(struct fb_info *info) { struct riva_par *par = info->par; @@ -1828,10 +1822,13 @@ static void riva_update_default_var(struct fb_var_screeninfo *var, static void riva_get_EDID(struct fb_info *info, struct pci_dev *pdev) { NVTRACE_ENTER(); -#ifdef CONFIG_PPC_OF - if (!riva_get_EDID_OF(info, pdev)) + if (riva_get_EDID_OF(info, pdev)) { + NVTRACE_LEAVE(); + return; + } + if (IS_ENABLED(CONFIG_OF)) printk(PFX "could not retrieve EDID from OF\n"); -#elif defined(CONFIG_FB_RIVA_I2C) +#if defined(CONFIG_FB_RIVA_I2C) if (!riva_get_EDID_i2c(info)) printk(PFX "could not retrieve EDID from DDC/I2C\n"); #endif diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 94d96809e686..a270004c9605 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -270,4 +270,10 @@ config XEN_EFI def_bool y depends on X86_64 && EFI +config XEN_AUTO_XLATE + def_bool y + depends on ARM || ARM64 || XEN_PVHVM + help + Support for auto-translated physmap guests. 
+ endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 2ccd3592d41f..40edd1cbb60d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o +obj-$(CONFIG_XEN_AUTO_XLATE) += xlate_mmu.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 6ab6a79c38a5..a493c7315e94 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -393,14 +393,25 @@ static int bind_virq_for_mce(void) static int __init xen_late_init_mcelog(void) { + int ret; + /* Only DOM0 is responsible for MCE logging */ - if (xen_initial_domain()) { - /* register character device /dev/mcelog for xen mcelog */ - if (misc_register(&xen_mce_chrdev_device)) - return -ENODEV; - return bind_virq_for_mce(); - } + if (!xen_initial_domain()) + return -ENODEV; + + /* register character device /dev/mcelog for xen mcelog */ + ret = misc_register(&xen_mce_chrdev_device); + if (ret) + return ret; + + ret = bind_virq_for_mce(); + if (ret) + goto deregister; - return -ENODEV; + return 0; + +deregister: + misc_deregister(&xen_mce_chrdev_device); + return ret; } device_initcall(xen_late_init_mcelog); diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index 95ee4302ffb8..7494dbeb4409 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -19,6 +19,7 @@ #include <linux/pci.h> #include <linux/acpi.h> +#include <linux/pci-acpi.h> #include <xen/xen.h> #include <xen/interface/physdev.h> #include <xen/interface/xen.h> @@ -67,12 +68,22 @@ static int xen_add_device(struct device *dev) #ifdef CONFIG_ACPI handle = ACPI_HANDLE(&pci_dev->dev); - if (!handle && pci_dev->bus->bridge) - handle = ACPI_HANDLE(pci_dev->bus->bridge); #ifdef CONFIG_PCI_IOV if (!handle && pci_dev->is_virtfn) handle = ACPI_HANDLE(physfn->bus->bridge); #endif + if (!handle) { + /* + * This device was not listed in the ACPI name space at + * all. Try to get acpi handle of parent pci bus. + */ + struct pci_bus *pbus; + for (pbus = pci_dev->bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) + break; + } + } if (handle) { acpi_status status; diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 0aac403d53fd..49e88f2ce7a1 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -132,6 +132,33 @@ static ssize_t __ref store_online(struct device *dev, } static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); +static struct attribute *pcpu_dev_attrs[] = { + &dev_attr_online.attr, + NULL +}; + +static umode_t pcpu_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + /* + * Xen never offline cpu0 due to several restrictions + * and assumptions. This basically doesn't add a sys control + * to user, one cannot attempt to offline BSP. + */ + return dev->id ? 
attr->mode : 0; +} + +static const struct attribute_group pcpu_dev_group = { + .attrs = pcpu_dev_attrs, + .is_visible = pcpu_dev_is_visible, +}; + +static const struct attribute_group *pcpu_dev_groups[] = { + &pcpu_dev_group, + NULL +}; + static bool xen_pcpu_online(uint32_t flags) { return !!(flags & XEN_PCPU_FLAGS_ONLINE); @@ -181,9 +208,6 @@ static void unregister_and_remove_pcpu(struct pcpu *pcpu) return; dev = &pcpu->dev; - if (dev->id) - device_remove_file(dev, &dev_attr_online); - /* pcpu remove would be implicitly done */ device_unregister(dev); } @@ -200,6 +224,7 @@ static int register_pcpu(struct pcpu *pcpu) dev->bus = &xen_pcpu_subsys; dev->id = pcpu->cpu_id; dev->release = pcpu_release; + dev->groups = pcpu_dev_groups; err = device_register(dev); if (err) { @@ -207,19 +232,6 @@ static int register_pcpu(struct pcpu *pcpu) return err; } - /* - * Xen never offline cpu0 due to several restrictions - * and assumptions. This basically doesn't add a sys control - * to user, one cannot attempt to offline BSP. - */ - if (dev->id) { - err = device_create_file(dev, &dev_attr_online); - if (err) { - device_unregister(dev); - return err; - } - } - return 0; } diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 59ac71c4a043..5a296161d843 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size, return ret; } +/* + * Similar to traverse_pages, but use each page as a "block" of + * data to be processed as one unit. + */ +static int traverse_pages_block(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, int nr, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret = 0; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + + while (nelem) { + int nr = (PAGE_SIZE/size); + struct page *page; + if (nr > nelem) + nr = nelem; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + ret = (*fn)(pagedata, nr, state); + if (ret) + break; + nelem -= nr; + } + + return ret; +} + struct mmap_mfn_state { unsigned long va; struct vm_area_struct *vma; @@ -274,39 +308,25 @@ struct mmap_batch_state { /* auto translated dom0 note: if domU being created is PV, then mfn is * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). */ -static int mmap_batch_fn(void *data, void *state) +static int mmap_batch_fn(void *data, int nr, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; struct vm_area_struct *vma = st->vma; struct page **pages = vma->vm_private_data; - struct page *cur_page = NULL; + struct page **cur_pages = NULL; int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) - cur_page = pages[st->index++]; + cur_pages = &pages[st->index]; - ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain, - &cur_page); + BUG_ON(nr < 0); + ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr, + (int *)mfnp, st->vma->vm_page_prot, + st->domain, cur_pages); - /* Store error code for second pass. */ - if (st->version == 1) { - if (ret < 0) { - /* - * V1 encodes the error codes in the 32bit top nibble of the - * mfn (with its known limitations vis-a-vis 64 bit callers). - */ - *mfnp |= (ret == -ENOENT) ? - PRIVCMD_MMAPBATCH_PAGED_ERROR : - PRIVCMD_MMAPBATCH_MFN_ERROR; - } - } else { /* st->version == 2 */ - *((int *) mfnp) = ret; - } - - /* And see if it affects the global_error. 
*/ - if (ret < 0) { + /* Adjust the global_error? */ + if (ret != nr) { if (ret == -ENOENT) st->global_error = -ENOENT; else { @@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state) st->global_error = 1; } } - st->va += PAGE_SIZE; + st->va += PAGE_SIZE * nr; + st->index += nr; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_error(int err, struct mmap_batch_state *st) { - struct mmap_batch_state *st = state; + int ret; if (st->version == 1) { - xen_pfn_t mfnp = *((xen_pfn_t *) data); - if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) - return __put_user(mfnp, st->user_mfn++); - else + if (err) { + xen_pfn_t mfn; + + ret = get_user(mfn, st->user_mfn); + if (ret < 0) + return ret; + /* + * V1 encodes the error codes in the 32bit top + * nibble of the mfn (with its known + * limitations vis-a-vis 64 bit callers). + */ + mfn |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(mfn, st->user_mfn++); + } else st->user_mfn++; } else { /* st->version == 2 */ - int err = *((int *) data); if (err) return __put_user(err, st->user_err++); else @@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state) return 0; } +static int mmap_return_errors(void *data, int nr, void *state) +{ + struct mmap_batch_state *st = state; + int *errs = data; + int i; + int ret; + + for (i = 0; i < nr; i++) { + ret = mmap_return_error(errs[i], st); + if (ret < 0) + return ret; + } + return 0; +} + /* Allocate pfns that are then mapped with gmfns from foreign domid. Update * the vma with the page info to use later. * Returns: 0 if success, otherwise -errno @@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) state.version = version; /* mmap_batch_fn guarantees ret == 0 */ - BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state)); + BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); @@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) /* Write back errors in second pass. 
*/ state.user_mfn = (xen_pfn_t *)m.arr; state.user_err = m.err; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_return_errors, &state); + ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_return_errors, &state); } else ret = 0; diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index e555845d61fa..39e7ef8d3957 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -193,13 +193,18 @@ static DEVICE_ATTR(target, S_IRUGO | S_IWUSR, show_target, store_target); -static struct device_attribute *balloon_attrs[] = { - &dev_attr_target_kb, - &dev_attr_target, - &dev_attr_schedule_delay.attr, - &dev_attr_max_schedule_delay.attr, - &dev_attr_retry_count.attr, - &dev_attr_max_retry_count.attr +static struct attribute *balloon_attrs[] = { + &dev_attr_target_kb.attr, + &dev_attr_target.attr, + &dev_attr_schedule_delay.attr.attr, + &dev_attr_max_schedule_delay.attr.attr, + &dev_attr_retry_count.attr.attr, + &dev_attr_max_retry_count.attr.attr, + NULL +}; + +static const struct attribute_group balloon_group = { + .attrs = balloon_attrs }; static struct attribute *balloon_info_attrs[] = { @@ -214,6 +219,12 @@ static const struct attribute_group balloon_info_group = { .attrs = balloon_info_attrs }; +static const struct attribute_group *balloon_groups[] = { + &balloon_group, + &balloon_info_group, + NULL +}; + static struct bus_type balloon_subsys = { .name = BALLOON_CLASS_NAME, .dev_name = BALLOON_CLASS_NAME, @@ -221,7 +232,7 @@ static struct bus_type balloon_subsys = { static int register_balloon(struct device *dev) { - int i, error; + int error; error = subsys_system_register(&balloon_subsys, NULL); if (error) @@ -229,6 +240,7 @@ static int register_balloon(struct device *dev) dev->id = 0; dev->bus = &balloon_subsys; + dev->groups = balloon_groups; error = device_register(dev); if (error) { @@ -236,24 +248,7 @@ static int register_balloon(struct device *dev) return error; } - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { - error = device_create_file(dev, balloon_attrs[i]); - if (error) - goto fail; - } - - error = sysfs_create_group(&dev->kobj, &balloon_info_group); - if (error) - goto fail; - return 0; - - fail: - while (--i >= 0) - device_remove_file(dev, balloon_attrs[i]); - device_unregister(dev); - bus_unregister(&balloon_subsys); - return error; } MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 2d7369391472..c2260a0456c9 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -88,9 +88,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n", pci_name(dev)); pci_set_master(dev); + } else if (dev->is_busmaster && !is_master_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n", + pci_name(dev)); + pci_clear_master(dev); } - if (value & PCI_COMMAND_INVALIDATE) { + if (!(cmd->val & PCI_COMMAND_INVALIDATE) && + (value & PCI_COMMAND_INVALIDATE)) { if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable memory-write-invalidate\n", @@ -101,6 +107,13 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) pci_name(dev), err); value &= ~PCI_COMMAND_INVALIDATE; } + } else if ((cmd->val & PCI_COMMAND_INVALIDATE) && + !(value & PCI_COMMAND_INVALIDATE)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG + DRV_NAME ": %s: 
disable memory-write-invalidate\n", + pci_name(dev)); + pci_clear_mwi(dev); } cmd->val = value; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index cc3cbb4435f8..258b7c325649 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -118,7 +118,7 @@ static void pcistub_device_release(struct kref *kref) int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix, &ppdev); - if (err) + if (err && err != -ENOSYS) dev_warn(&dev->dev, "MSI-X release failed (%d)\n", err); } @@ -402,7 +402,7 @@ static int pcistub_init_device(struct pci_dev *dev) }; err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev); - if (err) + if (err && err != -ENOSYS) dev_err(&dev->dev, "MSI-X preparation failed (%d)\n", err); } diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index fe17c80ff4b7..98bc345f296e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -113,7 +113,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", gnt_ref, remote_evtchn); - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error mapping other domain page in ours."); diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 42bd55a6c237..07ef38325223 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -31,6 +31,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) "xen-pvscsi: " fmt + #include <stdarg.h> #include <linux/module.h> @@ -69,9 +71,6 @@ #include <xen/interface/grant_table.h> #include <xen/interface/io/vscsiif.h> -#define DPRINTK(_f, _a...) 
\ - pr_debug("(file=%s, line=%d) " _f, __FILE__ , __LINE__ , ## _a) - #define VSCSI_VERSION "v0.1" #define VSCSI_NAMELEN 32 @@ -271,7 +270,7 @@ static void scsiback_print_status(char *sense_buffer, int errors, { struct scsiback_tpg *tpg = pending_req->v2p->tpg; - pr_err("xen-pvscsi[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n", + pr_err("[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n", tpg->tport->tport_name, pending_req->v2p->lun, pending_req->cmnd[0], status_byte(errors), msg_byte(errors), host_byte(errors), driver_byte(errors)); @@ -427,7 +426,7 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map, BUG_ON(err); for (i = 0; i < cnt; i++) { if (unlikely(map[i].status != GNTST_okay)) { - pr_err("xen-pvscsi: invalid buffer -- could not remap it\n"); + pr_err("invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; err = -ENOMEM; } else { @@ -449,7 +448,7 @@ static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req, for (i = 0; i < cnt; i++) { if (get_free_page(pg + mapcount)) { put_free_pages(pg, mapcount); - pr_err("xen-pvscsi: no grant page\n"); + pr_err("no grant page\n"); return -ENOMEM; } gnttab_set_map_op(&map[mapcount], vaddr_page(pg[mapcount]), @@ -492,7 +491,7 @@ static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req, return 0; if (nr_segments > VSCSIIF_SG_TABLESIZE) { - DPRINTK("xen-pvscsi: invalid parameter nr_seg = %d\n", + pr_debug("invalid parameter nr_seg = %d\n", ring_req->nr_segments); return -EINVAL; } @@ -516,13 +515,12 @@ static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req, nr_segments += n_segs; } if (nr_segments > SG_ALL) { - DPRINTK("xen-pvscsi: invalid nr_seg = %d\n", - nr_segments); + pr_debug("invalid nr_seg = %d\n", nr_segments); return -EINVAL; } } - /* free of (sgl) in fast_flush_area()*/ + /* free of (sgl) in fast_flush_area() */ pending_req->sgl = kmalloc_array(nr_segments, sizeof(struct scatterlist), GFP_KERNEL); if (!pending_req->sgl) @@ -679,7 +677,8 @@ static int prepare_pending_reqs(struct vscsibk_info *info, v2p = scsiback_do_translation(info, &vir); if (!v2p) { pending_req->v2p = NULL; - DPRINTK("xen-pvscsi: doesn't exist.\n"); + pr_debug("the v2p of (chn:%d, tgt:%d, lun:%d) doesn't exist.\n", + vir.chn, vir.tgt, vir.lun); return -ENODEV; } pending_req->v2p = v2p; @@ -690,14 +689,14 @@ static int prepare_pending_reqs(struct vscsibk_info *info, (pending_req->sc_data_direction != DMA_TO_DEVICE) && (pending_req->sc_data_direction != DMA_FROM_DEVICE) && (pending_req->sc_data_direction != DMA_NONE)) { - DPRINTK("xen-pvscsi: invalid parameter data_dir = %d\n", + pr_debug("invalid parameter data_dir = %d\n", pending_req->sc_data_direction); return -EINVAL; } pending_req->cmd_len = ring_req->cmd_len; if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) { - DPRINTK("xen-pvscsi: invalid parameter cmd_len = %d\n", + pr_debug("invalid parameter cmd_len = %d\n", pending_req->cmd_len); return -EINVAL; } @@ -721,7 +720,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) { rc = ring->rsp_prod_pvt; - pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", + pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). 
Halting ring processing\n", info->domid, rp, rc, rp - rc); info->ring_error = 1; return 0; @@ -772,7 +771,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) scsiback_device_action(pending_req, TMR_LUN_RESET, 0); break; default: - pr_err_ratelimited("xen-pvscsi: invalid request\n"); + pr_err_ratelimited("invalid request\n"); scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24, 0, pending_req); kmem_cache_free(scsiback_cachep, pending_req); @@ -810,7 +809,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, if (info->irq) return -1; - err = xenbus_map_ring_valloc(info->dev, ring_ref, &area); + err = xenbus_map_ring_valloc(info->dev, &ring_ref, 1, &area); if (err) return err; @@ -874,14 +873,13 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, lunp = strrchr(phy, ':'); if (!lunp) { - pr_err("xen-pvscsi: illegal format of physical device %s\n", - phy); + pr_err("illegal format of physical device %s\n", phy); return -EINVAL; } *lunp = 0; lunp++; if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) { - pr_err("xen-pvscsi: lun number not valid: %s\n", lunp); + pr_err("lun number not valid: %s\n", lunp); return -EINVAL; } @@ -909,7 +907,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("xen-pvscsi: %s:%d %s\n", phy, lun, error); + pr_err("%s:%d %s\n", phy, lun, error); return -ENODEV; } @@ -926,7 +924,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, if ((entry->v.chn == v->chn) && (entry->v.tgt == v->tgt) && (entry->v.lun == v->lun)) { - pr_warn("xen-pvscsi: Virtual ID is already used. Assignment was not performed.\n"); + pr_warn("Virtual ID is already used. Assignment was not performed.\n"); err = -EEXIST; goto out; } @@ -992,15 +990,15 @@ found: } static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, - char *phy, struct ids_tuple *vir) + char *phy, struct ids_tuple *vir, int try) { if (!scsiback_add_translation_entry(info, phy, vir)) { if (xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateInitialised)) { - pr_err("xen-pvscsi: xenbus_printf error %s\n", state); + pr_err("xenbus_printf error %s\n", state); scsiback_del_translation_entry(info, vir); } - } else { + } else if (!try) { xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed); } @@ -1012,7 +1010,7 @@ static void scsiback_do_del_lun(struct vscsibk_info *info, const char *state, if (!scsiback_del_translation_entry(info, vir)) { if (xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed)) - pr_err("xen-pvscsi: xenbus_printf error %s\n", state); + pr_err("xenbus_printf error %s\n", state); } } @@ -1060,10 +1058,19 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, switch (op) { case VSCSIBACK_OP_ADD_OR_DEL_LUN: - if (device_state == XenbusStateInitialising) - scsiback_do_add_lun(info, state, phy, &vir); - if (device_state == XenbusStateClosing) + switch (device_state) { + case XenbusStateInitialising: + scsiback_do_add_lun(info, state, phy, &vir, 0); + break; + case XenbusStateConnected: + scsiback_do_add_lun(info, state, phy, &vir, 1); + break; + case XenbusStateClosing: scsiback_do_del_lun(info, state, &vir); + break; + default: + break; + } break; case VSCSIBACK_OP_UPDATEDEV_STATE: @@ -1071,15 +1078,14 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, /* modify vscsi-devs/dev-x/state */ if (xenbus_printf(XBT_NIL, dev->nodename, 
state, "%d", XenbusStateConnected)) { - pr_err("xen-pvscsi: xenbus_printf error %s\n", - str); + pr_err("xenbus_printf error %s\n", str); scsiback_del_translation_entry(info, &vir); xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); } } break; - /*When it is necessary, processing is added here.*/ + /* When it is necessary, processing is added here. */ default: break; } @@ -1196,7 +1202,7 @@ static int scsiback_probe(struct xenbus_device *dev, struct vscsibk_info *info = kzalloc(sizeof(struct vscsibk_info), GFP_KERNEL); - DPRINTK("%p %d\n", dev, dev->otherend_id); + pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id); if (!info) { xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure"); @@ -1227,7 +1233,7 @@ static int scsiback_probe(struct xenbus_device *dev, return 0; fail: - pr_warn("xen-pvscsi: %s failed\n", __func__); + pr_warn("%s failed\n", __func__); scsiback_remove(dev); return err; @@ -1432,7 +1438,7 @@ check_len: } snprintf(&tport->tport_name[0], VSCSI_NAMELEN, "%s", &name[off]); - pr_debug("xen-pvscsi: Allocated emulated Target %s Address: %s\n", + pr_debug("Allocated emulated Target %s Address: %s\n", scsiback_dump_proto_id(tport), name); return &tport->tport_wwn; @@ -1443,7 +1449,7 @@ static void scsiback_drop_tport(struct se_wwn *wwn) struct scsiback_tport *tport = container_of(wwn, struct scsiback_tport, tport_wwn); - pr_debug("xen-pvscsi: Deallocating emulated Target %s Address: %s\n", + pr_debug("Deallocating emulated Target %s Address: %s\n", scsiback_dump_proto_id(tport), tport->tport_name); kfree(tport); @@ -1470,8 +1476,8 @@ static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg) static int scsiback_check_stop_free(struct se_cmd *se_cmd) { /* - * Do not release struct se_cmd's containing a valid TMR - * pointer. These will be released directly in scsiback_device_action() + * Do not release struct se_cmd's containing a valid TMR pointer. + * These will be released directly in scsiback_device_action() * with transport_generic_free_cmd(). */ if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) @@ -1637,7 +1643,7 @@ static int scsiback_make_nexus(struct scsiback_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer */ tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(tv_nexus->tvn_se_sess)) { @@ -1705,7 +1711,7 @@ static int scsiback_drop_nexus(struct scsiback_tpg *tpg) return -EBUSY; } - pr_debug("xen-pvscsi: Removing I_T Nexus to emulated %s Initiator Port: %s\n", + pr_debug("Removing I_T Nexus to emulated %s Initiator Port: %s\n", scsiback_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); @@ -1751,7 +1757,7 @@ static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg, unsigned char i_port[VSCSI_NAMELEN], *ptr, *port_ptr; int ret; /* - * Shutdown the active I_T nexus if 'NULL' is passed.. + * Shutdown the active I_T nexus if 'NULL' is passed. */ if (!strncmp(page, "NULL", 4)) { ret = scsiback_drop_nexus(tpg); @@ -1922,7 +1928,7 @@ static void scsiback_drop_tpg(struct se_portal_group *se_tpg) */ scsiback_drop_nexus(tpg); /* - * Deregister the se_tpg from TCM.. + * Deregister the se_tpg from TCM. 
*/ core_tpg_deregister(se_tpg); kfree(tpg); @@ -1992,7 +1998,7 @@ static int scsiback_register_configfs(void) struct target_fabric_configfs *fabric; int ret; - pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n", + pr_debug("fabric module %s on %s/%s on "UTS_RELEASE"\n", VSCSI_VERSION, utsname()->sysname, utsname()->machine); /* * Register the top level struct config_item_type with TCM core @@ -2029,7 +2035,7 @@ static int scsiback_register_configfs(void) * Setup our local pointer to *fabric */ scsiback_fabric_configfs = fabric; - pr_debug("xen-pvscsi: Set fabric -> scsiback_fabric_configfs\n"); + pr_debug("Set fabric -> scsiback_fabric_configfs\n"); return 0; }; @@ -2040,7 +2046,7 @@ static void scsiback_deregister_configfs(void) target_fabric_configfs_deregister(scsiback_fabric_configfs); scsiback_fabric_configfs = NULL; - pr_debug("xen-pvscsi: Cleared scsiback_fabric_configfs\n"); + pr_debug("Cleared scsiback_fabric_configfs\n"); }; static const struct xenbus_device_id scsiback_ids[] = { @@ -2091,7 +2097,7 @@ out_unregister_xenbus: xenbus_unregister_driver(&scsiback_driver); out_cache_destroy: kmem_cache_destroy(scsiback_cachep); - pr_err("xen-pvscsi: %s: error %d\n", __func__, ret); + pr_err("%s: error %d\n", __func__, ret); return ret; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index ca744102b666..96b2011d25f3 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -52,17 +52,25 @@ struct xenbus_map_node { struct list_head next; union { - struct vm_struct *area; /* PV */ - struct page *page; /* HVM */ + struct { + struct vm_struct *area; + } pv; + struct { + struct page *pages[XENBUS_MAX_RING_PAGES]; + void *addr; + } hvm; }; - grant_handle_t handle; + grant_handle_t handles[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; }; static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*map)(struct xenbus_device *dev, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); }; @@ -355,17 +363,39 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, /** * xenbus_grant_ring * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * a grant reference on success, or -errno on error. On error, the device will - * switch to XenbusStateClosing, and the error will be saved in the store. + * @vaddr: starting virtual address of the ring + * @nr_pages: number of pages to be granted + * @grefs: grant reference array to be filled in + * + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. 
*/ -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + unsigned int nr_pages, grant_ref_t *grefs) { - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); - if (err < 0) - xenbus_dev_fatal(dev, err, "granting access to ring page"); + int err; + int i, j; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for (j = 0; j < i; j++) + gnttab_end_foreign_access_ref(grefs[j], 0); return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); @@ -419,62 +449,130 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); /** * xenbus_map_ring_valloc * @dev: xenbus device - * @gnt_ref: grant reference + * @gnt_refs: grant reference array + * @nr_grefs: number of grant references * @vaddr: pointer to address to be filled out by mapping * - * Based on Rusty Russell's skeleton driver's map_page. - * Map a page of memory into this domain from another domain's grant table. - * xenbus_map_ring_valloc allocates a page of virtual address space, maps the - * page to that address, and sets *vaddr to that address. - * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to + * Map @nr_grefs pages of memory into this domain from another + * domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs + * pages of virtual address space, maps the pages to that address, and + * sets *vaddr to that address. Returns 0 on success, and GNTST_* + * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on + * error. If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_ref, vaddr); + return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); +/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned + * long), e.g. 32-on-64. 
Caller is responsible for preparing the + * right array to feed into this function */ +static int __xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + grant_handle_t *handles, + phys_addr_t *addrs, + unsigned int flags, + bool *leaked) +{ + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + for (i = 0; i < nr_grefs; i++) { + memset(&map[i], 0, sizeof(map[i])); + gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i], + dev->otherend_id); + handles[i] = INVALID_GRANT_HANDLE; + } + + gnttab_batch_map(map, i); + + for (i = 0; i < nr_grefs; i++) { + if (map[i].status != GNTST_okay) { + err = map[i].status; + xenbus_dev_fatal(dev, map[i].status, + "mapping in shared page %d from domain %d", + gnt_refs[i], dev->otherend_id); + goto fail; + } else + handles[i] = map[i].handle; + } + + return GNTST_okay; + + fail: + for (i = j = 0; i < nr_grefs; i++) { + if (handles[i] != INVALID_GRANT_HANDLE) { + memset(&unmap[j], 0, sizeof(unmap[j])); + gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i], + GNTMAP_host_map, handles[i]); + j++; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j)) + BUG(); + + *leaked = false; + for (i = 0; i < j; i++) { + if (unmap[i].status != GNTST_okay) { + *leaked = true; + break; + } + } + + return err; +} + static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map | GNTMAP_contains_pte, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; struct xenbus_map_node *node; struct vm_struct *area; - pte_t *pte; + pte_t *ptes[XENBUS_MAX_RING_PAGES]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int err = GNTST_okay; + int i; + bool leaked; *vaddr = NULL; + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE, &pte); + area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); if (!area) { kfree(node); return -ENOMEM; } - op.host_addr = arbitrary_virt_to_machine(pte).maddr; + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; - gnttab_batch_map(&op, 1); - - if (op.status != GNTST_okay) { - free_vm_area(area); - kfree(node); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; - } + err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, + phys_addrs, + GNTMAP_host_map | GNTMAP_contains_pte, + &leaked); + if (err) + goto failed; - node->handle = op.handle; - node->area = area; + node->nr_handles = nr_grefs; + node->pv.area = area; spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -482,14 +580,33 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, *vaddr = area->addr; return 0; + +failed: + if (!leaked) + free_vm_area(area); + else + pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); + + kfree(node); + return err; } static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + grant_ref_t *gnt_ref, + unsigned int nr_grefs, + void **vaddr) { struct xenbus_map_node *node; + int i; int err; void *addr; + bool leaked = false; + /* Why do we need two arrays? 
See comment of __xenbus_map_ring */ + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; *vaddr = NULL; @@ -497,15 +614,32 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, + false /* lowmem */); if (err) goto out_err; - addr = pfn_to_kaddr(page_to_pfn(node->page)); + for (i = 0; i < nr_grefs; i++) { + unsigned long pfn = page_to_pfn(node->hvm.pages[i]); + phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + } + + err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, + phys_addrs, GNTMAP_host_map, &leaked); + node->nr_handles = nr_grefs; - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); if (err) - goto out_err_free_ballooned_pages; + goto out_free_ballooned_pages; + + addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, + PAGE_KERNEL); + if (!addr) { + err = -ENOMEM; + goto out_xenbus_unmap_ring; + } + + node->hvm.addr = addr; spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -514,8 +648,16 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, *vaddr = addr; return 0; - out_err_free_ballooned_pages: - free_xenballooned_pages(1, &node->page); + out_xenbus_unmap_ring: + if (!leaked) + xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); + else + pr_alert("leaking %p size %u page(s)", + addr, nr_grefs); + out_free_ballooned_pages: + if (!leaked) + free_xenballooned_pages(nr_grefs, node->hvm.pages); out_err: kfree(node); return err; @@ -525,35 +667,37 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, /** * xenbus_map_ring * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled - * @vaddr: address to be mapped to + * @gnt_refs: grant reference array + * @nr_grefs: number of grant reference + * @handles: pointer to grant handle to be filled + * @vaddrs: addresses to be mapped to + * @leaked: fail to clean up a failed map, caller should not free vaddr * - * Map a page of memory into this domain from another domain's grant table. + * Map pages of memory into this domain from another domain's grant table. * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. + * this yourself!). It only maps in the pages to the specified address. * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. + * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to + * XenbusStateClosing and the first error message will be saved in XenStore. + * Further more if we fail to map the ring, caller should check @leaked. + * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller + * should not free the address space of @vaddr. 
*/ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) +int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, grant_handle_t *handles, + unsigned long *vaddrs, bool *leaked) { - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, - dev->otherend_id); + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int i; - gnttab_batch_map(&op, 1); + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = (unsigned long)vaddrs[i]; - return op.status; + return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles, + phys_addrs, GNTMAP_host_map, leaked); } EXPORT_SYMBOL_GPL(xenbus_map_ring); @@ -579,14 +723,15 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; unsigned int level; + int i; + bool leaked = false; + int err; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { - if (node->area->addr == vaddr) { + if (node->pv.area->addr == vaddr) { list_del(&node->next); goto found; } @@ -601,22 +746,41 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - op.handle = node->handle; - op.host_addr = arbitrary_virt_to_machine( - lookup_address((unsigned long)vaddr, &level)).maddr; + for (i = 0; i < node->nr_handles; i++) { + unsigned long addr; + + memset(&unmap[i], 0, sizeof(unmap[i])); + addr = (unsigned long)vaddr + (PAGE_SIZE * i); + unmap[i].host_addr = arbitrary_virt_to_machine( + lookup_address(addr, &level)).maddr; + unmap[i].dev_bus_addr = 0; + unmap[i].handle = node->handles[i]; + } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) BUG(); - if (op.status == GNTST_okay) - free_vm_area(node->area); + err = GNTST_okay; + leaked = false; + for (i = 0; i < node->nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + leaked = true; + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + node->handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + if (!leaked) + free_vm_area(node->pv.area); else - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - node->handle, op.status); + pr_alert("leaking VM area %p size %u page(s)", + node->pv.area, node->nr_handles); kfree(node); - return op.status; + return err; } static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) @@ -624,10 +788,12 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) int rv; struct xenbus_map_node *node; void *addr; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + int i; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { - addr = pfn_to_kaddr(page_to_pfn(node->page)); + addr = node->hvm.addr; if (addr == vaddr) { list_del(&node->next); goto found; @@ -643,12 +809,16 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - rv = 
xenbus_unmap_ring(dev, node->handle, addr); + for (i = 0; i < node->nr_handles; i++) + addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); + rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); if (!rv) - free_xenballooned_pages(1, &node->page); + vunmap(vaddr); else - WARN(1, "Leaking %p\n", vaddr); + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, + node->nr_handles); kfree(node); return rv; @@ -657,29 +827,44 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) /** * xenbus_unmap_ring * @dev: xenbus device - * @handle: grant handle - * @vaddr: addr to unmap + * @handles: grant handle array + * @nr_handles: number of handles in the array + * @vaddrs: addresses to unmap * - * Unmap a page of memory in this domain that was imported from another domain. + * Unmap memory in this domain that was imported from another domain. * Returns 0 on success and returns GNTST_* on error * (see xen/include/interface/grant_table.h). */ int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) + grant_handle_t *handles, unsigned int nr_handles, + unsigned long *vaddrs) { - struct gnttab_unmap_grant_ref op; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i; + int err; - gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle); + if (nr_handles > XENBUS_MAX_RING_PAGES) + return -EINVAL; - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + for (i = 0; i < nr_handles; i++) + gnttab_set_unmap_op(&unmap[i], vaddrs[i], + GNTMAP_host_map, handles[i]); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) BUG(); - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); + err = GNTST_okay; + for (i = 0; i < nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } - return op.status; + return err; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c new file mode 100644 index 000000000000..58a5389aec89 --- /dev/null +++ b/drivers/xen/xlate_mmu.c @@ -0,0 +1,143 @@ +/* + * MMU operations common to all auto-translated physmap guests. + * + * Copyright (C) 2015 Citrix Systems R&D Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/interface/xen.h> +#include <xen/interface/memory.h> + +/* map fgmfn of domid to lpfn in the current domain */ +static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, + unsigned int domid) +{ + int rc; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = domid, + .size = 1, + .space = XENMAPSPACE_gmfn_foreign, + }; + xen_ulong_t idx = fgmfn; + xen_pfn_t gpfn = lpfn; + int err = 0; + + set_xen_guest_handle(xatp.idxs, &idx); + set_xen_guest_handle(xatp.gpfns, &gpfn); + set_xen_guest_handle(xatp.errs, &err); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + return rc < 0 ? rc : err; +} + +struct remap_data { + xen_pfn_t *fgmfn; /* foreign domain's gmfn */ + pgprot_t prot; + domid_t domid; + struct vm_area_struct *vma; + int index; + struct page **pages; + struct xen_remap_mfn_info *info; + int *err_ptr; + int mapped; +}; + +static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + struct remap_data *info = data; + struct page *page = info->pages[info->index++]; + unsigned long pfn = page_to_pfn(page); + pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); + int rc; + + rc = map_foreign_page(pfn, *info->fgmfn, info->domid); + *info->err_ptr++ = rc; + if (!rc) { + set_pte_at(info->vma->vm_mm, addr, ptep, pte); + info->mapped++; + } + info->fgmfn++; + + return 0; +} + +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) +{ + int err; + struct remap_data data; + unsigned long range = nr << PAGE_SHIFT; + + /* Kept here for the purpose of making sure code doesn't break + x86 PVOPS */ + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); + + data.fgmfn = mfn; + data.prot = prot; + data.domid = domid; + data.vma = vma; + data.pages = pages; + data.index = 0; + data.err_ptr = err_ptr; + data.mapped = 0; + + err = apply_to_page_range(vma->vm_mm, addr, range, + remap_pte_fn, &data); + return err < 0 ? 
err : data.mapped; +} +EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array); + +int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages) +{ + int i; + + for (i = 0; i < nr; i++) { + struct xen_remove_from_physmap xrp; + unsigned long pfn; + + pfn = page_to_pfn(pages[i]); + + xrp.domid = DOMID_SELF; + xrp.gpfn = pfn; + (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + } + return 0; +} +EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 2e38f9a5b472..be35d05a4d0e 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -230,7 +230,6 @@ static int v9fs_launder_page(struct page *page) /** * v9fs_direct_IO - 9P address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -248,12 +247,12 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct file *file = iocb->ki_filp; ssize_t n; int err = 0; - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { n = p9_client_write(file->private_data, pos, iter, &err); if (n) { struct inode *inode = file_inode(file); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index d7fcb775311e..2a9dd37dc426 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -404,21 +404,16 @@ static ssize_t v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - ssize_t retval = 0; - loff_t origin = iocb->ki_pos; - size_t count = iov_iter_count(from); + ssize_t retval; + loff_t origin; int err = 0; - retval = generic_write_checks(file, &origin, &count, 0); - if (retval) + retval = generic_write_checks(iocb, from); + if (retval <= 0) return retval; - iov_iter_truncate(from, count); - - if (!count) - return 0; - - retval = p9_client_write(file->private_data, origin, from, &err); + origin = iocb->ki_pos; + retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err); if (retval > 0) { struct inode *inode = file_inode(file); loff_t i_size; @@ -428,12 +423,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (inode->i_mapping && inode->i_mapping->nrpages) invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); - origin += retval; + iocb->ki_pos += retval; i_size = i_size_read(inode); - iocb->ki_pos = origin; - if (origin > i_size) { - inode_add_bytes(inode, origin - i_size); - i_size_write(inode, origin); + if (iocb->ki_pos > i_size) { + inode_add_bytes(inode, iocb->ki_pos - i_size); + i_size_write(inode, iocb->ki_pos); } return retval; } diff --git a/fs/affs/file.c b/fs/affs/file.c index 7f506fb1ca23..659c579c4588 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to) } static ssize_t -affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -398,15 +397,15 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t size = offset + count; if (AFFS_I(inode)->mmu_private < size) return 0; } - ret = 
blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); - if (ret < 0 && (rw & WRITE)) + ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); + if (ret < 0 && iov_iter_rw(iter) == WRITE) affs_write_failed(mapping, offset + count); return ret; } @@ -77,6 +77,11 @@ struct kioctx_cpu { unsigned reqs_available; }; +struct ctx_rq_wait { + struct completion comp; + atomic_t count; +}; + struct kioctx { struct percpu_ref users; atomic_t dead; @@ -115,7 +120,7 @@ struct kioctx { /* * signals when all in-flight requests are done */ - struct completion *requests_done; + struct ctx_rq_wait *rq_wait; struct { /* @@ -572,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) struct kioctx *ctx = container_of(ref, struct kioctx, reqs); /* At this point we know that there are no any in-flight requests */ - if (ctx->requests_done) - complete(ctx->requests_done); + if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) + complete(&ctx->rq_wait->comp); INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); @@ -783,7 +788,7 @@ err: * the rapid destruction of the kioctx. */ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, - struct completion *requests_done) + struct ctx_rq_wait *wait) { struct kioctx_table *table; @@ -813,7 +818,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); - ctx->requests_done = requests_done; + ctx->rq_wait = wait; percpu_ref_kill(&ctx->users); return 0; } @@ -829,18 +834,24 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, void exit_aio(struct mm_struct *mm) { struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); - int i; + struct ctx_rq_wait wait; + int i, skipped; if (!table) return; + atomic_set(&wait.count, table->nr); + init_completion(&wait.comp); + + skipped = 0; for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); - if (!ctx) + if (!ctx) { + skipped++; continue; + } + /* * We don't need to bother with munmap() here - exit_mmap(mm) * is coming and it'll unmap everything. And we simply can't, @@ -849,10 +860,12 @@ void exit_aio(struct mm_struct *mm) * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, &requests_done); + kill_ioctx(mm, ctx, &wait); + } + if (!atomic_sub_and_test(skipped, &wait.count)) { /* Wait until all IO for the context are done. */ - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); } RCU_INIT_POINTER(mm->ioctx_table, NULL); @@ -1331,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); + struct ctx_rq_wait wait; int ret; + init_completion(&wait.comp); + atomic_set(&wait.count, 1); + /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - ret = kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &wait); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel @@ -1347,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) * is destroyed. 
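 *
 * For reference, the completion side of this handshake lives in
 * free_ioctx_reqs(); a minimal sketch of what fires the waiter
 * (field names as in struct ctx_rq_wait above):
 *
 *	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
 *		complete(&ctx->rq_wait->comp);
 *
 * exit_aio() sets count to the number of table slots and subtracts
 * the skipped (NULL) ones, so one completion drains every context.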
*/ if (!ret) - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); return ret; } @@ -1502,7 +1517,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->common.ki_pos = iocb->aio_offset; req->common.ki_complete = aio_complete; - req->common.ki_flags = 0; + req->common.ki_flags = iocb_flags(req->common.ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* diff --git a/fs/block_dev.c b/fs/block_dev.c index b5e87896f517..897ee0503932 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -146,15 +146,13 @@ blkdev_get_block(struct inode *inode, sector_t iblock, } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, - offset, blkdev_get_block, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, + blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -1597,9 +1595,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct inode *bd_inode = file->f_mapping->host; + loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; + if (bdev_read_only(I_BDEV(bd_inode))) + return -EPERM; + + if (!iov_iter_count(from)) + return 0; + + if (iocb->ki_pos >= size) + return -ENOSPC; + + iov_iter_truncate(from, size - iocb->ki_pos); + blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); if (ret > 0) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cdc801c85105..faa7d390841b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1739,27 +1739,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, u64 start_pos; u64 end_pos; ssize_t num_written = 0; - ssize_t err = 0; - size_t count = iov_iter_count(from); bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); - loff_t pos = iocb->ki_pos; + ssize_t err; + loff_t pos; + size_t count; mutex_lock(&inode->i_mutex); - - current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) { + err = generic_write_checks(iocb, from); + if (err <= 0) { mutex_unlock(&inode->i_mutex); - goto out; - } - - if (count == 0) { - mutex_unlock(&inode->i_mutex); - goto out; + return err; } - iov_iter_truncate(from, count); - + current->backing_dev_info = inode_to_bdi(inode); err = file_remove_suid(file); if (err) { mutex_unlock(&inode->i_mutex); @@ -1786,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ update_time_for_write(inode); + pos = iocb->ki_pos; + count = iov_iter_count(from); start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ @@ -1800,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (sync) atomic_inc(&BTRFS_I(inode)->sync_writers); - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from, pos); } else { num_written = __btrfs_buffered_write(file, from, pos); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686331f22b15..43192e10cc43 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8081,7 +8081,7 @@ 
free_ordered: bio_endio(dio_bio, ret); } -static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, +static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, const struct iov_iter *iter, loff_t offset) { int seg; @@ -8096,7 +8096,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io goto out; /* If this is a write we don't need to check anymore */ - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* * Check to make sure we don't have duplicate iov_base's in this @@ -8114,8 +8114,8 @@ out: return retval; } -static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -8126,7 +8126,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, bool relock = false; ssize_t ret; - if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) + if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset)) return 0; atomic_inc(&inode->i_dio_count); @@ -8144,7 +8144,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, filemap_fdatawrite_range(inode->i_mapping, offset, offset + count - 1); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * If the write DIO is beyond the EOF, we need update * the isize, but it is protected by i_mutex. So we can @@ -8174,11 +8174,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, wakeup = false; } - ret = __blockdev_direct_IO(rw, iocb, inode, - BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iter, offset, btrfs_get_blocks_direct, NULL, - btrfs_submit_direct, flags); - if (rw & WRITE) { + ret = __blockdev_direct_IO(iocb, inode, + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, + iter, offset, btrfs_get_blocks_direct, NULL, + btrfs_submit_direct, flags); + if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fd5599d32362..155ab9c0246b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1198,8 +1198,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * intercept O_DIRECT reads and writes early, this function should * never get called. 
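 *
 * Note that with this series the early interception keys off the
 * iocb rather than the file flags; sketch from ceph_write_iter
 * below:
 *
 *	if (iocb->ki_flags & IOCB_DIRECT)
 *		written = ceph_sync_direct_write(iocb, &data, pos);
 *	else
 *		written = ceph_sync_write(iocb, &data, pos);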
*/ -static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { WARN_ON(1); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 56237ea5fc22..b9b8eb225f66 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -457,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (ret < 0) return ret; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { while (iov_iter_count(i)) { size_t start; ssize_t n; @@ -828,7 +828,7 @@ again: return ret; if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", @@ -941,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; - ssize_t count = iov_iter_count(from), written = 0; + ssize_t count, written = 0; int err, want, got; - loff_t pos = iocb->ki_pos; + loff_t pos; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -953,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - - if (count == 0) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - iov_iter_truncate(from, count); + pos = iocb->ki_pos; + count = iov_iter_count(from); err = file_remove_suid(file); if (err) goto out; @@ -997,12 +995,12 @@ retry_snap: inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { struct iov_iter data; mutex_unlock(&inode->i_mutex); /* we might need to revert back to that point */ data = *from; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) written = ceph_sync_direct_write(iocb, &data, pos); else written = ceph_sync_write(iocb, &data, pos); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ca30c391a894..ca2bc5406306 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2560,10 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, return rc; } -static ssize_t -cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { - size_t len; + struct file *file = iocb->ki_filp; ssize_t total_written = 0; struct cifsFileInfo *open_file; struct cifs_tcon *tcon; @@ -2573,15 +2572,15 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) struct iov_iter saved_from; int rc; - len = iov_iter_count(from); - rc = generic_write_checks(file, poffset, &len, 0); - if (rc) - return rc; - - if (!len) - return 0; + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. 
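+ *
+ * Sketch of the new generic_write_checks() convention used just
+ * below: <= 0 means error or nothing to write, and on success the
+ * position and length travel in the iocb and the iterator:
+ *
+ *	rc = generic_write_checks(iocb, from);
+ *	if (rc <= 0)
+ *		return rc;
+ *	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from),
+ *				  from, open_file, cifs_sb, &wdata_list);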
+ */ - iov_iter_truncate(from, len); + rc = generic_write_checks(iocb, from); + if (rc <= 0) + return rc; INIT_LIST_HEAD(&wdata_list); cifs_sb = CIFS_FILE_SB(file); @@ -2593,8 +2592,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) memcpy(&saved_from, from, sizeof(struct iov_iter)); - rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, - &wdata_list); + rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from, + open_file, cifs_sb, &wdata_list); /* * If at least one write was successfully sent, then discard any rc @@ -2633,7 +2632,7 @@ restart_loop: memcpy(&tmp_from, &saved_from, sizeof(struct iov_iter)); iov_iter_advance(&tmp_from, - wdata->offset - *poffset); + wdata->offset - iocb->ki_pos); rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, @@ -2650,34 +2649,13 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (total_written > 0) - *poffset += total_written; + if (unlikely(!total_written)) + return rc; + iocb->ki_pos += total_written; + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags); cifs_stats_bytes_written(tcon, total_written); - return total_written ? total_written : (ssize_t)rc; -} - -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) -{ - ssize_t written; - struct inode *inode; - loff_t pos = iocb->ki_pos; - - inode = file_inode(iocb->ki_filp); - - /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. - */ - - written = cifs_iovec_write(iocb->ki_filp, from, &pos); - if (written > 0) { - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); - iocb->ki_pos = pos; - } - - return written; + return total_written; } static ssize_t @@ -2688,8 +2666,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - ssize_t rc = -EACCES; - loff_t lock_pos = iocb->ki_pos; + ssize_t rc; /* * We need to hold the sem to be sure nobody modifies lock list @@ -2697,23 +2674,24 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) */ down_read(&cinode->lock_sem); mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - lock_pos = i_size_read(inode); - if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), + + rc = generic_write_checks(iocb, from); + if (rc <= 0) + goto out; + + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { + CIFS_WRITE_OP)) rc = __generic_file_write_iter(iocb, from); - mutex_unlock(&inode->i_mutex); - - if (rc > 0) { - ssize_t err; + else + rc = -EACCES; +out: + mutex_unlock(&inode->i_mutex); - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - } else { - mutex_unlock(&inode->i_mutex); + if (rc > 0) { + ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) + rc = err; } up_read(&cinode->lock_sem); return rc; @@ -3877,8 +3855,7 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. 
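 *
 * Even this stub moves to the iterator-only prototype; the
 * direction, when a real implementation needs it, comes from
 * iov_iter_rw(). Sketch (foo_ is a placeholder name, not a real
 * function):
 *
 *	static ssize_t foo_direct_IO(struct kiocb *iocb,
 *				     struct iov_iter *iter, loff_t pos);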
*/ static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* * FIXME diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf0db005d2f5..acb3d63bc9dc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - mutex_unlock(&file_inode(file)->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh) return bh->b_state != 0; } -static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, - loff_t start, loff_t end, get_block_t get_block, - struct buffer_head *bh) +static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, + loff_t start, loff_t end, get_block_t get_block, + struct buffer_head *bh) { ssize_t retval = 0; loff_t pos = start; @@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, void *addr; bool hole = false; - if (rw != WRITE) + if (iov_iter_rw(iter) != WRITE) end = min(end, i_size_read(inode)); while (pos < end) { @@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size = PAGE_ALIGN(end - pos); bh->b_state = 0; retval = get_block(inode, block, bh, - rw == WRITE); + iov_iter_rw(iter) == WRITE); if (retval) break; if (!buffer_size_valid(bh)) @@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size -= done; } - hole = (rw != WRITE) && !buffer_written(bh); + hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh); if (hole) { addr = NULL; size = bh->b_size - first; @@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, max = min(pos + size, end); } - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) len = copy_from_iter(addr, max - pos, iter); else if (!hole) len = copy_to_iter(addr, max - pos, iter); @@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, /** * dax_do_io - Perform I/O to a DAX file - * @rw: READ to read or WRITE to write * @iocb: The control block for this I/O * @inode: The file which the I/O is directed at * @iter: The addresses to do I/O from or to @@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O * is in progress. 
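 *
 * A typical call site after this change, taken from the ext2
 * conversion elsewhere in this series:
 *
 *	if (IS_DAX(inode))
 *		ret = dax_do_io(iocb, inode, iter, offset,
 *				ext2_get_block, NULL, DIO_LOCKING);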
*/ -ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t pos, - get_block_t get_block, dio_iodone_t end_io, int flags) +ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, + struct iov_iter *iter, loff_t pos, get_block_t get_block, + dio_iodone_t end_io, int flags) { struct buffer_head bh; ssize_t retval = -EINVAL; @@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, memset(&bh, 0, sizeof(bh)); - if ((flags & DIO_LOCKING) && (rw == READ)) { + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) { struct address_space *mapping = inode->i_mapping; mutex_lock(&inode->i_mutex); retval = filemap_write_and_wait_range(mapping, pos, end - 1); @@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, /* Protects against truncate */ atomic_inc(&inode->i_dio_count); - retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); + retval = dax_io(inode, iter, pos, end, get_block, &bh); - if ((flags & DIO_LOCKING) && (rw == READ)) + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if ((retval > 0) && end_io) diff --git a/fs/dcache.c b/fs/dcache.c index d99736a63e3c..656ce522a218 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +/* + * Make sure other CPUs see the inode attached before the type is set. + */ +static inline void __d_set_inode_and_type(struct dentry *dentry, + struct inode *inode, + unsigned type_flags) +{ + unsigned flags; + + dentry->d_inode = inode; + smp_wmb(); + flags = READ_ONCE(dentry->d_flags); + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + flags |= type_flags; + WRITE_ONCE(dentry->d_flags, flags); +} + +/* + * Ideally, we want to make sure that other CPUs see the flags cleared before + * the inode is detached, but this is really a violation of RCU principles + * since the ordering suggests we should always set inode before flags. + * + * We should instead replace or discard the entire dentry - but that sucks + * performancewise on mass deletion/rename. 
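+ *
+ * The smp_wmb() in __d_set_inode_and_type() pairs with lockless
+ * readers that test the type bits before dereferencing ->d_inode;
+ * very roughly (hedged sketch - real readers rely on d_seq/RCU for
+ * the read-side ordering):
+ *
+ *	flags = READ_ONCE(dentry->d_flags);
+ *	if (flags & DCACHE_ENTRY_TYPE)
+ *		inode = dentry->d_inode;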
+ */ +static inline void __d_clear_type_and_inode(struct dentry *dentry) +{ + unsigned flags = READ_ONCE(dentry->d_flags); + + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + WRITE_ONCE(dentry->d_flags, flags); + smp_wmb(); + dentry->d_inode = NULL; +} + static void dentry_free(struct dentry *dentry) { WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); @@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry) { struct inode *inode = dentry->d_inode; if (inode) { - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - __d_clear_type(dentry); - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); spin_lock(&dentry->d_lock); - dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); - dentry->d_flags |= add_flags; if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); - dentry->d_inode = inode; + __d_set_inode_and_type(dentry, inode, add_flags); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); @@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); - tmp->d_inode = inode; - tmp->d_flags |= add_flags; + __d_set_inode_and_type(tmp, inode, add_flags); hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 61e72d44cf94..c9ee0dfe90b5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -524,7 +524,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (debugfs_positive(dentry)) { dget(dentry); - if (S_ISDIR(dentry->d_inode->i_mode)) + if (d_is_dir(dentry)) ret = simple_rmdir(parent->d_inode, dentry); else simple_unlink(parent->d_inode, dentry); diff --git a/fs/direct-io.c b/fs/direct-io.c index 6fb00e3f1059..c3b560b24a46 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1093,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio) * for the whole file. */ static inline ssize_t -do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) { unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; @@ -1110,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); - if (rw & WRITE) - rw = WRITE_ODIRECT; - /* * Avoid references to bdev if not absolutely needed to give * the early prefetch in the caller enough time. 
@@ -1127,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } /* watch out for a 0 len io from a tricksy fs */ - if (rw == READ && !iov_iter_count(iter)) + if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); @@ -1143,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio->flags = flags; if (dio->flags & DIO_LOCKING) { - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -1169,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (is_sync_kiocb(iocb)) dio->is_async = false; else if (!(dio->flags & DIO_ASYNC_EXTEND) && - (rw & WRITE) && end > i_size_read(inode)) + iov_iter_rw(iter) == WRITE && end > i_size_read(inode)) dio->is_async = false; else dio->is_async = true; dio->inode = inode; - dio->rw = rw; + dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue * so that we can call ->fsync. */ - if (dio->is_async && (rw & WRITE) && + if (dio->is_async && iov_iter_rw(iter) == WRITE && ((iocb->ki_filp->f_flags & O_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host))) { retval = dio_set_defer_completion(dio); @@ -1274,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. */ - if (rw == READ && (dio->flags & DIO_LOCKING)) + if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING)) mutex_unlock(&dio->inode->i_mutex); /* @@ -1286,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, */ BUG_ON(retval == -EIOCBQUEUED); if (dio->is_async && retval == 0 && dio->result && - (rw == READ || dio->result == count)) + (iov_iter_rw(iter) == READ || dio->result == count)) retval = -EIOCBQUEUED; else dio_await_completion(dio); @@ -1300,11 +1297,11 @@ out: return retval; } -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags) { /* * The block device state is needed in the end to finally @@ -1318,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, - get_block, end_io, submit_io, flags); + return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block, + end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index a198e94813fe..35073aaec6e0 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ -static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { return 0; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 
b29eb6747116..5d9213963fae 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -861,12 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, ssize_t ret; if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, - NULL, DIO_LOCKING); + ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL, + DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) ext2_write_failed(mapping, offset + count); return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index db07ffbe7c85..13c0868c7160 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. */ -static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - trace_ext3_direct_IO_enter(inode, offset, count, rw); + trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -1856,12 +1856,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. 
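 *
 * The direction test guarding this trim is the recurring conversion
 * throughout this series:
 *
 *	-	if (unlikely((rw & WRITE) && ret < 0)) {
 *	+	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {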
*/ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; @@ -1908,7 +1908,7 @@ retry: ret = err; } out: - trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); + trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index d4dbf3c259b3..f037b4b27300 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -789,7 +789,7 @@ static const struct quotactl_ops ext3_qctl_operations = { .quota_on = ext3_quota_on, .quota_off = dquot_quota_off, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index c6874be6d58b..24215dc09a18 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -546,8 +546,7 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) free += EXT3_XATTR_LEN(name_len); } if (i->value) { - if (free < EXT3_XATTR_SIZE(i->value_len) || - free < EXT3_XATTR_LEN(name_len) + + if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(i->value_len)) return -ENOSPC; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8a3981ea35d8..c8eb32eefc3c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2152,8 +2152,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset); +extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7a6defcf3352..e576d682b353 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -95,11 +95,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); struct mutex *aio_mutex = NULL; struct blk_plug plug; - int o_direct = io_is_direct(file); + int o_direct = iocb->ki_flags & IOCB_DIRECT; int overwrite = 0; - size_t length = iov_iter_count(from); ssize_t ret; - loff_t pos = iocb->ki_pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -108,16 +106,17 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && - (file->f_flags & O_APPEND || - ext4_unaligned_aio(inode, from, pos))) { + (iocb->ki_flags & IOCB_APPEND || + ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); ext4_unwritten_wait(inode); } mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - iocb->ki_pos = pos = i_size_read(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; /* * If we have encountered a bitmap-format file, the size limit @@ -126,22 +125,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if ((pos > sbi->s_bitmap_maxbytes) || - (pos == sbi->s_bitmap_maxbytes && length > 0)) { - 
mutex_unlock(&inode->i_mutex); + if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) { ret = -EFBIG; - goto errout; + goto out; } - - if (pos + length > sbi->s_bitmap_maxbytes) - iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); } iocb->private = &overwrite; if (o_direct) { + size_t length = iov_iter_count(from); + loff_t pos = iocb->ki_pos; blk_start_plug(&plug); - /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { @@ -185,7 +181,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct) blk_finish_plug(&plug); -errout: + if (aio_mutex) + mutex_unlock(aio_mutex); + return ret; + +out: + mutex_unlock(&inode->i_mutex); if (aio_mutex) mutex_unlock(aio_mutex); return ret; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 740c7871c117..3580629e42d3 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -642,8 +642,8 @@ out: * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. */ -ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) { + if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { /* * Nolock dioread optimization may be dynamically disabled * via ext4_inode_block_unlocked_dio(). Check inode's state @@ -690,23 +690,24 @@ retry: goto locked; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, 0); else - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iter, offset, - ext4_get_block, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, + inode->i_sb->s_bdev, iter, + offset, ext4_get_block, NULL, + NULL, 0); inode_dio_done(inode); } else { locked: if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, - offset, ext4_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, + ext4_get_block); - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 035b7a06f1c3..b49cf6e59953 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2952,8 +2952,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. 
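 *
 * Reads, and writes that extend past i_size, still fall back to the
 * indirect path; sketch of the dispatch below:
 *
 *	if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
 *		return ext4_ind_direct_IO(iocb, iter, offset);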
* */ -static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -2966,8 +2966,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ext4_io_end_t *io_end = NULL; /* Use the old path for reads and writes beyond i_size. */ - if (rw != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(rw, iocb, iter, offset); + if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) + return ext4_ind_direct_IO(iocb, iter, offset); BUG_ON(iocb->private == NULL); @@ -2976,7 +2976,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * conversion. This also disallows race between truncate() and * overwrite DIO as i_dio_count needs to be incremented under i_mutex. */ - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) atomic_inc(&inode->i_dio_count); /* If we do a overwrite dio, i_mutex locking can be released */ @@ -3034,10 +3034,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, + ret = dax_do_io(iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); else - ret = __blockdev_direct_IO(rw, iocb, inode, + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block_func, ext4_end_io_dio, NULL, dio_flags); @@ -3078,7 +3078,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, } retake_lock: - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) inode_dio_done(inode); /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) { @@ -3089,8 +3089,8 @@ retake_lock: return ret; } -static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -3107,12 +3107,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, count, rw); + trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iter, offset); + ret = ext4_ext_direct_IO(iocb, iter, offset); else - ret = ext4_ind_direct_IO(rw, iocb, iter, offset); - trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); + ret = ext4_ind_direct_IO(iocb, iter, offset); + trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e061e66c8280..d348c7d29d80 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1076,7 +1076,7 @@ static const struct quotactl_ops ext4_qctl_operations = { .quota_on = ext4_quota_on, .quota_off = ext4_quota_off, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 497f8515d205..319eda511c4f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1118,12 +1118,12 @@ static int f2fs_write_end(struct file *file, return copied; } -static int check_direct_IO(struct inode *inode, int rw, - struct iov_iter *iter, loff_t offset) +static int 
check_direct_IO(struct inode *inode, struct iov_iter *iter, + loff_t offset) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; - if (rw == READ) + if (iov_iter_rw(iter) == READ) return 0; if (offset & blocksize_mask) @@ -1135,8 +1135,8 @@ static int check_direct_IO(struct inode *inode, int rw, return 0; } -static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -1151,19 +1151,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } - if (check_direct_IO(inode, rw, iter, offset)) + if (check_direct_IO(inode, iter, offset)) return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, rw); + trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) __allocate_data_blocks(inode, offset, count); - err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); - if (err < 0 && (rw & WRITE)) + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); + if (err < 0 && iov_iter_rw(iter) == WRITE) f2fs_write_failed(mapping, offset + count); - trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); + trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); return err; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d1a26c2de919..c06774658345 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -237,8 +237,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping, return err; } -static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; @@ -247,7 +246,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. @@ -266,8 +265,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). 
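 *
 * blockdev_direct_IO() likewise drops the rw argument in this
 * series; sketch of the call just below:
 *
 *	ret = blockdev_direct_IO(iocb, inode, iter, offset,
 *				 fat_get_block);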
*/ - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); - if (ret < 0 && (rw & WRITE)) + ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); + if (ret < 0 && iov_iter_rw(iter) == WRITE) fat_write_failed(mapping, offset + count); return ret; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e1afdd7abf90..5ef05b5c4cff 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1145,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = iov_iter_count(from); ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; loff_t endbyte = 0; - loff_t pos = iocb->ki_pos; if (get_fuse_conn(inode)->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ @@ -1167,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - - if (count == 0) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) goto out; @@ -1183,7 +1177,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { + loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -1209,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) written += written_buffered; iocb->ki_pos = pos + written_buffered; } else { - written = fuse_perform_write(file, mapping, from, pos); + written = fuse_perform_write(file, mapping, from, iocb->ki_pos); if (written >= 0) - iocb->ki_pos = pos + written; + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; @@ -1412,7 +1407,6 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct fuse_io_priv io = { .async = 0, .file = file }; - size_t count = iov_iter_count(from); ssize_t res; if (is_bad_inode(inode)) @@ -1420,11 +1414,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = generic_write_checks(file, &iocb->ki_pos, &count, 0); - if (!res) { - iov_iter_truncate(from, count); + res = generic_write_checks(iocb, from); + if (res > 0) res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - } fuse_invalidate_attr(inode); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); @@ -2782,8 +2774,7 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; @@ -2800,15 +2791,15 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, inode = file->f_mapping->host; i_size = i_size_read(inode); - if ((rw == READ) && (offset > i_size)) + if ((iov_iter_rw(iter) == READ) && (offset > i_size)) return 0; /* optimization for short read */ - if (async_dio && rw != WRITE && offset 
+ count > i_size) { + if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; - count = min_t(loff_t, count, fuse_round_up(i_size - offset)); - iov_iter_truncate(iter, count); + iov_iter_truncate(iter, fuse_round_up(i_size - offset)); + count = iov_iter_count(iter); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2819,7 +2810,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, io->bytes = -1; io->size = 0; io->offset = offset; - io->write = (rw == WRITE); + io->write = (iov_iter_rw(iter) == WRITE); io->err = 0; io->file = file; /* @@ -2834,19 +2825,15 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && + iov_iter_rw(iter) == WRITE) io->async = false; if (io->async && is_sync_kiocb(iocb)) io->done = &wait; - if (rw == WRITE) { - ret = generic_write_checks(file, &pos, &count, 0); - if (!ret) { - iov_iter_truncate(iter, count); - ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - } - + if (iov_iter_rw(iter) == WRITE) { + ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); fuse_invalidate_attr(inode); } else { ret = __fuse_direct_read(io, iter, &pos); @@ -2865,7 +2852,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, kfree(io); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { if (ret > 0) fuse_write_update_size(inode, pos); else if (ret < 0 && offset + count > i_size) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a6e6990aea39..5551fea0afd7 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1016,13 +1016,12 @@ out: /** * gfs2_ok_for_dio - check that dio is valid on this file * @ip: The inode - * @rw: READ or WRITE * @offset: The offset at which we are reading or writing * * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) * 1 (to accept the i/o request) */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) { /* * Should we return an error here? 
I can't see that O_DIRECT for @@ -1039,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) -static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = gfs2_glock_nq(&gh); if (rv) return rv; - rv = gfs2_ok_for_dio(ip, rw, offset); + rv = gfs2_ok_for_dio(ip, offset); if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ @@ -1091,13 +1090,12 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = filemap_write_and_wait_range(mapping, lstart, end); if (rv) goto out; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) truncate_inode_pages_range(mapping, lstart, end); } - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - gfs2_get_block_direct, NULL, NULL, 0); + rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 207eb4a8135e..31892871ea87 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -709,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 5c27e48aa76f..e3065cb9ab08 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1494,32 +1494,34 @@ int gfs2_quotad(void *data) return 0; } -static int gfs2_quota_get_xstate(struct super_block *sb, - struct fs_quota_stat *fqs) +static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state) { struct gfs2_sbd *sdp = sb->s_fs_info; - memset(fqs, 0, sizeof(struct fs_quota_stat)); - fqs->qs_version = FS_QSTAT_VERSION; + memset(state, 0, sizeof(*state)); switch (sdp->sd_args.ar_quota) { case GFS2_QUOTA_ON: - fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD); + state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED; + state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED; /*FALLTHRU*/ case GFS2_QUOTA_ACCOUNT: - fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT); + state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED | + QCI_SYSFILE; + state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED | + QCI_SYSFILE; break; case GFS2_QUOTA_OFF: break; } - if (sdp->sd_quota_inode) { - fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; - fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; + state->s_state[USRQUOTA].ino = + GFS2_I(sdp->sd_quota_inode)->i_no_addr; + state->s_state[USRQUOTA].blocks = sdp->sd_quota_inode->i_blocks; } - fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ - fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ - fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); + state->s_state[USRQUOTA].nextents = 1; /* unsupported */ + state->s_state[GRPQUOTA] = state->s_state[USRQUOTA]; + state->s_incoredqs = list_lru_count(&gfs2_qd_lru); return 0; } @@ -1664,7 +1666,7 @@ out_put: const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, - .get_xstate = gfs2_quota_get_xstate, + .get_state = gfs2_quota_get_state, .get_dqblk = 
gfs2_get_dqblk, .set_dqblk = gfs2_set_dqblk, }; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9337065bcc67..75fd5d873c19 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -133,13 +133,13 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 6229214ef7c1..b0afedbef12b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -131,14 +131,13 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, - hfsplus_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 3197aed10614..070dc4b33544 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, jfs_get_block); } -static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -339,13 +339,13 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. 
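 *
 * Same shape as the hfs/hfsplus hunks above; a sketch of the check,
 * assuming the existing jfs_write_failed() helper:
 *
 *	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
 *		loff_t end = offset + count;
 *
 *		if (end > i_size_read(inode))
 *			jfs_write_failed(mapping, end);
 *	}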
*/ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/namei.c b/fs/namei.c index 76fb76a0818b..ffab2e06e147 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1585,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } err = -ENOENT; - if (!inode || d_is_negative(path->dentry)) + if (d_is_negative(path->dentry)) goto out_path_put; if (should_follow_link(path->dentry, follow)) { @@ -2310,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) mutex_unlock(&dir->d_inode->i_mutex); done: - if (!dentry->d_inode || d_is_negative(dentry)) { + if (d_is_negative(dentry)) { error = -ENOENT; dput(dentry); goto out; @@ -3038,7 +3038,7 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode || d_is_negative(path->dentry)) { + if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } @@ -3077,7 +3077,7 @@ finish_open: error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; - if (!S_ISREG(nd->inode->i_mode)) + if (!d_is_reg(nd->path.dentry)) will_truncate = false; if (will_truncate) { diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 479bf8db264e..011324ce9df2 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -170,20 +170,15 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_written = 0; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); size_t bufsize; int errno; void *bouncebuffer; + off_t pos; ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(file, &pos, &count, 0); - if (errno) + errno = generic_write_checks(iocb, from); + if (errno <= 0) return errno; - iov_iter_truncate(from, count); - - if (!count) - return 0; errno = ncp_make_open(inode, O_WRONLY); if (errno) { @@ -201,10 +196,11 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) errno = -EIO; /* -ENOMEM */ goto outrel; } + pos = iocb->ki_pos; while (iov_iter_count(from)) { int written_this_time; size_t to_write = min_t(size_t, - bufsize - ((off_t)pos % bufsize), + bufsize - (pos % bufsize), iov_iter_count(from)); if (copy_from_iter(bouncebuffer, to_write, from) != to_write) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c3929fb2ab26..682f65fe09b5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, /** * nfs_direct_IO - NFS address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. 
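 *
 * With the rw argument gone, the swap dispatch reduces to reading
 * the direction from the iterator (sketch of the code below):
 *
 *	if (iov_iter_rw(iter) == READ)
 *		return nfs_file_direct_read(iocb, iter, pos);
 *	return nfs_file_direct_write(iocb, iter);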
*/ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -267,9 +266,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t #else VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (rw == READ) + if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); - return nfs_file_direct_write(iocb, iter, pos); + return nfs_file_direct_write(iocb, iter); #endif /* CONFIG_NFS_SWAP */ } @@ -960,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -969,25 +967,16 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - loff_t end; - size_t count = iov_iter_count(iter); - end = (pos + count - 1) >> PAGE_CACHE_SHIFT; - - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); + loff_t pos, end; dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(file, &pos, &count, 0); - if (result) - goto out; + nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, + iov_iter_count(iter)); - result = -EINVAL; - if ((ssize_t) count < 0) - goto out; - result = 0; - if (!count) - goto out; + pos = iocb->ki_pos; + end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT; mutex_lock(&inode->i_mutex); @@ -1002,7 +991,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; } - task_io_account_write(count); + task_io_account_write(iov_iter_count(iter)); result = -ENOMEM; dreq = nfs_direct_req_alloc(); @@ -1010,7 +999,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; dreq->inode = inode; - dreq->bytes_left = count; + dreq->bytes_left = iov_iter_count(iter); dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); @@ -1050,7 +1039,6 @@ out_release: nfs_direct_req_release(dreq); out_unlock: mutex_unlock(&inode->i_mutex); -out: return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6a3adedf027..c40e4363e746 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,7 +170,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_read(iocb, to, iocb->ki_pos); dprintk("NFS: read(%pD2, %zu@%lu)\n", @@ -674,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) unsigned long written = 0; ssize_t result; size_t count = iov_iter_count(from); - loff_t pos = iocb->ki_pos; result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (file->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, from, pos); + if (iocb->ki_flags & IOCB_DIRECT) { + result = generic_write_checks(iocb, from); + if (result <= 0) + return 
result; + return nfs_file_direct_write(iocb, from); + } dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, count, (long long) pos); + file, count, (long long) iocb->ki_pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -692,7 +695,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) /* * O_APPEND implies that we must revalidate the file length. */ - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { result = nfs_revalidate_file_size(inode, file); if (result) goto out; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 568ecf0a880f..b8f5c63f77b2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, static void nfs_readpage_release(struct nfs_page *req) { - struct inode *d_inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; - dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), req->wb_bytes, (long long)req_offset(req)); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + nfs_readpage_to_fscache(inode, req->wb_page, 0); unlock_page(req->wb_page); } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 766cb85fe2f1..be936df4ba73 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -314,18 +313,17 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t size; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iter, offset, - nilfs_get_block); + size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && size < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 840e95e3f1d2..7bb487e663b4 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -328,25 +328,25 @@ err_out: return err; } -static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, - size_t *count) +static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, + struct iov_iter *from) { loff_t pos; s64 end, ll; ssize_t err; unsigned long flags; + struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " - "0x%llx, count 0x%lx.", vi->i_ino, + "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)*ppos, (unsigned long)*count); - /* We can write back this queue in page reclaim. 
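The write paths converted here (ncpfs and nfs above, ntfs right here, and ocfs2, udf, xfs below) all move to the new generic_write_checks(iocb, from) convention these hunks imply: the helper truncates the iterator itself, and a zero return now covers the old nothing-to-write special case. A typical caller therefore shrinks to roughly:

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)		/* error, or nothing left to write */
		return ret;
	/* on success, ret is the byte count remaining in 'from' */

replacing the old dance of passing &pos and &count by reference and then calling iov_iter_truncate() by hand.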
*/ - current->backing_dev_info = inode_to_bdi(vi); - err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); - if (unlikely(err)) + (unsigned long long)iocb->ki_pos, + iov_iter_count(from)); + err = generic_write_checks(iocb, from); + if (unlikely(err <= 0)) goto out; /* * All checks have passed. Before we start doing any writing we want @@ -379,8 +379,6 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, err = -EOPNOTSUPP; goto out; } - if (*count == 0) - goto out; base_ni = ni; if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; @@ -392,9 +390,9 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, * cannot fail either so there is no need to check the return code. */ file_update_time(file); - pos = *ppos; + pos = iocb->ki_pos; /* The first byte after the last cluster being written to. */ - end = (pos + *count + vol->cluster_size_mask) & + end = (pos + iov_iter_count(from) + vol->cluster_size_mask) & ~(u64)vol->cluster_size_mask; /* * If the write goes beyond the allocated size, extend the allocation @@ -422,7 +420,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, "partially extended.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } } else { err = ll; @@ -438,7 +436,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, vi->i_ino, (unsigned) le32_to_cpu(ni->type), (int)-err); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } else { if (err != -ENOSPC) ntfs_error(vi->i_sb, "Cannot perform " @@ -1930,60 +1928,36 @@ again: } /** - * ntfs_file_write_iter_nolock - write data to a file - * @iocb: IO state structure (file, offset, etc.) - * @from: iov_iter with data to write - * - * Basically the same as __generic_file_write_iter() except that it ends - * up calling ntfs_perform_write() instead of generic_perform_write() and that - * O_DIRECT is not implemented. - */ -static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, - struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - loff_t pos = iocb->ki_pos; - ssize_t written = 0; - ssize_t err; - size_t count = iov_iter_count(from); - - err = ntfs_prepare_file_for_write(file, &pos, &count); - if (count && !err) { - iov_iter_truncate(from, count); - written = ntfs_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; - } - current->backing_dev_info = NULL; - return written ? written : err; -} - -/** - * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() + * ntfs_file_write_iter - write data to a file * @iocb: IO state structure * @from: iov_iter with data to write * * Basically the same as generic_file_write_iter() except that it ends up - * calling ntfs_file_write_iter_nolock() instead of - * __generic_file_write_iter(). + * calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); - ssize_t ret; + ssize_t written = 0; + ssize_t err; mutex_lock(&vi->i_mutex); - ret = ntfs_file_write_iter_nolock(iocb, from); + /* We can write back this queue in page reclaim. 
*/ + current->backing_dev_info = inode_to_bdi(vi); + err = ntfs_prepare_file_for_write(iocb, from); + if (iov_iter_count(from) && !err) + written = ntfs_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; mutex_unlock(&vi->i_mutex); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (likely(written > 0)) { + err = generic_write_sync(file, iocb->ki_pos, written); if (err < 0) - ret = err; + written = 0; } - return ret; + iocb->ki_pos += written; + return written ? written : err; } /** diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 8d2bc840c288..f906a250da6a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -855,10 +855,9 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, ocfs2_inode_unlock(inode, 1); } - written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); if (unlikely(written < 0)) { loff_t i_size = i_size_read(inode); @@ -946,9 +945,7 @@ out: return ret; } -static ssize_t ocfs2_direct_IO(int rw, - struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; @@ -970,12 +967,11 @@ static ssize_t ocfs2_direct_IO(int rw, if (i_size_read(inode) <= offset && !full_coherency) return 0; - if (rw == READ) - return __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + if (iov_iter_rw(iter) == READ) + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); else return ocfs2_direct_IO_write(iocb, iter, offset); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8c48e989beba..913fc250d85a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2106,7 +2106,7 @@ out: } static int ocfs2_prepare_inode_for_write(struct file *file, - loff_t *ppos, + loff_t pos, size_t count, int appending, int *direct_io, @@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, int ret = 0, meta_level = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - loff_t saved_pos = 0, end; + loff_t end; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file, } } - /* work on a copy of ppos until we're sure that we won't have - * to recalculate it due to relocking. */ - if (appending) - saved_pos = i_size_read(inode); - else - saved_pos = *ppos; - - end = saved_pos + count; + end = pos + count; - ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); + ret = ocfs2_check_range_for_refcount(inode, pos, count); if (ret == 1) { ocfs2_inode_unlock(inode, meta_level); meta_level = -1; ret = ocfs2_prepare_inode_for_refcount(inode, file, - saved_pos, + pos, count, &meta_level); if (has_refcount) @@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * caller will have to retake some cluster * locks and initiate the io as buffered. 
*/ - ret = ocfs2_check_range_for_holes(inode, saved_pos, count); + ret = ocfs2_check_range_for_holes(inode, pos, count); if (ret == 1) { /* * Fallback to old way if the feature bit is not set. @@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, break; } - if (appending) - *ppos = saved_pos; - out_unlock: trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, - saved_pos, appending, count, + pos, appending, count, direct_io, has_refcount); if (meta_level >= 0) @@ -2260,19 +2250,20 @@ out: static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int ret, direct_io, appending, rw_level, have_alloc_sem = 0; + int direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t count = iov_iter_count(from); - loff_t old_size, *ppos = &iocb->ki_pos; + ssize_t ret; + size_t count = iov_iter_count(from), orig_count; + loff_t old_size; u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct address_space *mapping = file->f_mapping; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); int unaligned_dio = 0; + int dropped_dio = 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2283,8 +2274,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, if (count == 0) return 0; - appending = file->f_flags & O_APPEND ? 1 : 0; - direct_io = file->f_flags & O_DIRECT ? 1 : 0; + appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0; + direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; mutex_lock(&inode->i_mutex); @@ -2329,8 +2320,17 @@ relock: ocfs2_inode_unlock(inode, 1); } + orig_count = iov_iter_count(from); + ret = generic_write_checks(iocb, from); + if (ret <= 0) { + if (ret) + mlog_errno(ret); + goto out; + } + count = ret; + can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2338,7 +2338,7 @@ relock: } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); + unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); /* * We can't complete the direct I/O as requested, fall back to @@ -2351,6 +2351,9 @@ relock: rw_level = -1; direct_io = 0; + iocb->ki_flags &= ~IOCB_DIRECT; + iov_iter_reexpand(from, orig_count); + dropped_dio = 1; goto relock; } @@ -2374,74 +2377,15 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, ppos, &count, - S_ISBLK(inode->i_mode)); - if (ret) - goto out_dio; - - iov_iter_truncate(from, count); - if (direct_io) { - loff_t endbyte; - ssize_t written_buffered; - written = generic_file_direct_write(iocb, from, *ppos); - if (written < 0 || written == count) { - ret = written; - goto out_dio; - } - - /* - * for completing the rest of the request. - */ - count -= written; - written_buffered = generic_perform_write(file, from, *ppos); - /* - * If generic_file_buffered_write() returned a synchronous error - * then we want to return the number of bytes which were - * direct-written, or the error code if that was zero. Note - * that this differs from normal direct-io semantics, which - * will return -EFOO even if some bytes were written. 
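The ocfs2 rewrite in these hunks drops the open-coded direct-I/O-with-buffered-fallback path in favour of a single __generic_file_write_iter() call. The subtle bit is the relock loop: generic_write_checks() may already have truncated the iterator, so before retrying as buffered I/O the request has to be restored to its original size. Condensed, the pattern introduced here looks like:

	orig_count = iov_iter_count(from);
	ret = generic_write_checks(iocb, from);
	...
	if (cannot_complete_direct_io) {
		iocb->ki_flags &= ~IOCB_DIRECT;	/* retry as buffered I/O */
		iov_iter_reexpand(from, orig_count);
		dropped_dio = 1;
		goto relock;
	}

where cannot_complete_direct_io stands for the unaligned-dio checks above, and dropped_dio later forces the filemap_fdatawrite_range() flush that preserves O_DIRECT semantics on the fallback path.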
- */ - if (written_buffered < 0) { - ret = written_buffered; - goto out_dio; - } - - /* We need to ensure that the page cache pages are written to - * disk and invalidated to preserve the expected O_DIRECT - * semantics. - */ - endbyte = *ppos + written_buffered - 1; - ret = filemap_write_and_wait_range(file->f_mapping, *ppos, - endbyte); - if (ret == 0) { - iocb->ki_pos = *ppos + written_buffered; - written += written_buffered; - invalidate_mapping_pages(mapping, - *ppos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } - } else { - current->backing_dev_info = inode_to_bdi(inode); - written = generic_perform_write(file, from, *ppos); - if (likely(written >= 0)) - iocb->ki_pos = *ppos + written; - current->backing_dev_info = NULL; - } - -out_dio: + written = __generic_file_write_iter(iocb, from); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; - if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || - ((file->f_flags & O_DIRECT) && !direct_io)) { + if (((file->f_flags & O_DSYNC) && !direct_io) || + IS_SYNC(inode) || dropped_dio) { ret = filemap_fdatawrite_range(file->f_mapping, iocb->ki_pos - written, iocb->ki_pos - 1); @@ -2552,7 +2496,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ - if (filp->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { have_alloc_sem = 1; ocfs2_iocb_set_sem_locked(iocb); @@ -2586,7 +2530,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b32ce53d24ee..56e1ffda4d89 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -364,6 +364,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, case PSTORE_TYPE_PMSG: scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); break; + case PSTORE_TYPE_PPC_OPAL: + sprintf(name, "powerpc-opal-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); break; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 0ccd4ba3a246..ecc25cf0ee6e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -900,14 +900,17 @@ static inline struct dquot **i_dquot(struct inode *inode) static int dqinit_needed(struct inode *inode, int type) { + struct dquot * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return 0; + + dquots = i_dquot(inode); if (type != -1) - return !i_dquot(inode)[type]; + return !dquots[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (!i_dquot(inode)[cnt]) + if (!dquots[cnt]) return 1; return 0; } @@ -970,12 +973,13 @@ static void add_dquot_ref(struct super_block *sb, int type) static void remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) { - struct dquot *dquot = i_dquot(inode)[type]; + struct dquot **dquots = i_dquot(inode); + struct dquot 
*dquot = dquots[type]; - i_dquot(inode)[type] = NULL; if (!dquot) return; + dquots[type] = NULL; if (list_empty(&dquot->dq_free)) { /* * The inode still has reference to dquot so it can't be in the @@ -1159,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn) return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); - case PRJQUOTA: /* Never taken... Just make gcc happy */ - return 0; + case PRJQUOTA: + return 1; } return 0; } @@ -1389,16 +1393,21 @@ static int dquot_active(const struct inode *inode) static void __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; - struct dquot *got[MAXQUOTAS]; + struct dquot **dquots, *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; qsize_t rsv; if (!dquot_active(inode)) return; + dquots = i_dquot(inode); + /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct kqid qid; + kprojid_t projid; + int rc; + got[cnt] = NULL; if (type != -1 && cnt != type) continue; @@ -1407,8 +1416,12 @@ static void __dquot_initialize(struct inode *inode, int type) * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ - if (i_dquot(inode)[cnt]) + if (dquots[cnt]) + continue; + + if (!sb_has_quota_active(sb, cnt)) continue; + init_needed = 1; switch (cnt) { @@ -1418,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type) case GRPQUOTA: qid = make_kqid_gid(inode->i_gid); break; + case PRJQUOTA: + rc = inode->i_sb->dq_op->get_projid(inode, &projid); + if (rc) + continue; + qid = make_kqid_projid(projid); + break; } got[cnt] = dqget(sb, qid); } @@ -1438,8 +1457,8 @@ static void __dquot_initialize(struct inode *inode, int type) /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; - if (!i_dquot(inode)[cnt]) { - i_dquot(inode)[cnt] = got[cnt]; + if (!dquots[cnt]) { + dquots[cnt] = got[cnt]; got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1447,7 +1466,7 @@ static void __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) - dquot_resv_space(i_dquot(inode)[cnt], rsv); + dquot_resv_space(dquots[cnt], rsv); } } out_err: @@ -1473,12 +1492,13 @@ EXPORT_SYMBOL(dquot_initialize); static void __dquot_drop(struct inode *inode) { int cnt; + struct dquot **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = i_dquot(inode)[cnt]; - i_dquot(inode)[cnt] = NULL; + put[cnt] = dquots[cnt]; + dquots[cnt] = NULL; } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1486,6 +1506,7 @@ static void __dquot_drop(struct inode *inode) void dquot_drop(struct inode *inode) { + struct dquot * const *dquots; int cnt; if (IS_NOQUOTA(inode)) @@ -1498,8 +1519,9 @@ void dquot_drop(struct inode *inode) * must assure that nobody can come after the DQUOT_DROP and * add quota pointers back anyway. 
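A recurring micro-cleanup in the dquot hunks above and below: i_dquot(inode) is not a plain field access (it resolves through a superblock callback), so each function now loads it once into a local pointer instead of re-evaluating it on every loop iteration. The shape, with dquot_do_something as a placeholder for whatever per-type operation the surrounding function performs:

	struct dquot **dquots = i_dquot(inode);
	int cnt;

	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		if (dquots[cnt])
			dquot_do_something(dquots[cnt]);

Note also that remove_inode_dquot_ref() above now clears the slot only after the NULL check, so the early return no longer performs a pointless store.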
*/ + dquots = i_dquot(inode); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (i_dquot(inode)[cnt]) + if (dquots[cnt]) break; } @@ -1600,8 +1622,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = i_dquot(inode); int reserve = flags & DQUOT_SPACE_RESERVE; + struct dquot **dquots; if (!dquot_active(inode)) { inode_incr_space(inode, number, reserve); @@ -1611,6 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1652,13 +1675,14 @@ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = i_dquot(inode); + struct dquot * const *dquots; if (!dquot_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1690,6 +1714,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { + struct dquot **dquots; int cnt, index; if (!dquot_active(inode)) { @@ -1697,18 +1722,18 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) return 0; } + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (i_dquot(inode)[cnt]) - dquot_claim_reserved_space(i_dquot(inode)[cnt], - number); + if (dquots[cnt]) + dquot_claim_reserved_space(dquots[cnt], number); } /* Update inode bytes */ inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(i_dquot(inode)); + mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return 0; } @@ -1719,6 +1744,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { + struct dquot **dquots; int cnt, index; if (!dquot_active(inode)) { @@ -1726,18 +1752,18 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) return; } + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (i_dquot(inode)[cnt]) - dquot_reclaim_reserved_space(i_dquot(inode)[cnt], - number); + if (dquots[cnt]) + dquot_reclaim_reserved_space(dquots[cnt], number); } /* Update inode bytes */ inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(i_dquot(inode)); + mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return; } @@ -1750,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = i_dquot(inode); + struct dquot **dquots; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!dquot_active(inode)) { @@ -1758,6 +1784,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) return; } + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1793,12 +1820,13 @@ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct 
dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = i_dquot(inode); + struct dquot * const *dquots; int index; if (!dquot_active(inode)) return; + dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -2161,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EROFS; goto out_fmt; } - if (!sb->s_op->quota_write || !sb->s_op->quota_read) { + if (!sb->s_op->quota_write || !sb->s_op->quota_read || + (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } @@ -2614,55 +2643,73 @@ out: EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ -int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int dquot_get_state(struct super_block *sb, struct qc_state *state) { struct mem_dqinfo *mi; + struct qc_type_state *tstate; + struct quota_info *dqopt = sb_dqopt(sb); + int type; mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_active(sb, type)) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - return -ESRCH; + memset(state, 0, sizeof(*state)); + for (type = 0; type < MAXQUOTAS; type++) { + if (!sb_has_quota_active(sb, type)) + continue; + tstate = state->s_state + type; + mi = sb_dqopt(sb)->info + type; + tstate->flags = QCI_ACCT_ENABLED; + spin_lock(&dq_data_lock); + if (mi->dqi_flags & DQF_SYS_FILE) + tstate->flags |= QCI_SYSFILE; + if (mi->dqi_flags & DQF_ROOT_SQUASH) + tstate->flags |= QCI_ROOT_SQUASH; + if (sb_has_quota_limits_enabled(sb, type)) + tstate->flags |= QCI_LIMITS_ENFORCED; + tstate->spc_timelimit = mi->dqi_bgrace; + tstate->ino_timelimit = mi->dqi_igrace; + tstate->ino = dqopt->files[type]->i_ino; + tstate->blocks = dqopt->files[type]->i_blocks; + tstate->nextents = 1; /* We don't know... 
*/ + spin_unlock(&dq_data_lock); } - mi = sb_dqopt(sb)->info + type; - spin_lock(&dq_data_lock); - ii->dqi_bgrace = mi->dqi_bgrace; - ii->dqi_igrace = mi->dqi_igrace; - ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK; - ii->dqi_valid = IIF_ALL; - spin_unlock(&dq_data_lock); mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return 0; } -EXPORT_SYMBOL(dquot_get_dqinfo); +EXPORT_SYMBOL(dquot_get_state); /* Generic routine for setting common part of quota file information */ -int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) { struct mem_dqinfo *mi; int err = 0; + if ((ii->i_fieldmask & QC_WARNS_MASK) || + (ii->i_fieldmask & QC_RT_SPC_TIMER)) + return -EINVAL; mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!sb_has_quota_active(sb, type)) { err = -ESRCH; goto out; } mi = sb_dqopt(sb)->info + type; - if (ii->dqi_valid & IIF_FLAGS) { - if (ii->dqi_flags & ~DQF_SETINFO_MASK || - (ii->dqi_flags & DQF_ROOT_SQUASH && + if (ii->i_fieldmask & QC_FLAGS) { + if ((ii->i_flags & QCI_ROOT_SQUASH && mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { err = -EINVAL; goto out; } } spin_lock(&dq_data_lock); - if (ii->dqi_valid & IIF_BGRACE) - mi->dqi_bgrace = ii->dqi_bgrace; - if (ii->dqi_valid & IIF_IGRACE) - mi->dqi_igrace = ii->dqi_igrace; - if (ii->dqi_valid & IIF_FLAGS) - mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | - (ii->dqi_flags & DQF_SETINFO_MASK); + if (ii->i_fieldmask & QC_SPC_TIMER) + mi->dqi_bgrace = ii->i_spc_timelimit; + if (ii->i_fieldmask & QC_INO_TIMER) + mi->dqi_igrace = ii->i_ino_timelimit; + if (ii->i_fieldmask & QC_FLAGS) { + if (ii->i_flags & QCI_ROOT_SQUASH) + mi->dqi_flags |= DQF_ROOT_SQUASH; + else + mi->dqi_flags &= ~DQF_ROOT_SQUASH; + } spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ @@ -2677,7 +2724,7 @@ const struct quotactl_ops dquot_quotactl_ops = { .quota_on = dquot_quota_on, .quota_off = dquot_quota_off, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk @@ -2688,7 +2735,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk diff --git a/fs/quota/quota.c b/fs/quota/quota.c index d14a799c7785..86ded7375c21 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -118,13 +118,30 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr) static int quota_getinfo(struct super_block *sb, int type, void __user *addr) { - struct if_dqinfo info; + struct qc_state state; + struct qc_type_state *tstate; + struct if_dqinfo uinfo; int ret; - if (!sb->s_qcop->get_info) + /* This checks whether qc_state has enough entries... 
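From this point the series collapses the per-type ->get_info callback (and, further down, the XFS-only ->get_xstate / ->get_xstatev pair) into a single ->get_state operation that reports every quota type at once through struct qc_state. A consumer indexes s_state[] and checks the per-type flags, as quota_getinfo() does right here, roughly:

	struct qc_state state;
	struct qc_type_state *tstate;
	int ret;

	ret = sb->s_qcop->get_state(sb, &state);
	if (ret)
		return ret;
	tstate = state.s_state + type;
	if (!(tstate->flags & QCI_ACCT_ENABLED))
		return -ESRCH;	/* quota not running for this type */
	/* tstate->spc_timelimit, ->ino_timelimit, ->ino etc. are valid */

The legacy Q_GETINFO, Q_XGETQSTAT and Q_XGETQSTATV ABIs are all synthesized from this one structure in fs/quota/quota.c, as quota_getstate() and quota_getstatev() below show.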
*/ + BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS); + if (!sb->s_qcop->get_state) return -ENOSYS; - ret = sb->s_qcop->get_info(sb, type, &info); - if (!ret && copy_to_user(addr, &info, sizeof(info))) + ret = sb->s_qcop->get_state(sb, &state); + if (ret) + return ret; + tstate = state.s_state + type; + if (!(tstate->flags & QCI_ACCT_ENABLED)) + return -ESRCH; + memset(&uinfo, 0, sizeof(uinfo)); + uinfo.dqi_bgrace = tstate->spc_timelimit; + uinfo.dqi_igrace = tstate->ino_timelimit; + if (tstate->flags & QCI_SYSFILE) + uinfo.dqi_flags |= DQF_SYS_FILE; + if (tstate->flags & QCI_ROOT_SQUASH) + uinfo.dqi_flags |= DQF_ROOT_SQUASH; + uinfo.dqi_valid = IIF_ALL; + if (!ret && copy_to_user(addr, &uinfo, sizeof(uinfo))) return -EFAULT; return ret; } @@ -132,12 +149,31 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr) static int quota_setinfo(struct super_block *sb, int type, void __user *addr) { struct if_dqinfo info; + struct qc_info qinfo; if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; if (!sb->s_qcop->set_info) return -ENOSYS; - return sb->s_qcop->set_info(sb, type, &info); + if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE)) + return -EINVAL; + memset(&qinfo, 0, sizeof(qinfo)); + if (info.dqi_valid & IIF_FLAGS) { + if (info.dqi_flags & ~DQF_SETINFO_MASK) + return -EINVAL; + if (info.dqi_flags & DQF_ROOT_SQUASH) + qinfo.i_flags |= QCI_ROOT_SQUASH; + qinfo.i_fieldmask |= QC_FLAGS; + } + if (info.dqi_valid & IIF_BGRACE) { + qinfo.i_spc_timelimit = info.dqi_bgrace; + qinfo.i_fieldmask |= QC_SPC_TIMER; + } + if (info.dqi_valid & IIF_IGRACE) { + qinfo.i_ino_timelimit = info.dqi_igrace; + qinfo.i_fieldmask |= QC_INO_TIMER; + } + return sb->s_qcop->set_info(sb, type, &qinfo); } static inline qsize_t qbtos(qsize_t blocks) @@ -252,25 +288,149 @@ static int quota_disable(struct super_block *sb, void __user *addr) return sb->s_qcop->quota_disable(sb, flags); } +static int quota_state_to_flags(struct qc_state *state) +{ + int flags = 0; + + if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) + flags |= FS_QUOTA_UDQ_ACCT; + if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED) + flags |= FS_QUOTA_UDQ_ENFD; + if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) + flags |= FS_QUOTA_GDQ_ACCT; + if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED) + flags |= FS_QUOTA_GDQ_ENFD; + if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) + flags |= FS_QUOTA_PDQ_ACCT; + if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED) + flags |= FS_QUOTA_PDQ_ENFD; + return flags; +} + +static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) +{ + int type; + struct qc_state state; + int ret; + + ret = sb->s_qcop->get_state(sb, &state); + if (ret < 0) + return ret; + + memset(fqs, 0, sizeof(*fqs)); + fqs->qs_version = FS_QSTAT_VERSION; + fqs->qs_flags = quota_state_to_flags(&state); + /* No quota enabled? 
*/ + if (!fqs->qs_flags) + return -ENOSYS; + fqs->qs_incoredqs = state.s_incoredqs; + /* + * GETXSTATE quotactl has space for just one set of time limits so + * report them for the first enabled quota type + */ + for (type = 0; type < XQM_MAXQUOTAS; type++) + if (state.s_state[type].flags & QCI_ACCT_ENABLED) + break; + BUG_ON(type == XQM_MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; + fqs->qs_itimelimit = state.s_state[type].ino_timelimit; + fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; + fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; + fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; + if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) { + fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; + fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; + fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; + } + if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) { + fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; + fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; + fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; + } + if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) { + /* + * Q_XGETQSTAT doesn't have room for both group and project + * quotas. So, allow the project quota values to be copied out + * only if there is no group quota information available. + */ + if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) { + fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino; + fqs->qs_gquota.qfs_nblks = + state.s_state[PRJQUOTA].blocks; + fqs->qs_gquota.qfs_nextents = + state.s_state[PRJQUOTA].nextents; + } + } + return 0; +} + static int quota_getxstate(struct super_block *sb, void __user *addr) { struct fs_quota_stat fqs; int ret; - if (!sb->s_qcop->get_xstate) + if (!sb->s_qcop->get_state) return -ENOSYS; - ret = sb->s_qcop->get_xstate(sb, &fqs); + ret = quota_getstate(sb, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } +static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) +{ + int type; + struct qc_state state; + int ret; + + ret = sb->s_qcop->get_state(sb, &state); + if (ret < 0) + return ret; + + memset(fqs, 0, sizeof(*fqs)); + fqs->qs_version = FS_QSTAT_VERSION; + fqs->qs_flags = quota_state_to_flags(&state); + /* No quota enabled? 
*/ + if (!fqs->qs_flags) + return -ENOSYS; + fqs->qs_incoredqs = state.s_incoredqs; + /* + * GETXSTATV quotactl has space for just one set of time limits so + * report them for the first enabled quota type + */ + for (type = 0; type < XQM_MAXQUOTAS; type++) + if (state.s_state[type].flags & QCI_ACCT_ENABLED) + break; + BUG_ON(type == XQM_MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; + fqs->qs_itimelimit = state.s_state[type].ino_timelimit; + fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; + fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; + fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; + if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) { + fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; + fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; + fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; + } + if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) { + fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; + fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; + fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; + } + if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) { + fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino; + fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; + fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; + } + return 0; +} + static int quota_getxstatev(struct super_block *sb, void __user *addr) { struct fs_quota_statv fqs; int ret; - if (!sb->s_qcop->get_xstatev) + if (!sb->s_qcop->get_state) return -ENOSYS; memset(&fqs, 0, sizeof(fqs)); @@ -284,7 +444,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) default: return -EINVAL; } - ret = sb->s_qcop->get_xstatev(sb, &fqs); + ret = quota_getstatev(sb, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; @@ -357,6 +517,30 @@ static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src) dst->d_fieldmask |= QC_RT_SPACE; } +static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst, + struct fs_disk_quota *src) +{ + memset(dst, 0, sizeof(*dst)); + dst->i_spc_timelimit = src->d_btimer; + dst->i_ino_timelimit = src->d_itimer; + dst->i_rt_spc_timelimit = src->d_rtbtimer; + dst->i_ino_warnlimit = src->d_iwarns; + dst->i_spc_warnlimit = src->d_bwarns; + dst->i_rt_spc_warnlimit = src->d_rtbwarns; + if (src->d_fieldmask & FS_DQ_BWARNS) + dst->i_fieldmask |= QC_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_IWARNS) + dst->i_fieldmask |= QC_INO_WARNS; + if (src->d_fieldmask & FS_DQ_RTBWARNS) + dst->i_fieldmask |= QC_RT_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_BTIMER) + dst->i_fieldmask |= QC_SPC_TIMER; + if (src->d_fieldmask & FS_DQ_ITIMER) + dst->i_fieldmask |= QC_INO_TIMER; + if (src->d_fieldmask & FS_DQ_RTBTIMER) + dst->i_fieldmask |= QC_RT_SPC_TIMER; +} + static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { @@ -371,6 +555,21 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, qid = make_kqid(current_user_ns(), type, id); if (!qid_valid(qid)) return -EINVAL; + /* Are we actually setting timer / warning limits for all users? 
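The check that follows handles a quirk of the XFS quota ABI that this comment hints at: historically, Q_XSETQLIM against id 0 set the filesystem-wide grace times and warning limits rather than per-user limits for uid 0. The generic path now peels those fields off and routes them through ->set_info before the regular per-id ->set_dqblk call, roughly:

	if (from_kqid(&init_user_ns, qid) == 0 &&
	    (fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK))) {
		copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq);
		ret = sb->s_qcop->set_info(sb, type, &qinfo);
		if (ret)
			return ret;
		/* applied globally; don't also set them per-id */
		fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK);
	}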
*/ + if (from_kqid(&init_user_ns, qid) == 0 && + fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) { + struct qc_info qinfo; + int ret; + + if (!sb->s_qcop->set_info) + return -EINVAL; + copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq); + ret = sb->s_qcop->set_info(sb, type, &qinfo); + if (ret) + return ret; + /* These are already done */ + fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK); + } copy_from_xfs_dqblk(&qdq, &fdq); return sb->s_qcop->set_dqblk(sb, qid, &qdq); } diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index d65877fbe8f4..58efb83dec1c 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -349,6 +349,13 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot) { int tmp = QT_TREEOFF; + +#ifdef __QUOTA_QT_PARANOIA + if (info->dqi_blocks <= QT_TREEOFF) { + quota_error(dquot->dq_sb, "Quota tree root isn't allocated!"); + return -EIO; + } +#endif return do_insert_tree(info, dquot, &tmp, 0); } diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 9cb10d7197f7..2aa012a68e90 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -117,12 +117,16 @@ static int v2_read_file_info(struct super_block *sb, int type) qinfo = info->dqi_priv; if (version == 0) { /* limits are stored as unsigned 32-bit data */ - info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; + info->dqi_max_spc_limit = 0xffffffffLL << QUOTABLOCK_BITS; info->dqi_max_ino_limit = 0xffffffff; } else { - /* used space is stored as unsigned 64-bit value in bytes */ - info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */ - info->dqi_max_ino_limit = 0xffffffffffffffffULL; + /* + * Used space is stored as unsigned 64-bit value in bytes but + * quota core supports only signed 64-bit values so use that + * as a limit + */ + info->dqi_max_spc_limit = 0x7fffffffffffffffLL; /* 2^63-1 */ + info->dqi_max_ino_limit = 0x7fffffffffffffffLL; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h index f1966b42c2fd..4e95430093d9 100644 --- a/fs/quota/quotaio_v2.h +++ b/fs/quota/quotaio_v2.h @@ -13,12 +13,14 @@ */ #define V2_INITQMAGICS {\ 0xd9c01f11, /* USRQUOTA */\ - 0xd9c01927 /* GRPQUOTA */\ + 0xd9c01927, /* GRPQUOTA */\ + 0xd9c03f14, /* PRJQUOTA */\ } #define V2_INITQVERSIONS {\ 1, /* USRQUOTA */\ - 1 /* GRPQUOTA */\ + 1, /* GRPQUOTA */\ + 1, /* PRJQUOTA */\ } /* First generic header */ diff --git a/fs/read_write.c b/fs/read_write.c index 45d583c33879..819ef3faf1bb 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -477,7 +477,8 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t ret = filp->f_op->write_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); - *ppos = kiocb.ki_pos; + if (ret > 0) + *ppos = kiocb.ki_pos; return ret; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9312b7842e03..742242b60972 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3278,22 +3278,22 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) * We thank Mingming Cao for helping us understand in great detail what * to do in this section of the code. 
*/ -static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index bb79cddf0a1f..2adcde137c3f 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -910,7 +910,6 @@ do { \ if (!(cond)) \ reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ __FILE__ ":%i:%s: " format "\n", \ - in_interrupt() ? -1 : task_pid_nr(current), \ __LINE__, __func__ , ##args); \ } while (0) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 71fbbe3e2dab..68b5f182984e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -805,7 +805,7 @@ static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, .quota_off = dquot_quota_off, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk, diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 1ba2baaf4367..6d6a96b4e73f 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -21,7 +21,6 @@ #include "udfdecl.h" -#include <linux/buffer_head.h> #include <linux/bitops.h> #include "udf_i.h" @@ -63,15 +62,14 @@ static int __load_block_bitmap(struct super_block *sb, block_group, nr_groups); } - if (bitmap->s_block_bitmap[block_group]) { + if (bitmap->s_block_bitmap[block_group]) return block_group; - } else { - retval = read_block_bitmap(sb, bitmap, block_group, - block_group); - if (retval < 0) - return retval; - return block_group; - } + + retval = read_block_bitmap(sb, bitmap, block_group, block_group); + if (retval < 0) + return retval; + + return block_group; } static inline int load_block_bitmap(struct super_block *sb, @@ -358,7 +356,6 @@ static void udf_table_free_blocks(struct super_block *sb, struct kernel_lb_addr eloc; struct extent_position oepos, epos; int8_t etype; - int i; struct udf_inode_info *iinfo; mutex_lock(&sbi->s_alloc_mutex); @@ -425,7 +422,6 @@ static void udf_table_free_blocks(struct super_block *sb, } if (epos.bh != oepos.bh) { - i = -1; oepos.block = epos.block; brelse(oepos.bh); get_bh(epos.bh); @@ -762,7 +758,7 @@ inline int udf_prealloc_blocks(struct super_block *sb, uint32_t block_count) { struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; - sector_t allocated; + int allocated; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) allocated = udf_bitmap_prealloc_blocks(sb, diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 05e90edd1992..541a12b5792d 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -30,7 +30,6 @@ #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 3e44f575fb9c..c763fda257bf 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -16,7 
+16,6 @@ #include <linux/fs.h> #include <linux/string.h> -#include <linux/buffer_head.h> struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct udf_fileident_bh *fibh, diff --git a/fs/udf/file.c b/fs/udf/file.c index 74050bff64f4..5dadad9960b9 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -33,7 +33,6 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> #include <linux/uio.h> #include "udf_i.h" @@ -100,8 +99,7 @@ static int udf_adinicb_write_begin(struct file *file, return 0; } -static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { /* Fallback to buffered I/O. */ @@ -121,21 +119,21 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t retval; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - int err, pos; - size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); + int err; mutex_lock(&inode->i_mutex); + + retval = generic_write_checks(iocb, from); + if (retval <= 0) + goto out; + down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - if (file->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = iocb->ki_pos; + loff_t end = iocb->ki_pos + iov_iter_count(from); if (inode->i_sb->s_blocksize < - (udf_file_entry_alloc_offset(inode) + - pos + count)) { + (udf_file_entry_alloc_offset(inode) + end)) { err = udf_expand_file_adinicb(inode); if (err) { mutex_unlock(&inode->i_mutex); @@ -143,16 +141,14 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return err; } } else { - if (pos + count > inode->i_size) - iinfo->i_lenAlloc = pos + count; - else - iinfo->i_lenAlloc = inode->i_size; + iinfo->i_lenAlloc = max(end, inode->i_size); up_write(&iinfo->i_data_sem); } } else up_write(&iinfo->i_data_sem); retval = __generic_file_write_iter(iocb, from); +out: mutex_unlock(&inode->i_mutex); if (retval > 0) { diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 9c1fbd23913d..6afac3d561ac 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -33,7 +33,6 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/slab.h> #include <linux/crc-itu-t.h> @@ -215,8 +214,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, return ret; } -static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; @@ -225,8 +223,8 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); - if (unlikely(ret < 0 && (rw & WRITE))) + ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); + if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE)) udf_write_failed(mapping, offset + count); return ret; } @@ -1637,7 +1635,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0)); if (!bh) { udf_debug("getblk failure\n"); - return -ENOMEM; + return -EIO; } lock_buffer(bh); diff --git a/fs/udf/misc.c b/fs/udf/misc.c index c175b4dabc14..71d1c25f360d 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -23,7 +23,6 @@ 
#include <linux/fs.h> #include <linux/string.h> -#include <linux/buffer_head.h> #include <linux/crc-itu-t.h> #include "udf_i.h" diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 33b246b82c98..39661977c89c 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -27,7 +27,6 @@ #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include <linux/sched.h> #include <linux/crc-itu-t.h> #include <linux/exportfs.h> @@ -569,8 +568,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - mark_inode_dirty(dir); + dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + mark_inode_dirty(dir); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -683,6 +682,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); + dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); mark_inode_dirty(dir); unlock_new_inode(inode); d_instantiate(dentry, inode); @@ -1024,6 +1024,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); + dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + mark_inode_dirty(dir); ihold(inode); d_instantiate(dentry, inode); @@ -1127,7 +1129,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, inode_dec_link_count(new_inode); } old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); + new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb); mark_inode_dirty(old_dir); + mark_inode_dirty(new_dir); if (dir_fi) { dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); diff --git a/fs/udf/partition.c b/fs/udf/partition.c index d6caf01a2097..5f861ed287c3 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -24,7 +24,6 @@ #include <linux/fs.h> #include <linux/string.h> -#include <linux/buffer_head.h> #include <linux/mutex.h> uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, diff --git a/fs/udf/super.c b/fs/udf/super.c index f169411c4ea0..6299f341967b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -48,7 +48,6 @@ #include <linux/stat.h> #include <linux/cdrom.h> #include <linux/nls.h> -#include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/vmalloc.h> #include <linux/errno.h> diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index ac10ca939f26..8dfbc4025e2f 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -27,7 +27,6 @@ #include <linux/mm.h> #include <linux/stat.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> #include "udf_i.h" static int udf_pc_to_char(struct super_block *sb, unsigned char *from, diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 8a9657d7f7c6..42b8c57795cb 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -22,7 +22,6 @@ #include "udfdecl.h" #include <linux/fs.h> #include <linux/mm.h> -#include <linux/buffer_head.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4f8cdc59bc38..1d8eef9cf0f5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1495,7 +1495,6 @@ xfs_end_io_direct_write( STATIC ssize_t xfs_vm_direct_IO( - int rw, struct kiocb *iocb, 
struct iov_iter *iter, loff_t offset) @@ -1503,15 +1502,14 @@ xfs_vm_direct_IO( struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - if (rw & WRITE) { - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, + if (iov_iter_rw(iter) == WRITE) { + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); } - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, NULL, NULL, 0); } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 44856c3b9617..1f12ad0a8585 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -279,7 +279,7 @@ xfs_file_read_iter( XFS_STATS_INC(xs_read_calls); - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -544,18 +544,19 @@ xfs_zero_eof( */ STATIC ssize_t xfs_file_aio_write_checks( - struct file *file, - loff_t *pos, - size_t *count, + struct kiocb *iocb, + struct iov_iter *from, int *iolock) { + struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - int error = 0; + ssize_t error = 0; + size_t count = iov_iter_count(from); restart: - error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); - if (error) + error = generic_write_checks(iocb, from); + if (error <= 0) return error; error = xfs_break_layouts(inode, iolock); @@ -569,16 +570,17 @@ restart: * iolock shared, we need to update it to exclusive which implies * having to redo all checks before. 
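xfs_file_aio_write_checks() now takes the kiocb and iov_iter directly, which makes its restart path interact with the truncation that generic_write_checks() performs: when a write extends past EOF while only the shared iolock is held, the lock is upgraded and all checks are redone, and the iterator must be re-expanded first so the repeated checks see the full request. In sketch form:

	size_t count = iov_iter_count(from);	/* saved before any checks */
	...
	if (*iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, *iolock);
		iov_iter_reexpand(from, count);	/* undo any truncation */
		goto restart;
	}

the same iov_iter_reexpand() trick the ocfs2 hunks used earlier in this section.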
*/ - if (*pos > i_size_read(inode)) { + if (iocb->ki_pos > i_size_read(inode)) { bool zero = false; if (*iolock == XFS_IOLOCK_SHARED) { xfs_rw_iunlock(ip, *iolock); *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); goto restart; } - error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); if (error) return error; } @@ -678,10 +680,11 @@ xfs_file_dio_aio_write( xfs_rw_ilock(ip, iolock); } - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); + count = iov_iter_count(from); + pos = iocb->ki_pos; if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, @@ -734,24 +737,22 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_perform_write(file, from, pos); + trace_xfs_file_buffered_write(ip, iov_iter_count(from), + iocb->ki_pos, 0); + ret = generic_perform_write(file, from, iocb->ki_pos); if (likely(ret >= 0)) - iocb->ki_pos = pos + ret; + iocb->ki_pos += ret; /* * If we hit a space limit, try to free up some lingering preallocated @@ -803,7 +804,7 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 0d4d3590cf85..996a04064894 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -168,10 +168,6 @@ extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, uint, struct qc_dqblk *); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, struct qc_dqblk *); -extern int xfs_qm_scall_getqstat(struct xfs_mount *, - struct fs_quota_stat *); -extern int xfs_qm_scall_getqstatv(struct xfs_mount *, - struct fs_quota_statv *); extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 9b965db45800..9a25c9275fb3 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -38,7 +38,6 @@ STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, uint); -STATIC uint xfs_qm_export_flags(uint); /* * Turn off quota accounting and/or enforcement for all udquots and/or @@ -389,159 +388,6 @@ xfs_qm_scall_quotaon( return 0; } - -/* - * Return quota status information, such as uquota-off, enforcements, etc. - * for Q_XGETQSTAT command. 
- */ -int -xfs_qm_scall_getqstat( - struct xfs_mount *mp, - struct fs_quota_stat *out) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_inode *uip = NULL; - struct xfs_inode *gip = NULL; - struct xfs_inode *pip = NULL; - bool tempuqip = false; - bool tempgqip = false; - bool temppqip = false; - - memset(out, 0, sizeof(fs_quota_stat_t)); - - out->qs_version = FS_QSTAT_VERSION; - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & - (XFS_ALL_QUOTA_ACCT| - XFS_ALL_QUOTA_ENFD)); - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - pip = q->qi_pquotaip; - if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip) == 0) - tempuqip = true; - } - if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip) == 0) - tempgqip = true; - } - /* - * Q_XGETQSTAT doesn't have room for both group and project quotas. - * So, allow the project quota values to be copied out only if - * there is no group quota information available. - */ - if (!gip) { - if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino, - 0, 0, &pip) == 0) - temppqip = true; - } - } else - pip = NULL; - if (uip) { - out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; - out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; - out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; - if (tempuqip) - IRELE(uip); - } - - if (gip) { - out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; - out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; - out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; - if (tempgqip) - IRELE(gip); - } - if (pip) { - out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; - out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks; - out->qs_gquota.qfs_nextents = pip->i_d.di_nextents; - if (temppqip) - IRELE(pip); - } - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - - return 0; -} - -/* - * Return quota status information, such as uquota-off, enforcements, etc. - * for Q_XGETQSTATV command, to support separate project quota field. 
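Both Q_XGETQSTAT handlers can be deleted because the generic quotactl layer now synthesizes struct fs_quota_stat and fs_quota_statv from whatever the new ->get_state operation reports, so the syscall ABI is untouched. A minimal userspace sketch of the unchanged interface (the device path is illustrative; fs_quota_statv comes from the kernel's dqblk_xfs.h, or xfsprogs' <xfs/xqm.h>):

    /* sketch: Q_XGETQSTATV is now answered from ->get_state internally */
    struct fs_quota_statv sv = { .qs_version = FS_QSTATV_VERSION1 };

    if (quotactl(QCMD(Q_XGETQSTATV, USRQUOTA), "/dev/sda1", 0,
                 (void *)&sv) < 0)
            err(1, "Q_XGETQSTATV");
    printf("user quota file inode: %llu\n",
           (unsigned long long)sv.qs_uquota.qfs_ino);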
- */ -int -xfs_qm_scall_getqstatv( - struct xfs_mount *mp, - struct fs_quota_statv *out) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_inode *uip = NULL; - struct xfs_inode *gip = NULL; - struct xfs_inode *pip = NULL; - bool tempuqip = false; - bool tempgqip = false; - bool temppqip = false; - - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & - (XFS_ALL_QUOTA_ACCT| - XFS_ALL_QUOTA_ENFD)); - out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; - out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; - out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino; - - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - pip = q->qi_pquotaip; - if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip) == 0) - tempuqip = true; - } - if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip) == 0) - tempgqip = true; - } - if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino, - 0, 0, &pip) == 0) - temppqip = true; - } - if (uip) { - out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; - out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; - if (tempuqip) - IRELE(uip); - } - - if (gip) { - out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; - out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; - if (tempgqip) - IRELE(gip); - } - if (pip) { - out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks; - out->qs_pquota.qfs_nextents = pip->i_d.di_nextents; - if (temppqip) - IRELE(pip); - } - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - - return 0; -} - #define XFS_QC_MASK \ (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK) @@ -873,28 +719,6 @@ out_put: return error; } -STATIC uint -xfs_qm_export_flags( - uint flags) -{ - uint uflags; - - uflags = 0; - if (flags & XFS_UQUOTA_ACCT) - uflags |= FS_QUOTA_UDQ_ACCT; - if (flags & XFS_GQUOTA_ACCT) - uflags |= FS_QUOTA_GDQ_ACCT; - if (flags & XFS_PQUOTA_ACCT) - uflags |= FS_QUOTA_PDQ_ACCT; - if (flags & XFS_UQUOTA_ENFD) - uflags |= FS_QUOTA_UDQ_ENFD; - if (flags & XFS_GQUOTA_ENFD) - uflags |= FS_QUOTA_GDQ_ENFD; - if (flags & XFS_PQUOTA_ENFD) - uflags |= FS_QUOTA_PDQ_ENFD; - return uflags; -} - STATIC int xfs_dqrele_inode( diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 6923905ab33d..7795e0d01382 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -23,10 +23,81 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" +#include "xfs_trace.h" +#include "xfs_icache.h" #include "xfs_qm.h" #include <linux/quota.h> +static void +xfs_qm_fill_state( + struct qc_type_state *tstate, + struct xfs_mount *mp, + struct xfs_inode *ip, + xfs_ino_t ino) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + bool tempqip = false; + + tstate->ino = ino; + if (!ip && ino == NULLFSINO) + return; + if (!ip) { + if (xfs_iget(mp, NULL, ino, 0, 0, &ip)) + return; + tempqip = true; + } + tstate->flags |= QCI_SYSFILE; + tstate->blocks = ip->i_d.di_nblocks; + tstate->nextents = ip->i_d.di_nextents; + tstate->spc_timelimit = q->qi_btimelimit; + tstate->ino_timelimit = q->qi_itimelimit; + tstate->rt_spc_timelimit = q->qi_rtbtimelimit; + tstate->spc_warnlimit = q->qi_bwarnlimit; + tstate->ino_warnlimit = q->qi_iwarnlimit; + tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit; + if (tempqip) + IRELE(ip); +} + +/* + * Return quota status 
information, such as enforcements, quota file inode + * numbers etc. + */ +static int +xfs_fs_get_quota_state( + struct super_block *sb, + struct qc_state *state) +{ + struct xfs_mount *mp = XFS_M(sb); + struct xfs_quotainfo *q = mp->m_quotainfo; + + memset(state, 0, sizeof(*state)); + if (!XFS_IS_QUOTA_RUNNING(mp)) + return 0; + state->s_incoredqs = q->qi_dquots; + if (XFS_IS_UQUOTA_RUNNING(mp)) + state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED; + if (XFS_IS_UQUOTA_ENFORCED(mp)) + state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED; + if (XFS_IS_GQUOTA_RUNNING(mp)) + state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED; + if (XFS_IS_GQUOTA_ENFORCED(mp)) + state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED; + if (XFS_IS_PQUOTA_RUNNING(mp)) + state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED; + if (XFS_IS_PQUOTA_ENFORCED(mp)) + state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED; + + xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip, + mp->m_sb.sb_uquotino); + xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip, + mp->m_sb.sb_gquotino); + xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip, + mp->m_sb.sb_pquotino); + return 0; +} + STATIC int xfs_quota_type(int type) { @@ -40,28 +111,40 @@ xfs_quota_type(int type) } } -STATIC int -xfs_fs_get_xstate( +#define XFS_QC_SETINFO_MASK (QC_TIMER_MASK | QC_WARNS_MASK) + +/* + * Adjust quota timers & warnings + */ +static int +xfs_fs_set_info( struct super_block *sb, - struct fs_quota_stat *fqs) + int type, + struct qc_info *info) { - struct xfs_mount *mp = XFS_M(sb); + struct xfs_mount *mp = XFS_M(sb); + struct qc_dqblk newlim; + if (sb->s_flags & MS_RDONLY) + return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return xfs_qm_scall_getqstat(mp, fqs); -} + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK) + return -EINVAL; + if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0) + return 0; -STATIC int -xfs_fs_get_xstatev( - struct super_block *sb, - struct fs_quota_statv *fqs) -{ - struct xfs_mount *mp = XFS_M(sb); + newlim.d_fieldmask = info->i_fieldmask; + newlim.d_spc_timer = info->i_spc_timelimit; + newlim.d_ino_timer = info->i_ino_timelimit; + newlim.d_rt_spc_timer = info->i_rt_spc_timelimit; + newlim.d_ino_warns = info->i_ino_warnlimit; + newlim.d_spc_warns = info->i_spc_warnlimit; + newlim.d_rt_spc_warns = info->i_rt_spc_warnlimit; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - return xfs_qm_scall_getqstatv(mp, fqs); + return xfs_qm_scall_setqlim(mp, 0, xfs_quota_type(type), &newlim); } static unsigned int @@ -178,8 +261,8 @@ xfs_fs_set_dqblk( } const struct quotactl_ops xfs_quotactl_operations = { - .get_xstatev = xfs_fs_get_xstatev, - .get_xstate = xfs_fs_get_xstate, + .get_state = xfs_fs_get_quota_state, + .set_info = xfs_fs_set_info, .quota_enable = xfs_quota_enable, .quota_disable = xfs_quota_disable, .rm_xquota = xfs_fs_rm_xquota, diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 3378dcf4c31e..940d5ec122c9 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -39,6 +39,10 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, debug_dma_unmap_page(dev, addr, size, dir, true); } +/* + * dma_maps_sg_attrs returns 0 on error and > 0 on success. + * It should never return a value < 0. 
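Given that convention, 0 on failure and a positive count of mapped entries on success, a caller only ever needs a zero test. A short sketch, with dev, sgl and nents assumed to be set up by the surrounding driver:

    /* sketch: dma_map_sg() follows the 0-on-error rule described above */
    int ents = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);

    if (!ents)
            return -ENOMEM;     /* no ents < 0 check is ever needed */
    /* ... hand sg_dma_address()/sg_dma_len() pairs to the hardware ... */
    dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);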
+ */ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) @@ -51,6 +55,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, kmemcheck_mark_initialized(sg_virt(s), s->length); BUG_ON(!valid_dma_direction(dir)); ents = ops->map_sg(dev, sg, nents, dir, attrs); + BUG_ON(ents < 0); debug_dma_map_sg(dev, sg, nents, ents, dir); return ents; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7aec86127335..8210e8797c12 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -164,6 +164,8 @@ enum { << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q); void blk_mq_finish_init(struct request_queue *q); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); @@ -218,6 +220,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv); @@ -227,7 +230,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q); /* * Driver command data is immediately after the request. So subtract request - * size to get back to the original request. + * size to get back to the original request, add request size to get the PDU. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { @@ -235,7 +238,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu) } static inline void *blk_mq_rq_to_pdu(struct request *rq) { - return (void *) rq + sizeof(*rq); + return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d8358799c594..df334cbacc6d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -404,26 +404,11 @@ static inline bool d_mountpoint(const struct dentry *dentry) /* * Directory cache entry type accessor functions. */ -static inline void __d_set_type(struct dentry *dentry, unsigned type) -{ - dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type; -} - -static inline void __d_clear_type(struct dentry *dentry) -{ - __d_set_type(dentry, DCACHE_MISS_TYPE); -} - -static inline void d_set_type(struct dentry *dentry, unsigned type) -{ - spin_lock(&dentry->d_lock); - __d_set_type(dentry, type); - spin_unlock(&dentry->d_lock); -} - static inline unsigned __d_entry_type(const struct dentry *dentry) { - return dentry->d_flags & DCACHE_ENTRY_TYPE; + unsigned type = READ_ONCE(dentry->d_flags); + smp_rmb(); + return type & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) @@ -482,6 +467,44 @@ static inline bool d_is_positive(const struct dentry *dentry) return !d_is_negative(dentry); } +/** + * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents either an absent name or a name that + * doesn't map to an inode (ie. ->d_inode is NULL). 
The dentry could represent + * a true miss, a whiteout that isn't represented by a 0,0 chardev or a + * fallthrough marker in an opaque directory. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. (3) The dentry may have something attached to ->d_lower and the + * type field of the flags may be set to something other than miss or whiteout. + */ +static inline bool d_really_is_negative(const struct dentry *dentry) +{ + return dentry->d_inode == NULL; +} + +/** + * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents a name that maps to an inode + * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if + * that is represented on medium as a 0,0 chardev. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. + */ +static inline bool d_really_is_positive(const struct dentry *dentry) +{ + return dentry->d_inode != NULL; +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c3007cb4bfa6..ac07ff090919 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -34,6 +34,10 @@ struct dma_map_ops { void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); + /* + * map_sg returns 0 on error and a value > 0 on success. + * It should never return a value < 0. + */ int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); diff --git a/include/linux/fs.h b/include/linux/fs.h index c4e927358503..c7496f263860 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ struct address_space; struct writeback_control; #define IOCB_EVENTFD (1 << 0) +#define IOCB_APPEND (1 << 1) +#define IOCB_DIRECT (1 << 2) struct kiocb { struct file *ki_filp; @@ -329,10 +331,13 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } +static inline int iocb_flags(struct file *file); + static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, + .ki_flags = iocb_flags(filp), }; } @@ -383,7 +388,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. 
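With the rw argument gone from ->direct_IO, an implementation derives the direction from the iterator itself via iov_iter_rw(); a sketch for a hypothetical filesystem (myfs_get_block is an assumed helper):

    /* sketch: a ->direct_IO method under the new prototype */
    static ssize_t myfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                                  loff_t offset)
    {
            struct inode *inode = iocb->ki_filp->f_mapping->host;

            /* the direction now travels with the iterator */
            pr_debug("dio %s of %zu bytes\n",
                     iov_iter_rw(iter) == WRITE ? "write" : "read",
                     iov_iter_count(iter));
            return blockdev_direct_IO(iocb, inode, iter, offset,
                                      myfs_get_block);
    }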
@@ -2574,7 +2579,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); +extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); @@ -2617,8 +2622,8 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, - loff_t, get_block_t, dio_iodone_t, int flags); +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); @@ -2643,16 +2648,18 @@ enum { void dio_end_io(struct bio *bio, int error); -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags); +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags); -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct iov_iter *iter, loff_t offset, - get_block_t get_block) +static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, + struct inode *inode, + struct iov_iter *iter, loff_t offset, + get_block_t get_block) { - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } @@ -2785,6 +2792,16 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); } +static inline int iocb_flags(struct file *file) +{ + int res = 0; + if (file->f_flags & O_APPEND) + res |= IOCB_APPEND; + if (io_is_direct(file)) + res |= IOCB_DIRECT; + return res; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; diff --git a/include/linux/nbd.h b/include/linux/nbd.h deleted file mode 100644 index f62f78aef4ac..000000000000 --- a/include/linux/nbd.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * 1999 Copyright (C) Pavel Machek, pavel@ucw.cz. This code is GPL. - * 1999/11/04 Copyright (C) 1999 VMware, Inc. (Regis "HPReg" Duchesne) - * Made nbd_end_request() use the io_request_lock - * 2001 Copyright (C) Steven Whitehouse - * New nbd_end_request() for compatibility with new linux block - * layer code. - * 2003/06/24 Louis D. Langholtz <ldl@aros.net> - * Removed unneeded blksize_bits field from nbd_device struct. - * Cleanup PARANOIA usage & code. 
- * 2004/02/19 Paul Clements - * Removed PARANOIA, plus various cleanup and comments - */ -#ifndef LINUX_NBD_H -#define LINUX_NBD_H - - -#include <linux/wait.h> -#include <linux/mutex.h> -#include <uapi/linux/nbd.h> - -struct request; - -struct nbd_device { - int flags; - int harderror; /* Code of hard error */ - struct socket * sock; /* If == NULL, device is not ready, yet */ - int magic; - - spinlock_t queue_lock; - struct list_head queue_head; /* Requests waiting result */ - struct request *active_req; - wait_queue_head_t active_wq; - struct list_head waiting_queue; /* Requests to be sent */ - wait_queue_head_t waiting_wq; - - struct mutex tx_lock; - struct gendisk *disk; - int blksize; - u64 bytesize; - pid_t pid; /* pid of nbd-client, if attached */ - int xmit_timeout; - int disconnect; /* a disconnect has been requested by user */ -}; - -#endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdc..410abd172feb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -447,13 +447,12 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); /* * linux/fs/nfs/dir.c diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0adad4a5419b..8dbd05e70f09 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -117,8 +117,9 @@ struct nvme_ns { unsigned ns_id; int lba_shift; - int ms; - int pi_type; + u16 ms; + bool ext; + u8 pi_type; u64 mode_select_num_blocks; u32 mode_select_block_len; }; diff --git a/include/linux/pci.h b/include/linux/pci.h index e63112fb55be..353db8dc4c6e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1178,6 +1178,7 @@ unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type); +resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); #define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0) #define PCI_VGA_STATE_CHANGE_DECODES (1 << 1) @@ -1673,13 +1674,25 @@ int pci_ext_cfg_avail(void); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #ifdef CONFIG_PCI_IOV +int pci_iov_virtfn_bus(struct pci_dev *dev, int id); +int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); + int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); +resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); #else +static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) +{ + return -ENOSYS; +} +static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) +{ + return -ENOSYS; +} static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } static inline void pci_disable_sriov(struct pci_dev *dev) { } @@ -1690,6 +1703,8 @@ static inline int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { return 0; } static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } +static inline 
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) +{ return 0; } #endif #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 8884f6e507f7..8e7a25b068b0 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -40,6 +40,7 @@ enum pstore_type_id { PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_PPC_COMMON = 6, PSTORE_TYPE_PMSG = 7, + PSTORE_TYPE_PPC_OPAL = 8, PSTORE_TYPE_UNKNOWN = 255 }; diff --git a/include/linux/quota.h b/include/linux/quota.h index d534e8ed308a..b2505acfd3c0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -50,6 +50,7 @@ #undef USRQUOTA #undef GRPQUOTA +#undef PRJQUOTA enum quota_type { USRQUOTA = 0, /* element used for user quotas */ GRPQUOTA = 1, /* element used for group quotas */ @@ -319,6 +320,7 @@ struct dquot_operations { /* get reserved quota for delayed alloc, value returned is managed by * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); + int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ }; struct path; @@ -344,7 +346,10 @@ struct qc_dqblk { int d_rt_spc_warns; /* # warnings issued wrt RT space */ }; -/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +/* + * Field specifiers for ->set_dqblk() in struct qc_dqblk and also for + * ->set_info() in struct qc_info + */ #define QC_INO_SOFT (1<<0) #define QC_INO_HARD (1<<1) #define QC_SPC_SOFT (1<<2) @@ -365,6 +370,51 @@ struct qc_dqblk { #define QC_INO_COUNT (1<<13) #define QC_RT_SPACE (1<<14) #define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) +#define QC_FLAGS (1<<15) + +#define QCI_SYSFILE (1 << 0) /* Quota file is hidden from userspace */ +#define QCI_ROOT_SQUASH (1 << 1) /* Root squash turned on */ +#define QCI_ACCT_ENABLED (1 << 2) /* Quota accounting enabled */ +#define QCI_LIMITS_ENFORCED (1 << 3) /* Quota limits enforced */ + +/* Structures for communicating via ->get_state */ +struct qc_type_state { + unsigned int flags; /* Flags QCI_* */ + unsigned int spc_timelimit; /* Time after which space softlimit is + * enforced */ + unsigned int ino_timelimit; /* Ditto for inode softlimit */ + unsigned int rt_spc_timelimit; /* Ditto for real-time space */ + unsigned int spc_warnlimit; /* Limit for number of space warnings */ + unsigned int ino_warnlimit; /* Ditto for inodes */ + unsigned int rt_spc_warnlimit; /* Ditto for real-time space */ + unsigned long long ino; /* Inode number of quota file */ + blkcnt_t blocks; /* Number of 512-byte blocks in the file */ + blkcnt_t nextents; /* Number of extents in the file */ +}; + +struct qc_state { + unsigned int s_incoredqs; /* Number of dquots in core */ + /* + * Per quota type information. The array should really have + * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in + * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough. 
Once VFS + * supports project quotas, this can be changed to MAXQUOTAS + */ + struct qc_type_state s_state[XQM_MAXQUOTAS]; +}; + +/* Structure for communicating via ->set_info */ +struct qc_info { + int i_fieldmask; /* mask of fields to change in ->set_info() */ + unsigned int i_flags; /* Flags QCI_* */ + unsigned int i_spc_timelimit; /* Time after which space softlimit is + * enforced */ + unsigned int i_ino_timelimit; /* Ditto for inode softlimit */ + unsigned int i_rt_spc_timelimit;/* Ditto for real-time space */ + unsigned int i_spc_warnlimit; /* Limit for number of space warnings */ + unsigned int i_ino_warnlimit; /* Limit for number of inode warnings */ + unsigned int i_rt_spc_warnlimit; /* Ditto for real-time space */ +}; /* Operations handling requests from userspace */ struct quotactl_ops { @@ -373,12 +423,10 @@ struct quotactl_ops { int (*quota_enable)(struct super_block *, unsigned int); int (*quota_disable)(struct super_block *, unsigned int); int (*quota_sync)(struct super_block *, int); - int (*get_info)(struct super_block *, int, struct if_dqinfo *); - int (*set_info)(struct super_block *, int, struct if_dqinfo *); + int (*set_info)(struct super_block *, int, struct qc_info *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); - int (*get_xstate)(struct super_block *, struct fs_quota_stat *); - int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); + int (*get_state)(struct super_block *, struct qc_state *); int (*rm_xquota)(struct super_block *, unsigned int); }; @@ -389,7 +437,19 @@ struct quota_format_type { struct quota_format_type *qf_next; }; -/* Quota state flags - they actually come in two flavors - for users and groups */ +/** + * Quota state flags - they actually come in two flavors - for users and groups. 
+ * + * Actual typed flags layout: + * USRQUOTA GRPQUOTA + * DQUOT_USAGE_ENABLED 0x0001 0x0002 + * DQUOT_LIMITS_ENABLED 0x0004 0x0008 + * DQUOT_SUSPENDED 0x0010 0x0020 + * + * Following bits are used for non-typed flags: + * DQUOT_QUOTA_SYS_FILE 0x0040 + * DQUOT_NEGATIVE_USAGE 0x0080 + */ enum { _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */ @@ -398,9 +458,9 @@ enum { * memory to turn them on */ _DQUOT_STATE_FLAGS }; -#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED) -#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED) -#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED) +#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED * MAXQUOTAS) +#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED * MAXQUOTAS) +#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED * MAXQUOTAS) #define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ DQUOT_SUSPENDED) /* Other quota flags */ @@ -414,15 +474,21 @@ enum { */ #define DQUOT_NEGATIVE_USAGE (1 << (DQUOT_STATE_LAST + 1)) /* Allow negative quota usage */ - static inline unsigned int dquot_state_flag(unsigned int flags, int type) { - return flags << _DQUOT_STATE_FLAGS * type; + return flags << type; } static inline unsigned int dquot_generic_flag(unsigned int flags, int type) { - return (flags >> _DQUOT_STATE_FLAGS * type) & DQUOT_STATE_FLAGS; + return (flags >> type) & DQUOT_STATE_FLAGS; +} + +/* Bitmap of quota types where flag is set in flags */ +static __always_inline unsigned dquot_state_types(unsigned flags, unsigned flag) +{ + BUILD_BUG_ON_NOT_POWER_OF_2(flag); + return (flags / flag) & ((1 << MAXQUOTAS) - 1); } #ifdef CONFIG_QUOTA_NETLINK_INTERFACE diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index df73258cca47..77ca6601ff25 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -95,8 +95,8 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int dquot_quota_off(struct super_block *sb, int type); int dquot_writeback_dquots(struct super_block *sb, int type); int dquot_quota_sync(struct super_block *sb, int type); -int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int dquot_get_state(struct super_block *sb, struct qc_state *state); +int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, @@ -134,10 +134,7 @@ static inline bool sb_has_quota_suspended(struct super_block *sb, int type) static inline unsigned sb_any_quota_suspended(struct super_block *sb) { - unsigned type, tmsk = 0; - for (type = 0; type < MAXQUOTAS; type++) - tmsk |= sb_has_quota_suspended(sb, type) << type; - return tmsk; + return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_SUSPENDED); } /* Does kernel know about any quota information for given sb + type? 
*/ @@ -149,10 +146,7 @@ static inline bool sb_has_quota_loaded(struct super_block *sb, int type) static inline unsigned sb_any_quota_loaded(struct super_block *sb) { - unsigned type, tmsk = 0; - for (type = 0; type < MAXQUOTAS; type++) - tmsk |= sb_has_quota_loaded(sb, type) << type; - return tmsk; + return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_USAGE_ENABLED); } static inline bool sb_has_quota_active(struct super_block *sb, int type) diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 0991913f4953..71f711db4500 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -10,7 +10,7 @@ enum string_size_units { STRING_UNITS_2, /* use binary powers of 2^10 */ }; -void string_get_size(u64 size, enum string_size_units units, +void string_get_size(u64 size, u64 blk_size, enum string_size_units units, char *buf, int len); #define UNESCAPE_SPACE 0x01 diff --git a/include/linux/uio.h b/include/linux/uio.h index 15f11fb9fff6..8b01e1c3c614 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -112,6 +112,14 @@ static inline bool iter_is_iovec(struct iov_iter *i) } /* + * Get one of READ or WRITE out of iter->type without any other flags OR'd in + * with it. + * + * The ?: is just for type safety. + */ +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) + +/* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value * greater than the amount of data in iov_iter is fine - it'll just do diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 007a0bc01b74..784bc2c0929f 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -135,6 +135,7 @@ enum fc_vport_state { #define FC_PORTSPEED_40GBIT 0x100 #define FC_PORTSPEED_50GBIT 0x200 #define FC_PORTSPEED_100GBIT 0x400 +#define FC_PORTSPEED_25GBIT 0x800 #define FC_PORTSPEED_NOT_NEGOTIATED (1 << 15) /* Speed not established */ /* diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h index 1f49b8341c99..9c95b2c1c88a 100644 --- a/include/uapi/linux/quota.h +++ b/include/uapi/linux/quota.h @@ -36,11 +36,12 @@ #include <linux/errno.h> #include <linux/types.h> -#define __DQUOT_VERSION__ "dquot_6.5.2" +#define __DQUOT_VERSION__ "dquot_6.6.0" -#define MAXQUOTAS 2 +#define MAXQUOTAS 3 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */ +#define PRJQUOTA 2 /* element used for project quotas */ /* * Definitions for the default names of the quotas files. 
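With PRJQUOTA making MAXQUOTAS three, the reworked state flags above stride by quota type inside each flag group; a worked sketch of the helpers (sb assumed to be a super_block with quotas loaded):

    /* sketch: bit 'type' inside each flag group selects the quota type */
    unsigned grp_usage = dquot_state_flag(DQUOT_USAGE_ENABLED, GRPQUOTA);
    /* == DQUOT_USAGE_ENABLED << 1, the GRPQUOTA column of the table */

    unsigned susp = dquot_state_types(sb_dqopt(sb)->flags, DQUOT_SUSPENDED);
    /* bit 0 = user, bit 1 = group, bit 2 = project suspended */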
@@ -48,6 +49,7 @@ #define INITQFNAMES { \ "user", /* USRQUOTA */ \ "group", /* GRPQUOTA */ \ + "project", /* PRJQUOTA */ \ "undefined", \ }; diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index f68719f405af..a48378958062 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -67,7 +67,7 @@ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_xsm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op 29 #define __HYPERVISOR_callback_op 30 @@ -75,7 +75,11 @@ #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 #define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 83338210ee04..c643e6a94c9a 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -27,13 +27,58 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); struct vm_area_struct; + +/* + * xen_remap_domain_mfn_array() - map an array of foreign frames + * @vma: VMA to map the pages into + * @addr: Address at which to map the pages + * @gfn: Array of GFNs to map + * @nr: Number entries in the GFN array + * @err_ptr: Returns per-GFN error status. + * @prot: page protection mask + * @domid: Domain owning the pages + * @pages: Array of pages if this domain has an auto-translated physmap + * + * @gfn and @err_ptr may point to the same buffer, the GFNs will be + * overwritten by the error codes after they are mapped. + * + * Returns the number of successfully mapped frames, or a -ve error + * code. + */ +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *gfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages); + +/* xen_remap_domain_mfn_range() - map a range of foreign frames + * @vma: VMA to map the pages into + * @addr: Address at which to map the pages + * @gfn: First GFN to map. + * @nr: Number frames to map + * @prot: page protection mask + * @domid: Domain owning the pages + * @pages: Array of pages if this domain has an auto-translated physmap + * + * Returns the number of successfully mapped frames, or a -ve error + * code. 
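A hedged sketch of a caller of the new array variant, where vma, domid and pages are assumed to come from the surrounding driver and the frame numbers are made up:

    /* sketch: gfn[] and err_ptr may alias; here they are kept separate */
    xen_pfn_t gfns[2] = { 0x1000, 0x1001 };
    int errs[2];
    int mapped;

    mapped = xen_remap_domain_mfn_array(vma, vma->vm_start, gfns, 2,
                                        errs, vma->vm_page_prot,
                                        domid, pages);
    if (mapped < 0)
            return mapped;      /* nothing was mapped at all */
    /* a nonzero errs[i] marks frame i as failed even when mapped > 0 */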
+ */ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t mfn, int nr, + xen_pfn_t gfn, int nr, pgprot_t prot, unsigned domid, struct page **pages); int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages); +int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *gfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages); +int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages); bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index b0f1c9e5d687..289c0b5f08fe 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -46,6 +46,10 @@ #include <xen/interface/io/xenbus.h> #include <xen/interface/io/xs_wire.h> +#define XENBUS_MAX_RING_PAGE_ORDER 4 +#define XENBUS_MAX_RING_PAGES (1U << XENBUS_MAX_RING_PAGE_ORDER) +#define INVALID_GRANT_HANDLE (~0U) + /* Register callback to watch this node. */ struct xenbus_watch { @@ -199,15 +203,19 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr); +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + unsigned int nr_pages, grant_ref_t *grefs); +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + grant_handle_t *handles, unsigned long *vaddrs, + bool *leaked); int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr); + grant_handle_t *handles, unsigned int nr_handles, + unsigned long *vaddrs); int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); int xenbus_free_evtchn(struct xenbus_device *dev, int port); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 1826c7407258..c98ae818eb4e 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -4,6 +4,7 @@ * Copyright 31 August 2008 James Bottomley * Copyright (C) 2013, Intel Corporation */ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/math64.h> #include <linux/export.h> @@ -14,7 +15,8 @@ /** * string_get_size - get the size in the specified units - * @size: The size to be converted + * @size: The size to be converted in blocks + * @blk_size: Size of the block (use 1 for size in bytes) * @units: units to use (powers of 1000 or 1024) * @buf: buffer to format to * @len: length of buffer @@ -24,14 +26,14 @@ * at least 9 bytes and will always be zero terminated. 
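For example, with the extra blk_size parameter a disk of 4294967296 512-byte sectors formats as follows (a sketch; the output follows the rewritten implementation below):

    /* sketch: size counts blocks, blk_size is the block size in bytes */
    char buf[10];       /* nine characters plus the terminating NUL */

    string_get_size(4294967296ULL, 512, STRING_UNITS_10, buf, sizeof(buf));
    /* buf now holds "2.19 TB"; pass blk_size == 1 for a plain byte count */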
* */ -void string_get_size(u64 size, const enum string_size_units units, +void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len) { static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB" + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" }; static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; static const char *const *const units_str[] = { [STRING_UNITS_10] = units_10, @@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units, [STRING_UNITS_2] = 1024, }; int i, j; - u32 remainder = 0, sf_cap; + u32 remainder = 0, sf_cap, exp; char tmp[8]; + const char *unit; tmp[0] = '\0'; i = 0; - if (size >= divisor[units]) { - while (size >= divisor[units]) { - remainder = do_div(size, divisor[units]); - i++; - } + if (!size) + goto out; - sf_cap = size; - for (j = 0; sf_cap*10 < 1000; j++) - sf_cap *= 10; + while (blk_size >= divisor[units]) { + remainder = do_div(blk_size, divisor[units]); + i++; + } - if (j) { - remainder *= 1000; - remainder /= divisor[units]; - snprintf(tmp, sizeof(tmp), ".%03u", remainder); - tmp[j+1] = '\0'; - } + exp = divisor[units] / (u32)blk_size; + if (size >= exp) { + remainder = do_div(size, divisor[units]); + remainder *= blk_size; + i++; + } else { + remainder *= size; + } + + size *= blk_size; + size += remainder / divisor[units]; + remainder %= divisor[units]; + + while (size >= divisor[units]) { + remainder = do_div(size, divisor[units]); + i++; } + sf_cap = size; + for (j = 0; sf_cap*10 < 1000; j++) + sf_cap *= 10; + + if (j) { + remainder *= 1000; + remainder /= divisor[units]; + snprintf(tmp, sizeof(tmp), ".%03u", remainder); + tmp[j+1] = '\0'; + } + + out: + if (i >= ARRAY_SIZE(units_2)) + unit = "UNK"; + else + unit = units_str[units][i]; + snprintf(buf, len, "%u%s %s", (u32)size, - tmp, units_str[units][i]); + tmp, unit); } EXPORT_SYMBOL(string_get_size); diff --git a/mm/filemap.c b/mm/filemap.c index 12548d03c11d..6bf5e42d560a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1693,7 +1693,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t *ppos = &iocb->ki_pos; loff_t pos = *ppos; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); @@ -1706,7 +1706,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data, pos); } if (retval > 0) { @@ -2259,41 +2259,38 @@ EXPORT_SYMBOL(read_cache_page_gfp); * Returns appropriate error code that caller should return or * zero in case that write should be allowed. 
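Note that the retained context comment predates the new return convention: the reworked helper below truncates the iterator and returns the remaining writable byte count, so zero now means nothing (left) to write. Callers therefore follow a <= 0 / > 0 pattern, as generic_file_write_iter does later in this patch:

    /* sketch: the new calling convention for a ->write_iter method */
    ssize_t ret = generic_write_checks(iocb, from);

    if (ret > 0)        /* > 0: proceed with the (possibly shortened) write */
            ret = __generic_file_write_iter(iocb, from);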
*/ -inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk) +inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; unsigned long limit = rlimit(RLIMIT_FSIZE); + loff_t pos; - if (unlikely(*pos < 0)) - return -EINVAL; + if (!iov_iter_count(from)) + return 0; - if (!isblk) { - /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) - *pos = i_size_read(inode); + /* FIXME: this is for backwards compatibility with 2.4 */ + if (iocb->ki_flags & IOCB_APPEND) + iocb->ki_pos = i_size_read(inode); - if (limit != RLIM_INFINITY) { - if (*pos >= limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - if (*count > limit - (typeof(limit))*pos) { - *count = limit - (typeof(limit))*pos; - } + pos = iocb->ki_pos; + + if (limit != RLIM_INFINITY) { + if (iocb->ki_pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; } + iov_iter_truncate(from, limit - (unsigned long)pos); } /* * LFS rule */ - if (unlikely(*pos + *count > MAX_NON_LFS && + if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { - if (*pos >= MAX_NON_LFS) { + if (pos >= MAX_NON_LFS) return -EFBIG; - } - if (*count > MAX_NON_LFS - (unsigned long)*pos) { - *count = MAX_NON_LFS - (unsigned long)*pos; - } + iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos); } /* @@ -2303,34 +2300,11 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. */ - if (likely(!isblk)) { - if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { - if (*count || *pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - /* zero-length writes at ->s_maxbytes are OK */ - } + if (unlikely(pos >= inode->i_sb->s_maxbytes)) + return -EFBIG; - if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) - *count = inode->i_sb->s_maxbytes - *pos; - } else { -#ifdef CONFIG_BLOCK - loff_t isize; - if (bdev_read_only(I_BDEV(inode))) - return -EPERM; - isize = i_size_read(inode); - if (*pos >= isize) { - if (*count || *pos > isize) - return -ENOSPC; - } - - if (*pos + *count > isize) - *count = isize - *pos; -#else - return -EPERM; -#endif - } - return 0; + iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos); + return iov_iter_count(from); } EXPORT_SYMBOL(generic_write_checks); @@ -2394,7 +2368,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data, pos); /* * Finally, try again to invalidate clean pages which might have been @@ -2556,23 +2530,12 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; struct inode *inode = mapping->host; - loff_t pos = iocb->ki_pos; ssize_t written = 0; ssize_t err; ssize_t status; - size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - err = file_remove_suid(file); if (err) goto out; @@ -2581,10 +2544,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct 
iov_iter *from) if (err) goto out; - if (io_is_direct(file)) { - loff_t endbyte; + if (iocb->ki_flags & IOCB_DIRECT) { + loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, pos); + written = generic_file_direct_write(iocb, from, iocb->ki_pos); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2592,13 +2555,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * not succeed (even if it did, DAX does not handle dirty * page-cache pages correctly). */ - if (written < 0 || written == count || IS_DAX(inode)) + if (written < 0 || !iov_iter_count(from) || IS_DAX(inode)) goto out; - pos += written; - count -= written; - - status = generic_perform_write(file, from, pos); + status = generic_perform_write(file, from, pos = iocb->ki_pos); /* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2610,15 +2570,15 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) err = status; goto out; } - iocb->ki_pos = pos + status; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */ endbyte = pos + status - 1; - err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); + err = filemap_write_and_wait_range(mapping, pos, endbyte); if (err == 0) { + iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, @@ -2630,9 +2590,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) */ } } else { - written = generic_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; + written = generic_perform_write(file, from, iocb->ki_pos); + if (likely(written > 0)) + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; @@ -2656,7 +2616,9 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; mutex_lock(&inode->i_mutex); - ret = __generic_file_write_iter(iocb, from); + ret = generic_write_checks(iocb, from); + if (ret > 0) + ret = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (ret > 0) { diff --git a/mm/page_io.c b/mm/page_io.c index a96c8562d835..6424869e275e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -277,9 +277,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, - &kiocb, &from, - kiocb.ki_pos); + ret = mapping->a_ops->direct_IO(&kiocb, &from, kiocb.ki_pos); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; diff --git a/scripts/xen-hypercalls.sh b/scripts/xen-hypercalls.sh new file mode 100644 index 000000000000..676d9226814f --- /dev/null +++ b/scripts/xen-hypercalls.sh @@ -0,0 +1,12 @@ +#!/bin/sh +out="$1" +shift +in="$@" + +for i in $in; do + eval $CPP $LINUXINCLUDE -dD -imacros "$i" -x c /dev/null +done | \ +awk '$1 == "#define" && $2 ~ /__HYPERVISOR_[a-z][a-z_0-9]*/ { v[$3] = $2 } + END { print "/* auto-generated by scripts/xen-hypercall.sh */" + for (i in v) if (!(v[i] in v)) + print "HYPERCALL("substr(v[i], 14)")"}' | sort -u >$out diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c index 13146d701413..0095a80a997f 100644 --- a/sound/ppc/pmac.c +++ b/sound/ppc/pmac.c @@ -240,7 +240,7 @@ static int snd_pmac_pcm_prepare(struct snd_pmac *chip, struct pmac_stream *rec, */ spin_lock_irq(&chip->reg_lock); 
snd_pmac_dma_stop(rec); - st_le16(&chip->extra_dma.cmds->command, DBDMA_STOP); + chip->extra_dma.cmds->command = cpu_to_le16(DBDMA_STOP); snd_pmac_dma_set_command(rec, &chip->extra_dma); snd_pmac_dma_run(rec, RUN); spin_unlock_irq(&chip->reg_lock); @@ -251,15 +251,15 @@ static int snd_pmac_pcm_prepare(struct snd_pmac *chip, struct pmac_stream *rec, */ offset = runtime->dma_addr; for (i = 0, cp = rec->cmd.cmds; i < rec->nperiods; i++, cp++) { - st_le32(&cp->phy_addr, offset); - st_le16(&cp->req_count, rec->period_size); - /*st_le16(&cp->res_count, 0);*/ - st_le16(&cp->xfer_status, 0); + cp->phy_addr = cpu_to_le32(offset); + cp->req_count = cpu_to_le16(rec->period_size); + /*cp->res_count = cpu_to_le16(0);*/ + cp->xfer_status = cpu_to_le16(0); offset += rec->period_size; } /* make loop */ - st_le16(&cp->command, DBDMA_NOP + BR_ALWAYS); - st_le32(&cp->cmd_dep, rec->cmd.addr); + cp->command = cpu_to_le16(DBDMA_NOP + BR_ALWAYS); + cp->cmd_dep = cpu_to_le32(rec->cmd.addr); snd_pmac_dma_stop(rec); snd_pmac_dma_set_command(rec, &rec->cmd); @@ -328,7 +328,7 @@ static snd_pcm_uframes_t snd_pmac_pcm_pointer(struct snd_pmac *chip, #if 1 /* hmm.. how can we get the current dma pointer?? */ int stat; volatile struct dbdma_cmd __iomem *cp = &rec->cmd.cmds[rec->cur_period]; - stat = ld_le16(&cp->xfer_status); + stat = le16_to_cpu(cp->xfer_status); if (stat & (ACTIVE|DEAD)) { count = in_le16(&cp->res_count); if (count) @@ -427,26 +427,26 @@ static inline void snd_pmac_pcm_dead_xfer(struct pmac_stream *rec, memcpy((void *)emergency_dbdma.cmds, (void *)cp, sizeof(struct dbdma_cmd)); emergency_in_use = 1; - st_le16(&cp->xfer_status, 0); - st_le16(&cp->req_count, rec->period_size); + cp->xfer_status = cpu_to_le16(0); + cp->req_count = cpu_to_le16(rec->period_size); cp = emergency_dbdma.cmds; } /* now bump the values to reflect the amount we haven't yet shifted */ - req = ld_le16(&cp->req_count); - res = ld_le16(&cp->res_count); - phy = ld_le32(&cp->phy_addr); + req = le16_to_cpu(cp->req_count); + res = le16_to_cpu(cp->res_count); + phy = le32_to_cpu(cp->phy_addr); phy += (req - res); - st_le16(&cp->req_count, res); - st_le16(&cp->res_count, 0); - st_le16(&cp->xfer_status, 0); - st_le32(&cp->phy_addr, phy); + cp->req_count = cpu_to_le16(res); + cp->res_count = cpu_to_le16(0); + cp->xfer_status = cpu_to_le16(0); + cp->phy_addr = cpu_to_le32(phy); - st_le32(&cp->cmd_dep, rec->cmd.addr + cp->cmd_dep = cpu_to_le32(rec->cmd.addr + sizeof(struct dbdma_cmd)*((rec->cur_period+1)%rec->nperiods)); - st_le16(&cp->command, OUTPUT_MORE | BR_ALWAYS | INTR_ALWAYS); + cp->command = cpu_to_le16(OUTPUT_MORE | BR_ALWAYS | INTR_ALWAYS); /* point at our patched up command block */ out_le32(&rec->dma->cmdptr, emergency_dbdma.addr); @@ -475,7 +475,7 @@ static void snd_pmac_pcm_update(struct snd_pmac *chip, struct pmac_stream *rec) else cp = &rec->cmd.cmds[rec->cur_period]; - stat = ld_le16(&cp->xfer_status); + stat = le16_to_cpu(cp->xfer_status); if (stat & DEAD) { snd_pmac_pcm_dead_xfer(rec, cp); @@ -489,9 +489,9 @@ static void snd_pmac_pcm_update(struct snd_pmac *chip, struct pmac_stream *rec) break; /*printk(KERN_DEBUG "update frag %d\n", rec->cur_period);*/ - st_le16(&cp->xfer_status, 0); - st_le16(&cp->req_count, rec->period_size); - /*st_le16(&cp->res_count, 0);*/ + cp->xfer_status = cpu_to_le16(0); + cp->req_count = cpu_to_le16(rec->period_size); + /*cp->res_count = cpu_to_le16(0);*/ rec->cur_period++; if (rec->cur_period >= rec->nperiods) { rec->cur_period = 0; @@ -760,11 +760,11 @@ void snd_pmac_beep_dma_start(struct 
snd_pmac *chip, int bytes, unsigned long add struct pmac_stream *rec = &chip->playback; snd_pmac_dma_stop(rec); - st_le16(&chip->extra_dma.cmds->req_count, bytes); - st_le16(&chip->extra_dma.cmds->xfer_status, 0); - st_le32(&chip->extra_dma.cmds->cmd_dep, chip->extra_dma.addr); - st_le32(&chip->extra_dma.cmds->phy_addr, addr); - st_le16(&chip->extra_dma.cmds->command, OUTPUT_MORE + BR_ALWAYS); + chip->extra_dma.cmds->req_count = cpu_to_le16(bytes); + chip->extra_dma.cmds->xfer_status = cpu_to_le16(0); + chip->extra_dma.cmds->cmd_dep = cpu_to_le32(chip->extra_dma.addr); + chip->extra_dma.cmds->phy_addr = cpu_to_le32(addr); + chip->extra_dma.cmds->command = cpu_to_le16(OUTPUT_MORE + BR_ALWAYS); out_le32(&chip->awacs->control, (in_le32(&chip->awacs->control) & ~0x1f00) | (speed << 8)); @@ -776,7 +776,7 @@ void snd_pmac_beep_dma_start(struct snd_pmac *chip, int bytes, unsigned long add void snd_pmac_beep_dma_stop(struct snd_pmac *chip) { snd_pmac_dma_stop(&chip->playback); - st_le16(&chip->extra_dma.cmds->command, DBDMA_STOP); + chip->extra_dma.cmds->command = cpu_to_le16(DBDMA_STOP); snd_pmac_pcm_set_format(chip); /* reset format */ } diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 2958fe9a74e9..5ad042345ab9 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -12,37 +12,37 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CFLAGS -TARGETS = pmu copyloops mm tm primitives stringloops +SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian endif -all: $(TARGETS) +all: $(SUB_DIRS) -$(TARGETS): +$(SUB_DIRS): $(MAKE) -k -C $@ all include ../lib.mk override define RUN_TESTS - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET run_tests; \ done; endef override define INSTALL_RULE - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET install; \ done; endef override define EMIT_TESTS - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -s -C $$TARGET emit_tests; \ done; endef clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET clean; \ done; rm -f tags @@ -50,4 +50,4 @@ clean: tags: find . 
-name '*.c' -o -name '*.h' | xargs ctags -.PHONY: tags $(TARGETS) +.PHONY: tags $(SUB_DIRS) diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index c05023514ce8..384843ea0d40 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -2,6 +2,7 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST +CFLAGS += -maltivec # Use our CFLAGS for the implicit .S rule ASFLAGS = $(CFLAGS) diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index d1dc37425510..50ae7d2091ce 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -4,39 +4,6 @@ #define r1 1 -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define vr24 24 -#define vr25 25 -#define vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 - #define R14 r14 #define R15 r15 #define R16 r16 diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 8ebc58a09311..f7997affd143 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -11,6 +11,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <elf.h> +#include <fcntl.h> +#include <link.h> +#include <sys/stat.h> #include "subunit.h" #include "utils.h" @@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name) return rc; } + +static char auxv[4096]; + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + void *result; + ssize_t num; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + perror("open"); + return NULL; + } + + result = NULL; + + num = read(fd, auxv, sizeof(auxv)); + if (num < 0) { + perror("read"); + goto out; + } + + if (num > sizeof(auxv)) { + printf("Overflowed auxv buffer\n"); + goto out; + } + + p = (ElfW(auxv_t) *)auxv; + + while (p->a_type != AT_NULL) { + if (p->a_type == type) { + result = (void *)p->a_un.a_val; + break; + } + + p++; + } +out: + close(fd); + return result; +} diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 9768dea37bf3..a07104c2afe6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -5,15 +5,10 @@ #define _GNU_SOURCE /* For CPU_ZERO etc. 
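Moving get_auxv_entry() into harness.c makes it available to every powerpc selftest, not just the PMU ones (its extern presumably moves from pmu/lib.h into the shared utils.h); a usage sketch:

    /* sketch: AT_PLATFORM's auxv value is a pointer to a string */
    char *platform = (char *)get_auxv_entry(AT_PLATFORM);

    if (platform)
            printf("running on: %s\n", platform);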
*/ -#include <elf.h> #include <errno.h> -#include <fcntl.h> -#include <link.h> #include <sched.h> #include <setjmp.h> #include <stdlib.h> -#include <sys/stat.h> -#include <sys/types.h> #include <sys/wait.h> #include "utils.h" @@ -256,45 +251,3 @@ out: return rc; } -static char auxv[4096]; - -void *get_auxv_entry(int type) -{ - ElfW(auxv_t) *p; - void *result; - ssize_t num; - int fd; - - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); - return NULL; - } - - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); - if (num < 0) { - perror("read"); - goto out; - } - - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); - goto out; - } - - p = (ElfW(auxv_t) *)auxv; - - while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } - - p++; - } -out: - close(fd); - return result; -} diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index 0f0339c8a6f6..ca5d72ae3be6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe); extern int notify_parent_of_error(union pipe write_pipe); extern pid_t eat_cpu(int (test_function)(void)); extern bool require_paranoia_below(int level); -extern void *get_auxv_entry(int type); struct addr_range { uint64_t first, last; diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore new file mode 100644 index 000000000000..89e762eab676 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore @@ -0,0 +1,2 @@ +switch_endian_test +check-reversed.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile new file mode 100644 index 000000000000..081473db22b7 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -0,0 +1,24 @@ +CC := $(CROSS_COMPILE)gcc +PROGS := switch_endian_test + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 + +all: $(PROGS) + +switch_endian_test: check-reversed.S + +check-reversed.o: check.o + $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@ + +check-reversed.S: check-reversed.o + hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@ + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o check-reversed.S + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S new file mode 100644 index 000000000000..e2484d2c24f4 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/check.S @@ -0,0 +1,100 @@ +#include "common.h" + +/* + * Checks that registers contain what we expect, ie. they were not clobbered by + * the syscall. + * + * r15: pattern to check registers against. + * + * At the end r3 == 0 if everything's OK. 
+ */ + nop # guaranteed to be illegal in reverse-endian + mr r9,r15 + cmpd r9,r3 # check r3 + bne 1f + addi r9,r15,4 # check r4 + cmpd r9,r4 + bne 1f + lis r9,0x00FF # check CR + ori r9,r9,0xF000 + mfcr r10 + and r10,r10,r9 + cmpw r9,r10 + addi r9,r15,34 + bne 1f + addi r9,r15,32 # check LR + mflr r10 + cmpd r9,r10 + bne 1f + addi r9,r15,5 # check r5 + cmpd r9,r5 + bne 1f + addi r9,r15,6 # check r6 + cmpd r9,r6 + bne 1f + addi r9,r15,7 # check r7 + cmpd r9,r7 + bne 1f + addi r9,r15,8 # check r8 + cmpd r9,r8 + bne 1f + addi r9,r15,13 # check r13 + cmpd r9,r13 + bne 1f + addi r9,r15,14 # check r14 + cmpd r9,r14 + bne 1f + addi r9,r15,16 # check r16 + cmpd r9,r16 + bne 1f + addi r9,r15,17 # check r17 + cmpd r9,r17 + bne 1f + addi r9,r15,18 # check r18 + cmpd r9,r18 + bne 1f + addi r9,r15,19 # check r19 + cmpd r9,r19 + bne 1f + addi r9,r15,20 # check r20 + cmpd r9,r20 + bne 1f + addi r9,r15,21 # check r21 + cmpd r9,r21 + bne 1f + addi r9,r15,22 # check r22 + cmpd r9,r22 + bne 1f + addi r9,r15,23 # check r23 + cmpd r9,r23 + bne 1f + addi r9,r15,24 # check r24 + cmpd r9,r24 + bne 1f + addi r9,r15,25 # check r25 + cmpd r9,r25 + bne 1f + addi r9,r15,26 # check r26 + cmpd r9,r26 + bne 1f + addi r9,r15,27 # check r27 + cmpd r9,r27 + bne 1f + addi r9,r15,28 # check r28 + cmpd r9,r28 + bne 1f + addi r9,r15,29 # check r29 + cmpd r9,r29 + bne 1f + addi r9,r15,30 # check r30 + cmpd r9,r30 + bne 1f + addi r9,r15,31 # check r31 + cmpd r9,r31 + bne 1f + b 2f +1: mr r3, r9 + li r0, __NR_exit + sc +2: li r0, __NR_switch_endian + nop diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h new file mode 100644 index 000000000000..69e399698c64 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/common.h @@ -0,0 +1,6 @@ +#include <ppc-asm.h> +#include <asm/unistd.h> + +#ifndef __NR_switch_endian +#define __NR_switch_endian 363 +#endif diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S new file mode 100644 index 000000000000..ef7c971abb67 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -0,0 +1,81 @@ +#include "common.h" + + .data + .balign 8 +message: + .ascii "success: switch_endian_test\n\0" + + .section ".toc" + .balign 8 +pattern: + .llong 0x5555AAAA5555AAAA + + .text +FUNC_START(_start) + /* Load the pattern */ + ld r15, pattern@TOC(%r2) + + /* Setup CR, only CR2-CR4 are maintained */ + lis r3, 0x00FF + ori r3, r3, 0xF000 + mtcr r3 + + /* Load the pattern slightly modified into the registers */ + mr r3, r15 + addi r4, r15, 4 + + addi r5, r15, 32 + mtlr r5 + + addi r5, r15, 5 + addi r6, r15, 6 + addi r7, r15, 7 + addi r8, r15, 8 + + /* r9 - r12 are clobbered */ + + addi r13, r15, 13 + addi r14, r15, 14 + + /* Skip r15 we're using it */ + + addi r16, r15, 16 + addi r17, r15, 17 + addi r18, r15, 18 + addi r19, r15, 19 + addi r20, r15, 20 + addi r21, r15, 21 + addi r22, r15, 22 + addi r23, r15, 23 + addi r24, r15, 24 + addi r25, r15, 25 + addi r26, r15, 26 + addi r27, r15, 27 + addi r28, r15, 28 + addi r29, r15, 29 + addi r30, r15, 30 + addi r31, r15, 31 + + /* + * Call the syscall to switch endian. + * It clobbers r9-r12, XER, CTR and CR0-1,5-7. 
+	 */
+	li	r0, __NR_switch_endian
+	sc
+
+#include "check-reversed.S"
+
+	/* Flip back, r0 already has the switch syscall number */
+	.long 0x02000044	/* sc */
+
+#include "check.S"
+
+	li	r0, __NR_write
+	li	r3, 1	/* stdout */
+	ld	r4, message@got(%r2)
+	li	r5, 28	/* strlen(message) */
+	sc
+	li	r0, __NR_exit
+	li	r3, 0
+	sc
+	b	.
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 33d02cc54a3e..2699635d2cd9 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1 +1,2 @@
 tm-resched-dscr
+tm-syscall
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 34f2ec634b40..1b616fa79e93 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,9 +1,12 @@
-TEST_PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr tm-syscall

 all: $(TEST_PROGS)

 $(TEST_PROGS): ../harness.c

+tm-syscall: tm-syscall-asm.S
+tm-syscall: CFLAGS += -mhtm
+
 include ../../lib.mk

 clean:
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000000000000..431f61ae2368
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,27 @@
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+	.text
+FUNC_START(getppid_tm_active)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	sc
+	tend.
+	blr
+1:
+	li	r3, -1
+	blr
+
+FUNC_START(getppid_tm_suspended)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	tsuspend.
+	sc
+	tresume.
+	tend.
+	blr
+1:
+	li	r3, -1
+	blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000000000000..3ed8d4b252fa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2015, Sam Bobroff, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/tm.h> +#include <asm/cputable.h> +#include <linux/auxvec.h> +#include <sys/time.h> +#include <stdlib.h> + +#include "utils.h" + +extern int getppid_tm_active(void); +extern int getppid_tm_suspended(void); + +unsigned retries = 0; + +#define TEST_DURATION 10 /* seconds */ +#define TM_RETRIES 100 + +long failure_code(void) +{ + return __builtin_get_texasru() >> 24; +} + +bool failure_is_persistent(void) +{ + return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; +} + +bool failure_is_syscall(void) +{ + return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; +} + +pid_t getppid_tm(bool suspend) +{ + int i; + pid_t pid; + + for (i = 0; i < TM_RETRIES; i++) { + if (suspend) + pid = getppid_tm_suspended(); + else + pid = getppid_tm_active(); + + if (pid >= 0) + return pid; + + if (failure_is_persistent()) { + if (failure_is_syscall()) + return -1; + + printf("Unexpected persistent transaction failure.\n"); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + exit(-1); + } + + retries++; + } + + printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + + exit(-1); +} + +int tm_syscall(void) +{ + unsigned count = 0; + struct timeval end, now; + + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM)); + setbuf(stdout, NULL); + + printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); + + gettimeofday(&end, NULL); + now.tv_sec = TEST_DURATION; + now.tv_usec = 0; + timeradd(&end, &now, &end); + + for (count = 0; timercmp(&now, &end, <); count++) { + /* + * Test a syscall within a suspended transaction and verify + * that it succeeds. + */ + FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + + /* + * Test a syscall within an active transaction and verify that + * it fails with the correct failure code. + */ + FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. 
*/ + gettimeofday(&now, 0); + } + + printf("%d active and suspended transactions behaved correctly.\n", count); + printf("(There were %d transaction retries.)\n", retries); + + return 0; +} + +int main(void) +{ + return test_harness(tm_syscall, "tm_syscall"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index a93777ae0684..b7d41086bb0a 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -15,11 +15,12 @@ typedef signed long long s64; /* Just for familiarity */ typedef uint32_t u32; +typedef uint16_t u16; typedef uint8_t u8; int test_harness(int (test_function)(void), char *name); - +extern void *get_auxv_entry(int type); /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore new file mode 100644 index 000000000000..7c04395010cb --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/.gitignore @@ -0,0 +1 @@ +test-vphn diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile new file mode 100644 index 000000000000..e539f775fd8f --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -0,0 +1,15 @@ +PROG := test-vphn + +CFLAGS += -m64 + +all: $(PROG) + +$(PROG): ../harness.c + +run_tests: all + ./$(PROG) + +clean: + rm -f $(PROG) + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c new file mode 100644 index 000000000000..5742f6876b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -0,0 +1,410 @@ +#include <stdio.h> +#include <byteswap.h> +#include "utils.h" +#include "subunit.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#define be16_to_cpup(x) bswap_16(*x) +#define cpu_to_be64(x) bswap_64(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#define be16_to_cpup(x) (*x) +#define cpu_to_be64(x) (x) +#endif + +#include "vphn.c" + +static struct test { + char *descr; + long input[VPHN_REGISTER_COUNT]; + u32 expected[VPHN_ASSOC_BUFSIZE]; +} all_tests[] = { + { + "vphn: no data", + { + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000000 + } + }, + { + "vphn: 1 x 16-bit value", + { + 0x8001ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 16-bit values", + { + 0x80018002ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 3 x 16-bit values", + { + 0x800180028003ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + "vphn: 4 x 16-bit values", + { + 0x8001800280038004, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000004, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004 + } + }, + { + /* Parsing the next 16-bit value out of the next 64-bit input + * value. 
+		 */
+		"vphn: 5 x 16-bit values",
+		{
+			0x8001800280038004,
+			0x8005ffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000005,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005
+		}
+	},
+	{
+		/* Parse at most 6 x 64-bit input values */
+		"vphn: 24 x 16-bit values",
+		{
+			0x8001800280038004,
+			0x8005800680078008,
+			0x8009800a800b800c,
+			0x800d800e800f8010,
+			0x8011801280138014,
+			0x8015801680178018
+		},
+		{
+			0x00000018,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c,
+			0x0000000d,
+			0x0000000e,
+			0x0000000f,
+			0x00000010,
+			0x00000011,
+			0x00000012,
+			0x00000013,
+			0x00000014,
+			0x00000015,
+			0x00000016,
+			0x00000017,
+			0x00000018
+		}
+	},
+	{
+		"vphn: 1 x 32-bit value",
+		{
+			0x00000001ffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000001,
+			0x00000001
+		}
+	},
+	{
+		"vphn: 2 x 32-bit values",
+		{
+			0x0000000100000002,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		/* Parsing the next 32-bit value out of the next 64-bit input
+		 * value.
+		 */
+		"vphn: 3 x 32-bit values",
+		{
+			0x0000000100000002,
+			0x00000003ffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x00000001,
+			0x00000002,
+			0x00000003
+		}
+	},
+	{
+		/* Parse at most 6 x 64-bit input values */
+		"vphn: 12 x 32-bit values",
+		{
+			0x0000000100000002,
+			0x0000000300000004,
+			0x0000000500000006,
+			0x0000000700000008,
+			0x000000090000000a,
+			0x0000000b0000000c
+		},
+		{
+			0x0000000c,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c
+		}
+	},
+	{
+		"vphn: 16-bit value followed by 32-bit value",
+		{
+			0x800100000002ffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		"vphn: 32-bit value followed by 16-bit value",
+		{
+			0x000000018002ffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		/* Parse a 32-bit value split across two consecutive 64-bit
+		 * input values.
+		 */
+		"vphn: 16-bit value followed by 2 x 32-bit values",
+		{
+			0x8001000000020000,
+			0x0003ffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005
+		}
+	},
+	{
+		/* The lower bits in 0x0001ffff don't get mixed up with the
+		 * 0xffff terminator.
+		 */
+		"vphn: 32-bit value has all ones in 16 lower bits",
+		{
+			0x0001ffff80028003,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x0001ffff,
+			0x00000002,
+			0x00000003
+		}
+	},
+	{
+		/* The following input doesn't follow the specification.
+		 */
+		"vphn: last 32-bit value is truncated",
+		{
+			0x0000000100000002,
+			0x0000000300000004,
+			0x0000000500000006,
+			0x0000000700000008,
+			0x000000090000000a,
+			0x0000000b800c2bad
+		},
+		{
+			0x0000000c,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c
+		}
+	},
+	{
+		"vphn: garbage after terminator",
+		{
+			0xffff2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad
+		},
+		{
+			0x00000000
+		}
+	},
+	{
+		NULL
+	}
+};
+
+static int test_one(struct test *test)
+{
+	__be32 output[VPHN_ASSOC_BUFSIZE] = { 0 };
+	int i, len;
+
+	vphn_unpack_associativity(test->input, output);
+
+	len = be32_to_cpu(output[0]);
+	if (len != test->expected[0]) {
+		printf("expected %d elements, got %d\n", test->expected[0],
+		       len);
+		return 1;
+	}
+
+	for (i = 1; i <= len; i++) {
+		u32 val = be32_to_cpu(output[i]);
+		if (val != test->expected[i]) {
+			printf("element #%d is 0x%x, should be 0x%x\n", i, val,
+			       test->expected[i]);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int test_vphn(void)
+{
+	static struct test *test;
+
+	for (test = all_tests; test->descr; test++) {
+		int ret;
+
+		ret = test_one(test);
+		test_finish(test->descr, ret);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness(test_vphn, "test-vphn");
+}
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c
new file mode 120000
index 000000000000..186b906e66d5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.c
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.c
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h new file mode 120000 index 000000000000..7131efe38c65 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/vphn.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.h
\ No newline at end of file
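
A note on the sound/ppc hunk at the top of this section: it replaces the ppc-only st_le16()/st_le32() byte-reversing stores with plain assignments of cpu_to_le16()/cpu_to_le32() values, so the DBDMA descriptors stay little-endian whatever the kernel's endianness. The sketch below shows the same idiom in userspace, assuming glibc's htole16()/htole32() as stand-ins for the kernel helpers; the struct and the constants in it are illustrative, not the kernel's dbdma_cmd layout.

/* Minimal userspace sketch of the st_le16() -> cpu_to_le16() idiom.
 * fake_dbdma_cmd is a simplified stand-in, not the kernel structure. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct fake_dbdma_cmd {
	uint16_t req_count;	/* device expects little-endian */
	uint16_t command;
	uint32_t phy_addr;
};

int main(void)
{
	struct fake_dbdma_cmd cmd;

	/* The old style did a byte-swapping store (st_le16); the new
	 * style converts the value first, then stores it normally. */
	cmd.req_count = htole16(64);
	cmd.command   = htole16(0x1234);	/* arbitrary example value */
	cmd.phy_addr  = htole32(0xdeadbeef);

	/* On any host, the first byte in memory is the low-order byte,
	 * so this prints "40 00". */
	printf("req_count bytes: %02x %02x\n",
	       ((uint8_t *)&cmd.req_count)[0],
	       ((uint8_t *)&cmd.req_count)[1]);
	return 0;
}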
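The switch_endian Makefile above builds check-reversed.S by byte-reversing every 4-byte word of check.o's .text with objcopy, so the same register checks remain executable after the endian flip; likewise the hand-coded ".long 0x02000044" in switch_endian_test.S is the sc instruction (0x44000002) stored reversed. A hypothetical illustration of that word reversal, not part of the patch:

/* Reverse the bytes of one 32-bit instruction word, as
 * "objcopy --reverse-bytes=4" does to check.o's .text section. */
#include <stdint.h>
#include <stdio.h>

static uint32_t reverse32(uint32_t insn)
{
	return ((insn & 0x000000ff) << 24) |
	       ((insn & 0x0000ff00) << 8)  |
	       ((insn & 0x00ff0000) >> 8)  |
	       ((insn & 0xff000000) >> 24);
}

int main(void)
{
	/* 0x44000002 is the PowerPC "sc" instruction; reversed it is
	 * the 0x02000044 literal that switch_endian_test.S emits. */
	printf("0x%08x -> 0x%08x\n", 0x44000002u, reverse32(0x44000002u));
	return 0;
}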
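get_auxv_entry(), moved from pmu/lib.c into harness.c above so every test can use it, parses /proc/self/auxv by hand; tm-syscall uses it to skip on hardware without HTM (AT_HWCAP2 & PPC_FEATURE2_HTM). On a reasonably recent glibc the same probe can be written with getauxval(3). A minimal standalone sketch, assuming the PPC_FEATURE2_HTM value from the kernel's asm/cputable.h in case that header is not installed:

/* Hypothetical standalone HTM probe using getauxval(3) instead of
 * the selftests' get_auxv_entry() helper. */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_HTM
#define PPC_FEATURE2_HTM	0x40000000	/* assumed, from asm/cputable.h */
#endif

int main(void)
{
	/* getauxval() reads the same auxiliary vector that
	 * get_auxv_entry() re-reads from /proc/self/auxv. */
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	if (hwcap2 & PPC_FEATURE2_HTM)
		printf("HTM available, tm tests can run\n");
	else
		printf("no HTM, tm-syscall would SKIP\n");
	return 0;
}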