1252 files changed, 23655 insertions, 20525 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2a4bc78c27ec..65731b060e3f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2227,7 +2227,7 @@ forcing Dual Address Cycle for PCI cards supporting greater than 32-bit addressing. - iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour + iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour Format: { "0" | "1" } 0 - Lazy mode. Request that DMA unmap operations use deferred @@ -3596,6 +3596,13 @@ [NFS] set the TCP port on which the NFSv4 callback channel should listen. + nfs.delay_retrans= + [NFS] specifies the number of times the NFSv4 client + retries the request before returning an EAGAIN error, + after a reply of NFS4ERR_DELAY from the server. + Only applies if the softerr mount option is enabled, + and the specified value is >= 0. + nfs.enable_ino64= [NFS] enable 64-bit inode numbers. If zero, the NFS client will fake up a 32-bit inode @@ -5687,9 +5694,10 @@ s390_iommu= [HW,S390] Set s390 IOTLB flushing mode strict - With strict flushing every unmap operation will result in - an IOTLB flush. Default is lazy flushing before reuse, - which is faster. + With strict flushing every unmap operation will result + in an IOTLB flush. Default is lazy flushing before + reuse, which is faster. Deprecated, equivalent to + iommu.strict=1. s390_iommu_aperture= [KNL,S390] Specifies the size of the per device DMA address space diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 4b8399ac592b..ced7b335e2e0 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -174,7 +174,7 @@ HWCAP2_DCPODP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. HWCAP2_SVE2 - Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. + Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001. HWCAP2_SVEAES Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001. @@ -222,7 +222,7 @@ HWCAP2_RNG Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001. HWCAP2_BTI - Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. + Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001. HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described @@ -232,7 +232,7 @@ HWCAP2_ECV Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. HWCAP2_AFP - Functionality implied by ID_AA64MFR1_EL1.AFP == 0b0001. + Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001. HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index a52996b22f75..7b2384de471f 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -77,6 +77,9 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined in version 1.0 of the Bit-Manipulation ISA extensions. + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOZ`: The Zicboz extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance information about the selected set of processors. @@ -96,3 +99,6 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are not supported at all and will generate a misaligned address fault. 
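As a usage illustration for the Zicboz hwprobe entries in this hunk (the block-size key is added just below), here is a minimal userspace sketch. It is not part of the patch; it assumes the uAPI header <asm/hwprobe.h> and the __NR_riscv_hwprobe number from this tree:

/*
 * Hedged sketch: query Zicboz support and the Zicboz block size via
 * the riscv_hwprobe(2) syscall. Passing cpusetsize == 0 and
 * cpus == NULL asks about all online harts.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pairs[2] = {
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE },
	};

	if (syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0))
		return 1;

	if (pairs[0].value & RISCV_HWPROBE_EXT_ZICBOZ)
		printf("Zicboz block size: %llu bytes\n",
		       (unsigned long long)pairs[1].value);

	return 0;
}

Keys the kernel does not recognize come back with the key field set to -1, so the check degrades cleanly on older kernels.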
+ +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicboz block in bytes. diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst index 8960fac42c40..54d199dce78b 100644 --- a/Documentation/arch/riscv/uabi.rst +++ b/Documentation/arch/riscv/uabi.rst @@ -42,6 +42,26 @@ An example string following the order is:: rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux +"isa" and "hart isa" lines in /proc/cpuinfo +------------------------------------------- + +The "isa" line in /proc/cpuinfo describes the lowest common denominator of +RISC-V ISA extensions recognized by the kernel and implemented on all harts. The +"hart isa" line, in contrast, describes the set of extensions recognized by the +kernel on the particular hart being described, even if those extensions may not +be present on all harts in the system. + +In both lines, the presence of an extension guarantees only that the hardware +has the described capability. Additional kernel support or policy changes may be +required before an extension's capability is fully usable by userspace programs. +Similarly, for S-mode extensions, presence in one of these lines does not +guarantee that the kernel is taking advantage of the extension, or that the +feature will be visible in guest VMs managed by this kernel. + +Conversely, the absence of an extension in these lines does not necessarily mean +the hardware does not support that feature. The running kernel may not recognize +the extension, or may have deliberately removed it from the listing. + Misaligned accesses ------------------- diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 0d2647fb358d..723408e399ab 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -37,16 +37,14 @@ prototype in a header for the wrapper kfunc. An example is given below:: /* Disables missing prototype warnings */ - __diag_push(); - __diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); + __bpf_kfunc_start_defs(); __bpf_kfunc struct task_struct *bpf_find_get_task_by_vpid(pid_t nr) { return find_get_task_by_vpid(nr); } - __diag_pop(); + __bpf_kfunc_end_defs(); A wrapper kfunc is often needed when we need to annotate parameters of the kfunc. Otherwise one may directly make the kfunc visible to the BPF program by diff --git a/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml new file mode 100644 index 000000000000..9816c4cacc7d --- /dev/null +++ b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/renesas,shmobile-lcdc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas SH-Mobile LCD Controller (LCDC) + +maintainers: + - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> + - Geert Uytterhoeven <geert+renesas@glider.be> + +properties: + compatible: + enum: + - renesas,r8a7740-lcdc # R-Mobile A1 + - renesas,sh73a0-lcdc # SH-Mobile AG5 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 5 + description: + Only the functional clock is mandatory. + Some of the optional clocks are model-dependent (e.g. "video" (a.k.a. + "vou" or "dv_clk") is available on R-Mobile A1 only).
+ + clock-names: + minItems: 1 + items: + - const: fck + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + + power-domains: + maxItems: 1 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: LCD port (R-Mobile A1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: HDMI port (R-Mobile A1 LCDC1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: MIPI-DSI port (SH-Mobile AG5) + unevaluatedProperties: false + + required: + - port@0 + + unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - ports + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + const: renesas,r8a7740-lcdc + then: + properties: + ports: + properties: + port@2: false + + - if: + properties: + compatible: + contains: + const: renesas,sh73a0-lcdc + then: + properties: + ports: + required: + - port@1 + - port@2 + +examples: + - | + #include <dt-bindings/clock/r8a7740-clock.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + lcd-controller@fe940000 { + compatible = "renesas,r8a7740-lcdc"; + reg = <0xfe940000 0x4000>; + interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&mstp1_clks R8A7740_CLK_LCDC0>, + <&cpg_clocks R8A7740_CLK_M3>, <&lcdlclk0_clk>, + <&vou_clk>; + clock-names = "fck", "media", "lclk", "video"; + power-domains = <&pd_a4lc>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lcdc0_rgb: endpoint { + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml index 0aa41bd9ddca..37975ee61c5a 100644 --- a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml +++ b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml @@ -11,10 +11,10 @@ maintainers: properties: compatible: - - enum: - - solomon,ssd1322 - - solomon,ssd1325 - - solomon,ssd1327 + enum: + - solomon,ssd1322 + - solomon,ssd1325 + - solomon,ssd1327 required: - compatible diff --git a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml index e5a3c355ee1f..29921aab9d97 100644 --- a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml +++ b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml @@ -24,6 +24,8 @@ properties: linux,keycodes: maxItems: 1 + wakeup-source: true + required: - compatible - linux,keycodes diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml index 4080422a9eb5..037e5d3c447f 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml @@ -34,6 +34,9 @@ properties: vdd-supply: description: Regulator for voltage. + vddio-supply: + description: Optional Regulator for I/O voltage. 
+ reset-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b1b2cf81b42f..aa9e1c0895a5 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -110,6 +110,7 @@ properties: - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -409,6 +410,7 @@ allOf: contains: enum: - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml index 6ffbed204c25..8f50e23ca8c9 100644 --- a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml @@ -15,12 +15,19 @@ allOf: properties: compatible: - enum: - - fsl,imx23-pwm + oneOf: + - const: fsl,imx23-pwm + - items: + - enum: + - fsl,imx28-pwm + - const: fsl,imx23-pwm reg: maxItems: 1 + clocks: + maxItems: 1 + "#pwm-cells": const: 3 @@ -31,6 +38,7 @@ properties: required: - compatible - reg + - clocks - fsl,pwm-number additionalProperties: false @@ -40,6 +48,7 @@ examples: - | pwm@80064000 { compatible = "fsl,imx23-pwm"; reg = <0x80064000 0x2000>; + clocks = <&clks 30>; #pwm-cells = <3>; fsl,pwm-number = <8>; }; diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml index 895415772d1d..09102dda4942 100644 --- a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml @@ -21,6 +21,7 @@ properties: - mediatek,mt8188-scp - mediatek,mt8192-scp - mediatek,mt8195-scp + - mediatek,mt8195-scp-dual reg: description: @@ -31,10 +32,7 @@ properties: reg-names: minItems: 2 - items: - - const: sram - - const: cfg - - const: l1tcm + maxItems: 3 clocks: description: @@ -58,6 +56,93 @@ properties: memory-region: maxItems: 1 + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + + ranges: + description: + Standard ranges definition providing address translations for + local SCP SRAM address spaces to bus addresses. + +patternProperties: + "^scp@[a-f0-9]+$": + type: object + description: + The MediaTek SCP integrated into the SoC might be a multi-core version. + The other cores are represented as child nodes of the boot core. + There are some integration differences for the IP, like the usage of an + address translator for translating SoC bus addresses into the address + space of the processor. + + Each SCP core has its own cache memory. The SRAM and L1TCM are shared by + the cores. The cache, SRAM and L1TCM power should be enabled before + booting the SCP cores. The sizes of the cache, SRAM, and L1TCM vary on + different SoCs. + + The SCP cores do not use an MMU, but have a set of registers to + control the translation of 32-bit CPU addresses into system bus + addresses. Cache and memory access settings are provided through a + Memory Protection Unit (MPU), programmable only from the SCP.
+ + properties: + compatible: + enum: + - mediatek,scp-core + + reg: + description: The base address and size of SRAM. + maxItems: 1 + + reg-names: + const: sram + + interrupts: + maxItems: 1 + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: + If present, name (or relative path) of the file within the + firmware search path containing the firmware image used when + initializing sub cores of multi-core SCP. + + memory-region: + maxItems: 1 + + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + required: + - compatible + - reg + - reg-names + + additionalProperties: false + required: - compatible - reg @@ -87,23 +172,39 @@ allOf: reg: maxItems: 2 reg-names: + items: + - const: sram + - const: cfg + - if: + properties: + compatible: + enum: + - mediatek,mt8192-scp + - mediatek,mt8195-scp + then: + properties: + reg: + maxItems: 3 + reg-names: + items: + - const: sram + - const: cfg + - const: l1tcm + - if: + properties: + compatible: + enum: + - mediatek,mt8195-scp-dual + then: + properties: + reg: maxItems: 2 + reg-names: + items: + - const: cfg + - const: l1tcm -additionalProperties: - type: object - description: - Subnodes of the SCP represent rpmsg devices. The names of the devices - are not important. The properties of these nodes are defined by the - individual bindings for the rpmsg devices. - properties: - mediatek,rpmsg-name: - $ref: /schemas/types.yaml#/definitions/string-array - description: - Contains the name for the rpmsg device. Used to match - the subnode to rpmsg device announced by SCP. 
- - required: - - mediatek,rpmsg-name +additionalProperties: false examples: - | @@ -118,7 +219,42 @@ examples: clocks = <&infracfg CLK_INFRA_SCPSYS>; clock-names = "main"; - cros_ec { + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; }; + + - | + scp@10500000 { + compatible = "mediatek,mt8195-scp-dual"; + reg = <0x10720000 0xe0000>, + <0x10700000 0x8000>; + reg-names = "cfg", "l1tcm"; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x10500000 0x100000>; + + scp@0 { + compatible = "mediatek,scp-core"; + reg = <0x0 0xa0000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + + scp@a0000 { + compatible = "mediatek,scp-core"; + reg = <0xa0000 0x20000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml index a2b0079de039..661c2b425da3 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml @@ -66,7 +66,9 @@ allOf: - qcom,msm8953-adsp-pil - qcom,msm8974-adsp-pil - qcom,msm8996-adsp-pil + - qcom,msm8996-slpi-pil - qcom,msm8998-adsp-pas + - qcom,msm8998-slpi-pas - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas - qcom,sdm845-slpi-pas @@ -84,24 +86,6 @@ allOf: compatible: contains: enum: - - qcom,msm8996-slpi-pil - - qcom,msm8998-slpi-pas - then: - properties: - clocks: - items: - - description: XO clock - - description: AGGRE2 clock - clock-names: - items: - - const: xo - - const: aggre2 - - - if: - properties: - compatible: - contains: - enum: - qcom,msm8226-adsp-pil - qcom,msm8953-adsp-pil - qcom,msm8974-adsp-pil diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml index 0643faae2c39..971734085d51 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml @@ -220,7 +220,6 @@ allOf: - description: GCC MSS GPLL0 clock - description: GCC MSS SNOC_AXI clock - description: GCC MSS MNOC_AXI clock - - description: RPM PNOC clock - description: RPM QDSS clock clock-names: items: @@ -231,7 +230,6 @@ allOf: - const: gpll0_mss - const: snoc_axi - const: mnoc_axi - - const: pnoc - const: qdss glink-edge: false required: diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml index 689d5d535331..f10f329677d8 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml @@ -16,6 +16,7 @@ description: properties: compatible: enum: + - qcom,sc7180-adsp-pas - qcom,sc7180-mpss-pas - qcom,sc7280-mpss-pas @@ -30,26 +31,6 @@ properties: items: - const: xo - interrupts: - minItems: 6 - - interrupt-names: - minItems: 6 - - power-domains: - minItems: 2 - items: - - description: CX power domain - - description: MX power domain - - description: MSS power domain - - power-domain-names: - minItems: 2 - items: - - const: cx - - const: mx - - const: mss - memory-region: maxItems: 1 description: Reference to the reserved-memory for the Hexagon core @@ -75,19 +56,69 @@ allOf: properties: compatible: enum: + - 
qcom,sc7180-adsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sc7180-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + - const: lmx + + - if: + properties: + compatible: + enum: - qcom,sc7180-mpss-pas then: properties: power-domains: - minItems: 3 + items: + - description: CX power domain + - description: MX power domain + - description: MSS power domain power-domain-names: - minItems: 3 - else: + items: + - const: cx + - const: mx + - const: mss + + - if: + properties: + compatible: + enum: + - qcom,sc7280-mpss-pas + then: properties: power-domains: - maxItems: 2 + items: + - description: CX power domain + - description: MX power domain power-domain-names: - maxItems: 2 + items: + - const: cx + - const: mx unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml new file mode 100644 index 000000000000..3e4a03eb4532 --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/remoteproc/qcom,sm6375-pas.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SM6375 Peripheral Authentication Service + +maintainers: + - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + +description: + Qualcomm SM6375 SoC Peripheral Authentication Service loads and boots + firmware on the Qualcomm DSP Hexagon cores. 
+ +properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + + reg: + maxItems: 1 + + clocks: + items: + - description: XO clock + + clock-names: + items: + - const: xo + + memory-region: + maxItems: 1 + description: Reference to the reserved-memory for the Hexagon core + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: Firmware name for the Hexagon core + + smd-edge: false + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/remoteproc/qcom,pas-common.yaml# + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + - const: lmx + + - if: + properties: + compatible: + enum: + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + then: + properties: + power-domains: + items: + - description: CX power domain + power-domain-names: + items: + - const: cx + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,rpmcc.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/mailbox/qcom-ipcc.h> + #include <dt-bindings/power/qcom-rpmpd.h> + + remoteproc_adsp: remoteproc@a400000 { + compatible = "qcom,sm6375-adsp-pas"; + reg = <0x0a400000 0x100>; + + interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 3 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "wdog", "fatal", "ready", + "handover", "stop-ack"; + + clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>; + clock-names = "xo"; + + power-domains = <&rpmpd SM6375_VDD_LPI_CX>, + <&rpmpd SM6375_VDD_LPI_MX>; + power-domain-names = "lcx", "lmx"; + + memory-region = <&pil_adsp_mem>; + + qcom,smem-states = <&smp2p_adsp_out 0>; + qcom,smem-state-names = "stop"; + + glink-edge { + interrupts-extended = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP + IRQ_TYPE_EDGE_RISING>; + mboxes = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP>; + + label = "lpass"; + qcom,remote-pid = <2>; + + /* ... */ + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml index baccd98754a9..faf16cf14085 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml @@ -66,6 +66,17 @@ properties: Should contain the name of the default firmware image file located on the firmware search path. + interrupts: + maxItems: 1 + description: + Interrupt specifiers enable the virtio/rpmsg communication between MPU + and the PRU/RTU cores. For the values of the interrupt cells please refer + to interrupt-controller/ti,pruss-intc.yaml schema. 
+ + interrupt-names: + items: + - const: vring + if: properties: compatible: @@ -171,6 +182,9 @@ examples: <0x22400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <16 2 2>; + interrupt-names = "vring"; }; rtu0_0: rtu@4000 { @@ -180,6 +194,9 @@ examples: <0x23400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <20 4 4>; + interrupt-names = "vring"; }; tx_pru0_0: txpru@a000 { @@ -198,6 +215,9 @@ examples: <0x24400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <18 3 3>; + interrupt-names = "vring"; }; rtu0_1: rtu@6000 { @@ -207,6 +227,9 @@ examples: <0x23c00 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <22 5 5>; + interrupt-names = "vring"; }; tx_pru0_1: txpru@c000 { diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml index 443e2e7ab467..69845ec32e81 100644 --- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml @@ -15,9 +15,15 @@ allOf: properties: compatible: - enum: - - amlogic,meson-gxbb-wdt - - amlogic,t7-wdt + oneOf: + - enum: + - amlogic,meson-gxbb-wdt + - amlogic,t7-wdt + - items: + - enum: + - amlogic,c3-wdt + - amlogic,s4-wdt + - const: amlogic,t7-wdt reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt index a8197632d6d2..3208adb3e52e 100644 --- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt @@ -47,7 +47,15 @@ Optional properties for AST2500-compatible watchdogs: is configured as push-pull, then set the pulse polarity to active-high. The default is active-low. -Example: +Optional properties for AST2500- and AST2600-compatible watchdogs: + - aspeed,reset-mask: A bitmask indicating which peripherals will be reset if + the watchdog timer expires. On AST2500 this should be a + single word defined using the AST2500_WDT_RESET_* macros; + on AST2600 this should be a two-word array with the first + word defined using the AST2600_WDT_RESET1_* macros and the + second word defined using the AST2600_WDT_RESET2_* macros. + +Examples: wdt1: watchdog@1e785000 { compatible = "aspeed,ast2400-wdt"; @@ -55,3 +63,11 @@ Example: aspeed,reset-type = "system"; aspeed,external-signal; }; + + #include <dt-bindings/watchdog/aspeed-wdt.h> + wdt2: watchdog@1e785040 { + compatible = "aspeed,ast2600-wdt"; + reg = <0x1e785040 0x40>; + aspeed,reset-mask = <AST2600_WDT_RESET1_DEFAULT + (AST2600_WDT_RESET2_DEFAULT & ~AST2600_WDT_RESET2_LPC)>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml index 4b7ed1355701..9c50766bf690 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml @@ -30,6 +30,11 @@ properties: clocks: maxItems: 1 + fsl,ext-reset-output: + description: + When set, wdog can generate external reset from the wdog_any pin. 
+ type: boolean + required: - compatible - interrupts diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 5046dfa55f13..c12bc852aedc 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -21,6 +21,8 @@ properties: - qcom,apss-wdt-ipq5018 - qcom,apss-wdt-ipq5332 - qcom,apss-wdt-ipq9574 + - qcom,apss-wdt-msm8226 + - qcom,apss-wdt-msm8974 - qcom,apss-wdt-msm8994 - qcom,apss-wdt-qcm2290 - qcom,apss-wdt-qcs404 diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 198d805d611c..f04ce1215a03 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -122,12 +122,9 @@ are exportable by setting the s_export_op field in the struct super_block. This field must point to a "struct export_operations" struct which has the following members: - encode_fh (optional) + encode_fh (mandatory) Takes a dentry and creates a filehandle fragment which may later be used - to find or create a dentry for the same object. The default - implementation creates a filehandle fragment that encodes a 32bit inode - and generation number for the inode encoded, and if necessary the - same information for the parent. + to find or create a dentry for the same object. fh_to_dentry (mandatory) Given a filehandle fragment, this should find the implied object and diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 5b93268e400f..0407f361f32a 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -344,10 +344,11 @@ escaping the colons with a single backslash. For example: mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged -Since kernel version v6.5, directory names containing colons can also -be provided as lower layer using the fsconfig syscall from new mount api: +Since kernel version v6.8, directory names containing colons can also +be configured as a lower layer using the "lowerdir+" mount options and the +fsconfig syscall from new mount api. For example: - fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0); In the latter case, colons in lower layer directory names will be escaped as octal characters (\072) when displayed in /proc/self/mountinfo. @@ -416,6 +417,16 @@ Only the data of the files in the "data-only" lower layers may be visible when a "metacopy" file in one of the lower layers above it has a "redirect" to the absolute path of the "lower data" file in the "data-only" lower layer. +Since kernel version v6.8, "data-only" lower layers can also be added using +the "datadir+" mount options and the fsconfig syscall from new mount api. +For example: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0); + fs-verity support ---------------------- @@ -504,6 +515,29 @@ directory tree on the same or different underlying filesystem, and even to a different machine. With the "inodes index" feature, trying to mount the copied layers will fail the verification of the lower root file handle.
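To show where the fsconfig() fragments above fit, here is a minimal sketch of the complete new-mount-API sequence. It is not part of the patch; it assumes a v6.8+ kernel, uses syscall(2) directly because libc wrappers for fsopen()/fsmount()/move_mount() may be absent, and omits error handling:

/*
 * Hedged sketch: build a read-only overlay with two lower layers and
 * one data-only layer. Data-only layers need metacopy and redirects
 * enabled; all paths are placeholders.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>

int main(void)
{
	int fs_fd, mnt_fd;

	fs_fd = syscall(SYS_fsopen, "overlay", 0);

	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "metacopy", "on", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "redirect_dir", "on", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

	mnt_fd = syscall(SYS_fsmount, fs_fd, 0, 0);
	syscall(SYS_move_mount, mnt_fd, "", AT_FDCWD, "/merged",
		MOVE_MOUNT_F_EMPTY_PATH);

	return 0;
}

As with the colon-separated lowerdir syntax, "lowerdir+" layers stack in the order given, topmost first.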
+Nesting overlayfs mounts +------------------------ + +It is possible to use a lower directory that is stored on an overlayfs +mount. For regular files this does not need any special care. However, files +that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs, will be +interpreted by the underlying overlayfs mount and stripped out. In order to +allow the second overlayfs mount to see the attributes, they must be escaped. + +Overlayfs specific xattrs are escaped by using a special prefix of +"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr +in the lower dir will be exposed as a regular file with a +"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by +repeating the prefix multiple times, as each instance only removes one prefix. + +A lower dir with a regular whiteout will always be handled by the overlayfs +mount, so to support storing an effective whiteout file in an overlayfs mount, an +alternative form of whiteout is supported. This form is a regular, zero-size +file with the "overlay.whiteout" xattr set, inside a directory with the +"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, +but can be used by userspace tools (like containers) that generate lower layers. +These alternative whiteouts can be escaped using the standard xattr escape +mechanism in order to properly nest to any depth. Non-standard behavior --------------------- diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index d69f59700a23..878e72b2f8b7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1052,3 +1052,12 @@ kill_anon_super(), or kill_block_super() helpers. Lock ordering has been changed so that s_umount ranks above open_mutex again. All places where s_umount was taken under open_mutex have been fixed up. + +--- + +**mandatory** + +export_operations ->encode_fh() no longer has a default implementation to +encode FILEID_INO32_GEN* file handles. +Filesystems that used the default implementation may use the generic helper +generic_encode_ino32_fh() explicitly. diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c6ba4889575a..572d83a414d0 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -71,6 +71,10 @@ definitions: name: roce-bit - name: migratable-bit + - + name: ipsec-crypto-bit + - + name: ipsec-packet-bit - type: enum name: sb-threshold-type diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 6d8acdbe9be1..769149d98773 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -44,18 +44,16 @@ smcr_testlink_time - INTEGER wmem - INTEGER Initial size of send buffer used by SMC sockets. - The default value inherits from net.ipv4.tcp_wmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D. - Default: 16K + Default: 64KiB rmem - INTEGER Initial size of receive buffer (RMB) used by SMC sockets. - The default value inherits from net.ipv4.tcp_rmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D.
- Default: 128K + Default: 64KiB diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 8e9bebcf0a2e..e35e6b18df40 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -59,8 +59,12 @@ Synopsis of fprobe-events and bitfield are supported. (\*1) This is available only when BTF is enabled. - (\*2) only for the probe on function entry (offs == 0). - (\*3) only for return probe. + (\*2) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*3) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*4) this is useful for fetching a field of data structures. (\*5) "u" means user-space dereference. diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8a2dfee38145..bf9cecb69fc9 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -61,8 +61,12 @@ Synopsis of kprobe_events (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. - (\*1) only for the probe on function entry (offs == 0). - (\*2) only for return probe. + (\*1) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*2) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*3) this is useful for fetching a field of data structures. (\*4) "u" means user-space dereference. See :ref:`user_mem_access`. diff --git a/Documentation/userspace-api/media/v4l/subdev-formats.rst b/Documentation/userspace-api/media/v4l/subdev-formats.rst index a3a35eeed708..eb3cd20b0cf2 100644 --- a/Documentation/userspace-api/media/v4l/subdev-formats.rst +++ b/Documentation/userspace-api/media/v4l/subdev-formats.rst @@ -949,6 +949,78 @@ The following tables list existing packed RGB formats. - b\ :sub:`2` - b\ :sub:`1` - b\ :sub:`0` + * .. _MEDIA-BUS-FMT-RGB666-2X9-BE: + + - MEDIA_BUS_FMT_RGB666_2X9_BE + - 0x1025 + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - r\ :sub:`5` + - r\ :sub:`4` + - r\ :sub:`3` + - r\ :sub:`2` + - r\ :sub:`1` + - r\ :sub:`0` + - g\ :sub:`5` + - g\ :sub:`4` + - g\ :sub:`3` + * - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` * .. _MEDIA-BUS-FMT-BGR666-1X18: - MEDIA_BUS_FMT_BGR666_1X18 diff --git a/MAINTAINERS b/MAINTAINERS index 0b275b8d6bd2..97f51d5ec1cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -311,7 +311,7 @@ ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore <robert.moore@intel.com> M: "Rafael J. 
Wysocki" <rafael.j.wysocki@intel.com> L: linux-acpi@vger.kernel.org -L: acpica-devel@lists.linuxfoundation.org +L: acpica-devel@lists.linux.dev S: Supported W: https://acpica.org/ W: https://github.com/acpica/acpica/ @@ -6534,7 +6534,7 @@ F: drivers/gpu/drm/ast/ DRM DRIVER FOR BOCHS VIRTUAL GPU M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/tiny/bochs.c @@ -6781,7 +6781,7 @@ F: drivers/gpu/drm/tiny/repaper.c DRM DRIVER FOR QEMU'S CIRRUS DEVICE M: Dave Airlie <airlied@redhat.com> M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Obsolete W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ T: git git://anongit.freedesktop.org/drm/drm-misc @@ -6790,7 +6790,7 @@ F: drivers/gpu/drm/tiny/cirrus.c DRM DRIVER FOR QXL VIRTUAL GPU M: Dave Airlie <airlied@redhat.com> M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: spice-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@ -7133,7 +7133,7 @@ F: drivers/gpu/host1x/ F: include/linux/host1x.h F: include/uapi/drm/tegra_drm.h -DRM DRIVERS FOR RENESAS +DRM DRIVERS FOR RENESAS R-CAR M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> M: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com> L: dri-devel@lists.freedesktop.org @@ -7144,7 +7144,16 @@ F: Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml F: Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml F: Documentation/devicetree/bindings/display/renesas,du.yaml -F: drivers/gpu/drm/renesas/ +F: drivers/gpu/drm/renesas/rcar-du/ + +DRM DRIVERS FOR RENESAS SHMOBILE +M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> +M: Geert Uytterhoeven <geert+renesas@glider.be> +L: dri-devel@lists.freedesktop.org +L: linux-renesas-soc@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml +F: drivers/gpu/drm/renesas/shmobile/ F: include/linux/platform_data/shmob_drm.h DRM DRIVERS FOR ROCKCHIP @@ -7880,7 +7889,7 @@ F: drivers/net/can/usb/etas_es58x/ ETHERNET BRIDGE M: Roopa Prabhu <roopa@nvidia.com> M: Nikolay Aleksandrov <razor@blackwall.org> -L: bridge@lists.linux-foundation.org (moderated for non-subscribers) +L: bridge@lists.linux.dev L: netdev@vger.kernel.org S: Maintained W: http://www.linuxfoundation.org/en/Net:Bridge @@ -8156,6 +8165,27 @@ F: include/linux/fs_types.h F: include/uapi/linux/fs.h F: include/uapi/linux/openat2.h +FILESYSTEMS [EXPORTFS] +M: Chuck Lever <chuck.lever@oracle.com> +M: Jeff Layton <jlayton@kernel.org> +R: Amir Goldstein <amir73il@gmail.com> +L: linux-fsdevel@vger.kernel.org +L: linux-nfs@vger.kernel.org +S: Supported +F: Documentation/filesystems/nfs/exporting.rst +F: fs/exportfs/ +F: fs/fhandle.c +F: include/linux/exportfs.h + +FILESYSTEMS [IOMAP] +M: Christian Brauner <brauner@kernel.org> +R: Darrick J. 
Wong <djwong@kernel.org> +L: linux-xfs@vger.kernel.org +L: linux-fsdevel@vger.kernel.org +S: Supported +F: fs/iomap/ +F: include/linux/iomap.h + FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio <riku.voipio@iki.fi> L: linux-hwmon@vger.kernel.org @@ -11076,15 +11106,6 @@ L: linux-mips@vger.kernel.org S: Maintained F: drivers/net/ethernet/sgi/ioc3-eth.c -IOMAP FILESYSTEM LIBRARY -M: Darrick J. Wong <djwong@kernel.org> -L: linux-xfs@vger.kernel.org -L: linux-fsdevel@vger.kernel.org -S: Supported -T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git -F: fs/iomap/ -F: include/linux/iomap.h - IOMMU DMA-API LAYER M: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux.dev @@ -11548,7 +11569,6 @@ S: Supported W: http://nfs.sourceforge.net/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git F: Documentation/filesystems/nfs/ -F: fs/exportfs/ F: fs/lockd/ F: fs/nfs_common/ F: fs/nfsd/ @@ -16374,7 +16394,7 @@ M: Juergen Gross <jgross@suse.com> R: Ajay Kaher <akaher@vmware.com> R: Alexey Makhalov <amakhalov@vmware.com> R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core @@ -22952,7 +22972,7 @@ VIRTIO AND VHOST VSOCK DRIVER M: Stefan Hajnoczi <stefanha@redhat.com> M: Stefano Garzarella <sgarzare@redhat.com> L: kvm@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: drivers/vhost/vsock.c @@ -22964,7 +22984,7 @@ F: net/vmw_vsock/virtio_transport_common.c VIRTIO BALLOON M: "Michael S. Tsirkin" <mst@redhat.com> M: David Hildenbrand <david@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/virtio/virtio_balloon.c F: include/linux/balloon_compaction.h @@ -22976,7 +22996,7 @@ M: "Michael S. Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> R: Paolo Bonzini <pbonzini@redhat.com> R: Stefan Hajnoczi <stefanha@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/block/virtio_blk.c F: drivers/scsi/virtio_scsi.c @@ -22985,7 +23005,7 @@ F: include/uapi/linux/virtio_scsi.h VIRTIO CONSOLE DRIVER M: Amit Shah <amit@kernel.org> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/char/virtio_console.c F: include/linux/virtio_console.h @@ -22995,7 +23015,7 @@ VIRTIO CORE AND NET DRIVERS M: "Michael S. 
Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> R: Xuan Zhuo <xuanzhuo@linux.alibaba.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: Documentation/ABI/testing/sysfs-bus-vdpa F: Documentation/ABI/testing/sysfs-class-vduse @@ -23014,7 +23034,7 @@ F: tools/virtio/ VIRTIO CRYPTO DRIVER M: Gonglei <arei.gonglei@huawei.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/virtio/ @@ -23025,7 +23045,7 @@ M: Cornelia Huck <cohuck@redhat.com> M: Halil Pasic <pasic@linux.ibm.com> M: Eric Farman <farman@linux.ibm.com> L: linux-s390@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: kvm@vger.kernel.org S: Supported F: arch/s390/include/uapi/asm/virtio-ccw.h @@ -23035,7 +23055,7 @@ VIRTIO FILE SYSTEM M: Vivek Goyal <vgoyal@redhat.com> M: Stefan Hajnoczi <stefanha@redhat.com> M: Miklos Szeredi <miklos@szeredi.hu> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported W: https://virtio-fs.gitlab.io/ @@ -23047,7 +23067,7 @@ VIRTIO GPIO DRIVER M: Enrico Weigelt, metux IT consult <info@metux.net> M: Viresh Kumar <vireshk@kernel.org> L: linux-gpio@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/gpio/gpio-virtio.c F: include/uapi/linux/virtio_gpio.h @@ -23058,7 +23078,7 @@ M: Gerd Hoffmann <kraxel@redhat.com> R: Gurchetan Singh <gurchetansingh@chromium.org> R: Chia-I Wu <olvaffe@gmail.com> L: dri-devel@lists.freedesktop.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/ci/xfails/virtio* @@ -23069,7 +23089,7 @@ VIRTIO HOST (VHOST) M: "Michael S. 
Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> L: kvm@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git @@ -23085,7 +23105,7 @@ M: Jason Wang <jasowang@redhat.com> M: Mike Christie <michael.christie@oracle.com> R: Paolo Bonzini <pbonzini@redhat.com> R: Stefan Hajnoczi <stefanha@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/vhost/scsi.c @@ -23093,7 +23113,7 @@ VIRTIO I2C DRIVER M: Conghui Chen <conghui.chen@intel.com> M: Viresh Kumar <viresh.kumar@linaro.org> L: linux-i2c@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/i2c/busses/i2c-virtio.c F: include/uapi/linux/virtio_i2c.h @@ -23106,14 +23126,14 @@ F: include/uapi/linux/virtio_input.h VIRTIO IOMMU DRIVER M: Jean-Philippe Brucker <jean-philippe@linaro.org> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/iommu/virtio-iommu.c F: include/uapi/linux/virtio_iommu.h VIRTIO MEM DRIVER M: David Hildenbrand <david@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained W: https://virtio-mem.gitlab.io/ F: drivers/virtio/virtio_mem.c @@ -23121,7 +23141,7 @@ F: include/uapi/linux/virtio_mem.h VIRTIO PMEM DRIVER M: Pankaj Gupta <pankaj.gupta.linux@gmail.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/nvdimm/nd_virtio.c F: drivers/nvdimm/virtio_pmem.c @@ -23129,7 +23149,7 @@ F: drivers/nvdimm/virtio_pmem.c VIRTIO SOUND DRIVER M: Anton Yakovlev <anton.yakovlev@opensynergy.com> M: "Michael S. 
Tsirkin" <mst@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: include/uapi/linux/virtio_snd.h @@ -23178,16 +23198,9 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/test-drivers/vivid/* -VLYNQ BUS -M: Florian Fainelli <f.fainelli@gmail.com> -L: openwrt-devel@lists.openwrt.org (subscribers-only) -S: Maintained -F: drivers/vlynq/vlynq.c -F: include/linux/vlynq.h - VM SOCKETS (AF_VSOCK) M: Stefano Garzarella <sgarzare@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: drivers/net/vsockmon.c @@ -23231,7 +23244,7 @@ VMWARE HYPERVISOR INTERFACE M: Ajay Kaher <akaher@vmware.com> M: Alexey Makhalov <amakhalov@vmware.com> R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index de1566e32cb8..68e8301c0df2 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -32,9 +32,6 @@ struct kprobe; void arch_remove_kprobe(struct kprobe *p); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); - struct prev_kprobe { struct kprobe *kp; unsigned long status; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index bb9f0e5b0b63..10fd74bf85f9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1076,7 +1076,6 @@ CONFIG_QCOM_IPCC=y CONFIG_OMAP_IOMMU=y CONFIG_OMAP_IOMMU_DEBUG=y CONFIG_ROCKCHIP_IOMMU=y -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_EXYNOS_IOMMU=y CONFIG_QCOM_IOMMU=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 23c131b0854b..9e81b1849e4c 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -100,7 +100,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_AR7_PARTS=m CONFIG_MTD_CMDLINE_PARTS=m CONFIG_MTD_OF_PARTS=m CONFIG_MTD_AFS_PARTS=m diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 613f07b8ce15..8635b7216bfc 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -292,7 +292,6 @@ CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=y CONFIG_CROS_EC_I2C=m CONFIG_CROS_EC_SPI=m -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_ARCH_TEGRA_2x_SOC=y CONFIG_ARCH_TEGRA_3x_SOC=y diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 72529f5e2bed..a41b503b7dcd 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -23,6 +23,8 @@ #define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) #define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) #define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMCEID2 __ACCESS_CP15(c9, 0, c14, 4) +#define PMCEID3 __ACCESS_CP15(c9, 0, c14, 5) #define PMMIR __ACCESS_CP15(c9, 0, c14, 6) #define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) @@ -150,21 +152,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(PMCCNTR); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, PMXEVCNTR); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(PMXEVCNTR); -} - -static inline 
void write_pmxevtyper(u32 val) -{ - write_sysreg(val, PMXEVTYPER); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, PMCNTENSET); @@ -205,16 +192,6 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, PMUSERENR); } -static inline u32 read_pmceid0(void) -{ - return read_sysreg(PMCEID0); -} - -static inline u32 read_pmceid1(void) -{ - return read_sysreg(PMCEID1); -} - static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) @@ -231,6 +208,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {} /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P1 0x4 #define ARMV8_PMU_DFR_VER_V3P4 0x5 #define ARMV8_PMU_DFR_VER_V3P5 0x6 #define ARMV8_PMU_DFR_VER_IMP_DEF 0xF @@ -251,4 +229,24 @@ static inline bool is_pmuv3p5(int pmuver) return pmuver >= ARMV8_PMU_DFR_VER_V3P5; } +static inline u64 read_pmceid0(void) +{ + u64 val = read_sysreg(PMCEID0); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID2) << 32; + + return val; +} + +static inline u64 read_pmceid1(void) +{ + u64 val = read_sysreg(PMCEID1); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID3) << 32; + + return val; +} + #endif diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index e26a278d301a..5b8dbf1b0be4 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -40,8 +40,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 4a74f3b6d775..bf7de35ffcbc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -859,7 +859,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros_ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 1447eed0ea36..f2281250ac35 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1312,7 +1312,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros-ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 18dc2fb3d7b7..c27404fa4418 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -46,12 +46,12 @@ static inline u32 read_pmuver(void) ID_AA64DFR0_EL1_PMUVer_SHIFT); } -static inline void write_pmcr(u32 val) +static inline void write_pmcr(u64 val) { write_sysreg(val, pmcr_el0); } -static inline u32 read_pmcr(void) +static inline u64 read_pmcr(void) { return read_sysreg(pmcr_el0); } @@ -71,21 +71,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(pmccntr_el0); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, pmxevcntr_el0); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(pmxevcntr_el0); 
-} - -static inline void write_pmxevtyper(u32 val) -{ - write_sysreg(val, pmxevtyper_el0); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, pmcntenset_el0); @@ -106,7 +91,7 @@ static inline void write_pmintenclr(u32 val) write_sysreg(val, pmintenclr_el1); } -static inline void write_pmccfiltr(u32 val) +static inline void write_pmccfiltr(u64 val) { write_sysreg(val, pmccfiltr_el0); } @@ -126,12 +111,12 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, pmuserenr_el0); } -static inline u32 read_pmceid0(void) +static inline u64 read_pmceid0(void) { return read_sysreg(pmceid0_el0); } -static inline u32 read_pmceid1(void) +static inline u64 read_pmceid1(void) { return read_sysreg(pmceid1_el0); } diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 05cd82eeca13..be7a3680dadf 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -37,8 +37,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 17f687510c48..d977713ec0ba 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -54,7 +54,6 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f6b2e2906fc9..646591c67e7a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -999,6 +999,37 @@ static void init_32bit_cpu_features(struct cpuinfo_32bit *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } +#ifdef CONFIG_ARM64_PSEUDO_NMI +static bool enable_pseudo_nmi; + +static int __init early_enable_pseudo_nmi(char *p) +{ + return kstrtobool(p, &enable_pseudo_nmi); +} +early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); + +static __init void detect_system_supports_pseudo_nmi(void) +{ + struct device_node *np; + + if (!enable_pseudo_nmi) + return; + + /* + * Detect broken MediaTek firmware that doesn't properly save and + * restore GIC priorities. + */ + np = of_find_compatible_node(NULL, NULL, "arm,gic-v3"); + if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) { + pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n"); + enable_pseudo_nmi = false; + } + of_node_put(np); +} +#else /* CONFIG_ARM64_PSEUDO_NMI */ +static inline void detect_system_supports_pseudo_nmi(void) { } +#endif + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -1058,6 +1089,13 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpucap_indirect_list(); /* + * Detect broken pseudo-NMI. Must be called _before_ the call to + * setup_boot_cpu_capabilities() since it interacts with + * can_use_gic_priorities(). 
+ */ + detect_system_supports_pseudo_nmi(); + + /* * Detect and enable early CPU capabilities based on the boot CPU, * after we have initialised the CPU feature infrastructure. */ @@ -2085,14 +2123,6 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) #endif /* CONFIG_ARM64_E0PD */ #ifdef CONFIG_ARM64_PSEUDO_NMI -static bool enable_pseudo_nmi; - -static int __init early_enable_pseudo_nmi(char *p) -{ - return kstrtobool(p, &enable_pseudo_nmi); -} -early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); - static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index be95b523c101..defbab84e9e5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -965,10 +965,7 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) static bool ipi_should_be_nmi(enum ipi_msg_type ipi) { - DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis); - - if (!system_uses_irq_prio_masking() || - !static_branch_likely(&supports_pseudo_nmis)) + if (!system_uses_irq_prio_masking()) return false; switch (ipi) { diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d889a0b97bc1..ee123820a476 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -136,6 +136,7 @@ config LOONGARCH select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK select HAVE_RSEQ diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b86f2ff31659..9eeb0c05f3f4 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,8 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) +KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index e27f0c72d324..99af8b3160a8 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -36,19 +36,19 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + "am"#asm_op".w" " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ -static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result c_op I; \ } -#define ATOMIC_FETCH_OP(op, I, asm_op) \ -static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ 
__volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ #define ATOMIC_OPS(op, I, asm_op, c_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(add, i, add, +) ATOMIC_OPS(sub, -i, add, +) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_add_return_acquire arch_atomic_add_return +#define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_sub_return_acquire arch_atomic_sub_return +#define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add +#define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS #define ATOMIC_OPS(op, I, asm_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(and, i, and) ATOMIC_OPS(or, i, or) ATOMIC_OPS(xor, i, xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and +#define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or +#define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed #undef ATOMIC_OPS @@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) static inline void arch_atomic64_##op(long i, atomic64_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + "am"#asm_op".d " " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ -static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \ { \ long result; \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ return result c_op I; \ } 
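(Annotation, not part of the patch.) The extra mb/suffix parameters added above let a single macro body generate two flavours of each operation: passing _db selects the LoongArch AMO forms that carry a full data barrier (amadd_db.w, amadd_db.d, ...), while passing an empty mb together with the _relaxed suffix selects the plain, unordered forms. Hand-expanding ATOMIC_OP_RETURN(add, i, add, +, _db, ) and ATOMIC_OP_RETURN(add, i, add, +, , _relaxed) from the 32-bit hunk earlier in this file gives roughly:

	static inline int arch_atomic_add_return(int i, atomic_t *v)
	{
		int result;

		__asm__ __volatile__(
		"amadd_db.w %1, %2, %0\n"	/* _db form: AMO with full barrier */
		: "+ZB" (v->counter), "=&r" (result)
		: "r" (i)
		: "memory");

		return result + i;
	}

	static inline int arch_atomic_add_return_relaxed(int i, atomic_t *v)
	{
		int result;

		__asm__ __volatile__(
		"amadd.w %1, %2, %0\n"		/* plain form: no ordering implied */
		: "+ZB" (v->counter), "=&r" (result)
		: "r" (i)
		: "memory");

		return result + i;
	}

The acquire/release names are then simply #defined to the fully ordered variant, a safe over-approximation, so generic code can pick the cheaper _relaxed form wherever weaker ordering suffices. The ATOMIC64_* macros below apply the same scheme to the 64-bit .d instructions.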
-#define ATOMIC64_FETCH_OP(op, I, asm_op) \ -static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \ { \ long result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ #define ATOMIC64_OPS(op, I, asm_op, c_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(add, i, add, +) ATOMIC64_OPS(sub, -i, add, +) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_add_return_acquire arch_atomic64_add_return +#define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return +#define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, I, asm_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(and, i, and) ATOMIC64_OPS(or, i, or) ATOMIC64_OPS(xor, i, xor) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 008a88ead60d..d8f637f9e400 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -65,6 +65,8 @@ enum reg2_op { revbd_op = 0x0f, revh2w_op = 0x10, revhd_op = 0x11, + extwh_op = 0x16, + extwb_op = 0x17, iocsrrdb_op = 0x19200, iocsrrdh_op = 0x19201, iocsrrdw_op = 0x19202, @@ -572,6 +574,8 @@ static inline void emit_##NAME(union 
loongarch_instruction *insn, \ DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) DEF_EMIT_REG2_FORMAT(revbd, revbd_op) +DEF_EMIT_REG2_FORMAT(extwh, extwh_op) +DEF_EMIT_REG2_FORMAT(extwb, extwb_op) #define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ @@ -623,6 +627,9 @@ DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) DEF_EMIT_REG2I12_FORMAT(andi, andi_op) DEF_EMIT_REG2I12_FORMAT(ori, ori_op) DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldb, ldb_op) +DEF_EMIT_REG2I12_FORMAT(ldh, ldh_op) +DEF_EMIT_REG2I12_FORMAT(ldw, ldw_op) DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) @@ -701,9 +708,12 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ insn->reg3_format.rk = rk; \ } +DEF_EMIT_REG3_FORMAT(addw, addw_op) DEF_EMIT_REG3_FORMAT(addd, addd_op) DEF_EMIT_REG3_FORMAT(subd, subd_op) DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divd, divd_op) +DEF_EMIT_REG3_FORMAT(modd, modd_op) DEF_EMIT_REG3_FORMAT(divdu, divdu_op) DEF_EMIT_REG3_FORMAT(moddu, moddu_op) DEF_EMIT_REG3_FORMAT(and, and_op) @@ -715,6 +725,9 @@ DEF_EMIT_REG3_FORMAT(srlw, srlw_op) DEF_EMIT_REG3_FORMAT(srld, srld_op) DEF_EMIT_REG3_FORMAT(sraw, sraw_op) DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxb, ldxb_op) +DEF_EMIT_REG3_FORMAT(ldxh, ldxh_op) +DEF_EMIT_REG3_FORMAT(ldxw, ldxw_op) DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index b9f567e66016..ed5da02b1cf6 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -32,7 +32,7 @@ static inline void set_my_cpu_offset(unsigned long off) #define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ +static __always_inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ { \ unsigned long ret; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,8 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) } } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) { switch (size) { case 1: diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ef35c871244f..5bca12d16e06 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -504,8 +504,9 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); + rcutree_report_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index db9342b2d0e6..169ff8b3915e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ 
b/arch/loongarch/net/bpf_jit.c @@ -411,7 +411,11 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t offset; struct exception_table_entry *ex; - if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (!ctx->image || !ctx->prog->aux->extable) + return 0; + + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) return 0; if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) @@ -450,7 +454,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext { u8 tm = -1; u64 func_addr; - bool func_addr_fixed; + bool func_addr_fixed, sign_extend; int i = insn - ctx->prog->insnsi; int ret, jmp_offset; const u8 code = insn->code; @@ -468,8 +472,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - move_reg(ctx, dst, src); - emit_zext_32(ctx, dst, is32); + switch (off) { + case 0: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case 8: + move_reg(ctx, t1, src); + emit_insn(ctx, extwb, dst, t1); + break; + case 16: + move_reg(ctx, t1, src); + emit_insn(ctx, extwh, dst, t1); + break; + case 32: + emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); + break; + } break; /* dst = imm */ @@ -534,39 +553,71 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = dst / src */ case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst / imm */ case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % src */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % imm */ case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, 
dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = -dst */ @@ -712,6 +763,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: emit_insn(ctx, revb2h, dst, dst); @@ -828,7 +880,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* PC += off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2la_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2la_offset(i, off, ctx); + else + jmp_offset = bpf2la_offset(i, imm, ctx); if (emit_uncond_jmp(ctx, jmp_offset) < 0) goto toofar; break; @@ -879,31 +935,56 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + sign_extend = BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { - emit_insn(ctx, ldbu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldb, dst, src, off); + else + emit_insn(ctx, ldbu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxbu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxb, dst, src, t1); + else + emit_insn(ctx, ldxbu, dst, src, t1); } break; case BPF_H: if (is_signed_imm12(off)) { - emit_insn(ctx, ldhu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldh, dst, src, off); + else + emit_insn(ctx, ldhu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxhu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxh, dst, src, t1); + else + emit_insn(ctx, ldxhu, dst, src, t1); } break; case BPF_W: if (is_signed_imm12(off)) { - emit_insn(ctx, ldwu, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrw, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldw, dst, src, off); + else + emit_insn(ctx, ldwu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxwu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxw, dst, src, t1); + else + emit_insn(ctx, ldxwu, dst, src, t1); } break; case BPF_DW: diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index caad195ba5c1..a2311c4bce6a 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -2,7 +2,6 @@ # All platforms listed in alphabetic order platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ -platform-$(CONFIG_AR7) += ar7/ platform-$(CONFIG_ATH25) += ath25/ platform-$(CONFIG_ATH79) += ath79/ platform-$(CONFIG_BCM47XX) += bcm47xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bc8421859006..76db82542519 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -202,28 +202,6 @@ config MIPS_ALCHEMY select SYS_SUPPORTS_ZBOOT select COMMON_CLK -config AR7 - bool "Texas Instruments AR7" - select BOOT_ELF32 - select COMMON_CLK - select DMA_NONCOHERENT - select CEVT_R4K - select CSRC_R4K - select IRQ_MIPS_CPU - select 
NO_EXCEPT_FILL - select SWAP_IO_SPACE - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS16 - select SYS_SUPPORTS_ZBOOT_UART16550 - select GPIOLIB - select VLYNQ - help - Support for the Texas Instruments AR7 System-on-a-Chip - family: TNETD7100, 7200 and 7300. - config ATH25 bool "Atheros AR231x/AR531x SoC support" select CEVT_R4K diff --git a/arch/mips/ar7/Makefile b/arch/mips/ar7/Makefile deleted file mode 100644 index cd51c6c6e686..000000000000 --- a/arch/mips/ar7/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-y := \ - prom.o \ - setup.o \ - memory.o \ - irq.o \ - time.o \ - platform.o \ - gpio.o \ - clock.o diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform deleted file mode 100644 index a9257cc01c3c..000000000000 --- a/arch/mips/ar7/Platform +++ /dev/null @@ -1,5 +0,0 @@ -# -# Texas Instruments AR7 -# -cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 -load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c deleted file mode 100644 index c717acbc5506..000000000000 --- a/arch/mips/ar7/clock.c +++ /dev/null @@ -1,439 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/gcd.h> -#include <linux/io.h> -#include <linux/err.h> -#include <linux/clkdev.h> -#include <linux/clk.h> -#include <linux/clk-provider.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> - -#define BOOT_PLL_SOURCE_MASK 0x3 -#define CPU_PLL_SOURCE_SHIFT 16 -#define BUS_PLL_SOURCE_SHIFT 14 -#define USB_PLL_SOURCE_SHIFT 18 -#define DSP_PLL_SOURCE_SHIFT 22 -#define BOOT_PLL_SOURCE_AFE 0 -#define BOOT_PLL_SOURCE_BUS 0 -#define BOOT_PLL_SOURCE_REF 1 -#define BOOT_PLL_SOURCE_XTAL 2 -#define BOOT_PLL_SOURCE_CPU 3 -#define BOOT_PLL_BYPASS 0x00000020 -#define BOOT_PLL_ASYNC_MODE 0x02000000 -#define BOOT_PLL_2TO1_MODE 0x00008000 - -#define TNETD7200_CLOCK_ID_CPU 0 -#define TNETD7200_CLOCK_ID_DSP 1 -#define TNETD7200_CLOCK_ID_USB 2 - -#define TNETD7200_DEF_CPU_CLK 211000000 -#define TNETD7200_DEF_DSP_CLK 125000000 -#define TNETD7200_DEF_USB_CLK 48000000 - -struct tnetd7300_clock { - u32 ctrl; -#define PREDIV_MASK 0x001f0000 -#define PREDIV_SHIFT 16 -#define POSTDIV_MASK 0x0000001f - u32 unused1[3]; - u32 pll; -#define MUL_MASK 0x0000f000 -#define MUL_SHIFT 12 -#define PLL_MODE_MASK 0x00000001 -#define PLL_NDIV 0x00000800 -#define PLL_DIV 0x00000002 -#define PLL_STATUS 0x00000001 - u32 unused2[3]; -}; - -struct tnetd7300_clocks { - struct tnetd7300_clock bus; - struct tnetd7300_clock cpu; - struct tnetd7300_clock usb; - struct tnetd7300_clock dsp; -}; - -struct tnetd7200_clock { - u32 ctrl; - u32 unused1[3]; -#define DIVISOR_ENABLE_MASK 0x00008000 - u32 mul; - u32 prediv; - u32 postdiv; - u32 postdiv2; - u32 unused2[6]; - u32 cmd; - u32 status; - u32 cmden; - u32 padding[15]; -}; - -struct tnetd7200_clocks { - struct tnetd7200_clock cpu; - struct tnetd7200_clock dsp; - struct tnetd7200_clock usb; -}; - -struct clk_rate { - u32 rate; -}; -static struct clk_rate bus_clk = { - .rate = 125000000, -}; - -static struct clk_rate cpu_clk = { - .rate = 150000000, -}; - -static void 
approximate(int base, int target, int *prediv, - int *postdiv, int *mul) -{ - int i, j, k, freq, res = target; - for (i = 1; i <= 16; i++) - for (j = 1; j <= 32; j++) - for (k = 1; k <= 32; k++) { - freq = abs(base / j * i / k - target); - if (freq < res) { - res = freq; - *mul = i; - *prediv = j; - *postdiv = k; - } - } -} - -static void calculate(int base, int target, int *prediv, int *postdiv, - int *mul) -{ - int tmp_gcd, tmp_base, tmp_freq; - - for (*prediv = 1; *prediv <= 32; (*prediv)++) { - tmp_base = base / *prediv; - tmp_gcd = gcd(target, tmp_base); - *mul = target / tmp_gcd; - *postdiv = tmp_base / tmp_gcd; - if ((*mul < 1) || (*mul >= 16)) - continue; - if ((*postdiv > 0) & (*postdiv <= 32)) - break; - } - - if (base / *prediv * *mul / *postdiv != target) { - approximate(base, target, prediv, postdiv, mul); - tmp_freq = base / *prediv * *mul / *postdiv; - printk(KERN_WARNING - "Adjusted requested frequency %d to %d\n", - target, tmp_freq); - } - - printk(KERN_DEBUG "Clocks: prediv: %d, postdiv: %d, mul: %d\n", - *prediv, *postdiv, *mul); -} - -static int tnetd7300_dsp_clock(void) -{ - u32 didr1, didr2; - u8 rev = ar7_chip_rev(); - didr1 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x18)); - didr2 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x1c)); - if (didr2 & (1 << 23)) - return 0; - if ((rev >= 0x23) && (rev != 0x57)) - return 250000000; - if ((((didr2 & 0x1fff) << 10) | ((didr1 & 0xffc00000) >> 22)) - > 4208000) - return 250000000; - return 0; -} - -static int tnetd7300_get_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 bus_clock) -{ - int product; - int base_clock = AR7_REF_CLOCK; - u32 ctrl = readl(&clock->ctrl); - u32 pll = readl(&clock->pll); - int prediv = ((ctrl & PREDIV_MASK) >> PREDIV_SHIFT) + 1; - int postdiv = (ctrl & POSTDIV_MASK) + 1; - int divisor = prediv * postdiv; - int mul = ((pll & MUL_MASK) >> MUL_SHIFT) + 1; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clock; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - if (*bootcr & BOOT_PLL_BYPASS) - return base_clock / divisor; - - if ((pll & PLL_MODE_MASK) == 0) - return (base_clock >> (mul / 16 + 1)) / divisor; - - if ((pll & (PLL_NDIV | PLL_DIV)) == (PLL_NDIV | PLL_DIV)) { - product = (mul & 1) ? 
- (base_clock * mul) >> 1 : - (base_clock * (mul - 1)) >> 2; - return product / divisor; - } - - if (mul == 16) - return base_clock / divisor; - - return base_clock * mul / divisor; -} - -static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 frequency) -{ - int prediv, postdiv, mul; - int base_clock = bus_clk.rate; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clk.rate; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - calculate(base_clock, frequency, &prediv, &postdiv, &mul); - - writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl); - mdelay(1); - writel(4, &clock->pll); - while (readl(&clock->pll) & PLL_STATUS) - ; - writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll); - mdelay(75); -} - -static void __init tnetd7300_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7300_clocks *clocks = - ioremap(UR8_REGS_CLOCKS, - sizeof(struct tnetd7300_clocks)); - u32 dsp_clk; - struct clk *clk; - - bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, - &clocks->bus, bootcr, AR7_AFE_CLOCK); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) - cpu_clk.rate = tnetd7300_get_clock(CPU_PLL_SOURCE_SHIFT, - &clocks->cpu, bootcr, AR7_AFE_CLOCK); - else - cpu_clk.rate = bus_clk.rate; - - dsp_clk = tnetd7300_dsp_clock(); - if (dsp_clk == 250000000) - tnetd7300_set_clock(DSP_PLL_SOURCE_SHIFT, &clocks->dsp, - bootcr, dsp_clk); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clk = clk_register_fixed_rate(NULL, "dsp", NULL, 0, dsp_clk); - clkdev_create(clk, "dsp", NULL); -} - -static void tnetd7200_set_clock(int base, struct tnetd7200_clock *clock, - int prediv, int postdiv, int postdiv2, int mul, u32 frequency) -{ - printk(KERN_INFO - "Clocks: base = %d, frequency = %u, prediv = %d, " - "postdiv = %d, postdiv2 = %d, mul = %d\n", - base, frequency, prediv, postdiv, postdiv2, mul); - - writel(0, &clock->ctrl); - writel(DIVISOR_ENABLE_MASK | ((prediv - 1) & 0x1F), &clock->prediv); - writel((mul - 1) & 0xF, &clock->mul); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv - 1) & 0x1F), &clock->postdiv); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv2 - 1) & 0x1F), &clock->postdiv2); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(readl(&clock->ctrl) | 1, &clock->ctrl); -} - -static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) -{ - if (*bootcr & BOOT_PLL_ASYNC_MODE) - /* Async */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* Sync */ - if (*bootcr & BOOT_PLL_2TO1_MODE) - /* 2:1 */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* 1:1 */ - return AR7_REF_CLOCK; -} - - -static void __init tnetd7200_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7200_clocks *clocks = - 
ioremap(AR7_REGS_CLOCKS, - sizeof(struct tnetd7200_clocks)); - int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; - int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; - int usb_base, usb_mul, usb_prediv, usb_postdiv; - struct clk *clk; - - cpu_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_CPU, bootcr); - dsp_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_DSP, bootcr); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) { - printk(KERN_INFO "Clocks: Async mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, - &dsp_prediv, &dsp_postdiv, &dsp_mul); - bus_clk.rate = - ((dsp_base / dsp_prediv) * dsp_mul) / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, dsp_mul * 2, - bus_clk.rate); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = - ((cpu_base / cpu_prediv) * cpu_mul) / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - } else - if (*bootcr & BOOT_PLL_2TO1_MODE) { - printk(KERN_INFO "Clocks: Sync 2:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = ((cpu_base / cpu_prediv) * cpu_mul) - / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = cpu_clk.rate / 2; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - } else { - printk(KERN_INFO "Clocks: Sync 1:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = ((dsp_base / dsp_prediv) * dsp_mul) - / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - - cpu_clk.rate = bus_clk.rate; - } - - printk(KERN_INFO "Clocks: Setting USB clock\n"); - usb_base = bus_clk.rate; - calculate(usb_base, TNETD7200_DEF_USB_CLK, &usb_prediv, - &usb_postdiv, &usb_mul); - tnetd7200_set_clock(usb_base, &clocks->usb, - usb_prediv, usb_postdiv, -1, usb_mul, - TNETD7200_DEF_USB_CLK); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clkdev_create(clk, "dsp", NULL); -} - -void __init ar7_init_clocks(void) -{ - struct clk *clk; - - switch (ar7_chip_id()) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - tnetd7200_init_clocks(); - break; - case AR7_CHIP_7300: - tnetd7300_init_clocks(); - break; - default: - break; - } - clk = clk_register_fixed_rate(NULL, "bus", NULL, 0, bus_clk.rate); - clkdev_create(clk, "bus", NULL); - /* adjust vbus clock rate */ - clk = clk_register_fixed_factor(NULL, "vbus", "bus", 0, 1, 2); - clkdev_create(clk, "vbus", NULL); - clkdev_create(clk, "cpmac", "cpmac.1"); - clkdev_create(clk, "cpmac", "cpmac.0"); -} diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c deleted file mode 100644 index 4ed833b9cc2f..000000000000 --- a/arch/mips/ar7/gpio.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 
2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009-2010 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/export.h> -#include <linux/gpio/driver.h> - -#include <asm/mach-ar7/ar7.h> - -#define AR7_GPIO_MAX 32 -#define TITAN_GPIO_MAX 51 - -struct ar7_gpio_chip { - void __iomem *regs; - struct gpio_chip chip; -}; - -static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT; - - return !!(readl(gpio_in) & (1 << gpio)); -} - -static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0; - void __iomem *gpio_in1 = gpch->regs + TITAN_GPIO_INPUT_1; - - return readl(gpio >> 5 ? gpio_in1 : gpio_in0) & (1 << (gpio & 0x1f)); -} - -static void ar7_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT; - unsigned tmp; - - tmp = readl(gpio_out) & ~(1 << gpio); - if (value) - tmp |= 1 << gpio; - writel(tmp, gpio_out); -} - -static void titan_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0; - void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1; - unsigned tmp; - - tmp = readl(gpio >> 5 ? gpio_out1 : gpio_out0) & ~(1 << (gpio & 0x1f)); - if (value) - tmp |= 1 << (gpio & 0x1f); - writel(tmp, gpio >> 5 ? gpio_out1 : gpio_out0); -} - -static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - writel(readl(gpio_dir) | (1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_dir1 : gpio_dir0); - return 0; -} - -static int ar7_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - ar7_gpio_set_value(chip, gpio, value); - writel(readl(gpio_dir) & ~(1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - titan_gpio_set_value(chip, gpio, value); - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) & ~(1 << - (gpio & 0x1f)), gpio >> 5 ? 
gpio_dir1 : gpio_dir0); - - return 0; -} - -static struct ar7_gpio_chip ar7_gpio_chip = { - .chip = { - .label = "ar7-gpio", - .direction_input = ar7_gpio_direction_input, - .direction_output = ar7_gpio_direction_output, - .set = ar7_gpio_set_value, - .get = ar7_gpio_get_value, - .base = 0, - .ngpio = AR7_GPIO_MAX, - } -}; - -static struct ar7_gpio_chip titan_gpio_chip = { - .chip = { - .label = "titan-gpio", - .direction_input = titan_gpio_direction_input, - .direction_output = titan_gpio_direction_output, - .set = titan_gpio_set_value, - .get = titan_gpio_get_value, - .base = 0, - .ngpio = TITAN_GPIO_MAX, - } -}; - -static inline int ar7_gpio_enable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) | (1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_enable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_enable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_enable_titan(gpio) : - ar7_gpio_enable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_enable); - -static inline int ar7_gpio_disable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) & ~(1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_disable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) & ~(1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_disable(unsigned gpio) -{ - return ar7_is_titan() ? 
ar7_gpio_disable_titan(gpio) : - ar7_gpio_disable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_disable); - -struct titan_gpio_cfg { - u32 reg; - u32 shift; - u32 func; -}; - -static const struct titan_gpio_cfg titan_gpio_table[] = { - /* reg, start bit, mux value */ - {4, 24, 1}, - {4, 26, 1}, - {4, 28, 1}, - {4, 30, 1}, - {5, 6, 1}, - {5, 8, 1}, - {5, 10, 1}, - {5, 12, 1}, - {7, 14, 3}, - {7, 16, 3}, - {7, 18, 3}, - {7, 20, 3}, - {7, 22, 3}, - {7, 26, 3}, - {7, 28, 3}, - {7, 30, 3}, - {8, 0, 3}, - {8, 2, 3}, - {8, 4, 3}, - {8, 10, 3}, - {8, 14, 3}, - {8, 16, 3}, - {8, 18, 3}, - {8, 20, 3}, - {9, 8, 3}, - {9, 10, 3}, - {9, 12, 3}, - {9, 14, 3}, - {9, 18, 3}, - {9, 20, 3}, - {9, 24, 3}, - {9, 26, 3}, - {9, 28, 3}, - {9, 30, 3}, - {10, 0, 3}, - {10, 2, 3}, - {10, 8, 3}, - {10, 10, 3}, - {10, 12, 3}, - {10, 14, 3}, - {13, 12, 3}, - {13, 14, 3}, - {13, 16, 3}, - {13, 18, 3}, - {13, 24, 3}, - {13, 26, 3}, - {13, 28, 3}, - {13, 30, 3}, - {14, 2, 3}, - {14, 6, 3}, - {14, 8, 3}, - {14, 12, 3} -}; - -static int titan_gpio_pinsel(unsigned gpio) -{ - struct titan_gpio_cfg gpio_cfg; - u32 mux_status, pin_sel_reg, tmp; - void __iomem *pin_sel = (void __iomem *)KSEG1ADDR(AR7_REGS_PINSEL); - - if (gpio >= ARRAY_SIZE(titan_gpio_table)) - return -EINVAL; - - gpio_cfg = titan_gpio_table[gpio]; - pin_sel_reg = gpio_cfg.reg - 1; - - mux_status = (readl(pin_sel + pin_sel_reg) >> gpio_cfg.shift) & 0x3; - - /* Check the mux status */ - if (!((mux_status == 0) || (mux_status == gpio_cfg.func))) - return 0; - - /* Set the pin sel value */ - tmp = readl(pin_sel + pin_sel_reg); - tmp |= ((gpio_cfg.func & 0x3) << gpio_cfg.shift); - writel(tmp, pin_sel + pin_sel_reg); - - return 0; -} - -/* Perform minimal Titan GPIO configuration */ -static void titan_gpio_init(void) -{ - unsigned i; - - for (i = 44; i < 48; i++) { - titan_gpio_pinsel(i); - ar7_gpio_enable_titan(i); - titan_gpio_direction_input(&titan_gpio_chip.chip, i); - } -} - -int __init ar7_gpio_init(void) -{ - int ret; - struct ar7_gpio_chip *gpch; - unsigned size; - - if (!ar7_is_titan()) { - gpch = &ar7_gpio_chip; - size = 0x10; - } else { - gpch = &titan_gpio_chip; - size = 0x1f; - } - - gpch->regs = ioremap(AR7_REGS_GPIO, size); - if (!gpch->regs) { - printk(KERN_ERR "%s: failed to ioremap regs\n", - gpch->chip.label); - return -ENOMEM; - } - - ret = gpiochip_add_data(&gpch->chip, gpch); - if (ret) { - printk(KERN_ERR "%s: failed to add gpiochip\n", - gpch->chip.label); - iounmap(gpch->regs); - return ret; - } - printk(KERN_INFO "%s: registered %d GPIOs\n", - gpch->chip.label, gpch->chip.ngpio); - - if (ar7_is_titan()) - titan_gpio_init(); - - return ret; -} diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c deleted file mode 100644 index f0a7942d393e..000000000000 --- a/arch/mips/ar7/irq.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> - -#include <asm/irq_cpu.h> -#include <asm/mipsregs.h> -#include <asm/mach-ar7/ar7.h> - -#define EXCEPT_OFFSET 0x80 -#define PACE_OFFSET 0xA0 -#define CHNLS_OFFSET 0x200 - -#define REG_OFFSET(irq, reg) ((irq) / 32 * 0x4 + reg * 0x10) -#define SEC_REG_OFFSET(reg) (EXCEPT_OFFSET + reg * 0x8) -#define SEC_SR_OFFSET (SEC_REG_OFFSET(0)) /* 0x80 */ -#define CR_OFFSET(irq) (REG_OFFSET(irq, 1)) /* 0x10 */ -#define SEC_CR_OFFSET (SEC_REG_OFFSET(1)) /* 0x88 */ -#define ESR_OFFSET(irq) (REG_OFFSET(irq, 
2)) /* 0x20 */ -#define SEC_ESR_OFFSET (SEC_REG_OFFSET(2)) /* 0x90 */ -#define ECR_OFFSET(irq) (REG_OFFSET(irq, 3)) /* 0x30 */ -#define SEC_ECR_OFFSET (SEC_REG_OFFSET(3)) /* 0x98 */ -#define PIR_OFFSET (0x40) -#define MSR_OFFSET (0x44) -#define PM_OFFSET(irq) (REG_OFFSET(irq, 5)) /* 0x50 */ -#define TM_OFFSET(irq) (REG_OFFSET(irq, 6)) /* 0x60 */ - -#define REG(addr) ((u32 *)(KSEG1ADDR(AR7_REGS_IRQ) + addr)) - -#define CHNL_OFFSET(chnl) (CHNLS_OFFSET + (chnl * 4)) - -static int ar7_irq_base; - -static void ar7_unmask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ESR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_mask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ECR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_ack_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(CR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_unmask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET)); -} - -static void ar7_mask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET)); -} - -static void ar7_ack_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET)); -} - -static struct irq_chip ar7_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_irq, - .irq_mask = ar7_mask_irq, - .irq_ack = ar7_ack_irq -}; - -static struct irq_chip ar7_sec_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_sec_irq, - .irq_mask = ar7_mask_sec_irq, - .irq_ack = ar7_ack_sec_irq, -}; - -static void __init ar7_irq_init(int base) -{ - int i; - /* - * Disable interrupts and clear pending - */ - writel(0xffffffff, REG(ECR_OFFSET(0))); - writel(0xff, REG(ECR_OFFSET(32))); - writel(0xffffffff, REG(SEC_ECR_OFFSET)); - writel(0xffffffff, REG(CR_OFFSET(0))); - writel(0xff, REG(CR_OFFSET(32))); - writel(0xffffffff, REG(SEC_CR_OFFSET)); - - ar7_irq_base = base; - - for (i = 0; i < 40; i++) { - writel(i, REG(CHNL_OFFSET(i))); - /* Primary IRQ's */ - irq_set_chip_and_handler(base + i, &ar7_irq_type, - handle_level_irq); - /* Secondary IRQ's */ - if (i < 32) - irq_set_chip_and_handler(base + i + 40, - &ar7_sec_irq_type, - handle_level_irq); - } - - if (request_irq(2, no_action, IRQF_NO_THREAD, "AR7 cascade interrupt", - NULL)) - pr_err("Failed to request irq 2 (AR7 cascade interrupt)\n"); - if (request_irq(ar7_irq_base, no_action, IRQF_NO_THREAD, - "AR7 cascade interrupt", NULL)) { - pr_err("Failed to request irq %d (AR7 cascade interrupt)\n", - ar7_irq_base); - } - set_c0_status(IE_IRQ0); -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(); - ar7_irq_init(8); -} - -static void ar7_cascade(void) -{ - u32 status; - int i, irq; - - /* Primary IRQ's */ - irq = readl(REG(PIR_OFFSET)) & 0x3f; - if (irq) { - do_IRQ(ar7_irq_base + irq); - return; - } - - /* Secondary IRQ's are cascaded through primary '0' */ - writel(1, REG(CR_OFFSET(irq))); - status = readl(REG(SEC_SR_OFFSET)); - for (i = 0; i < 32; i++) { - if (status & 1) { - do_IRQ(ar7_irq_base + i + 40); - return; - } - status >>= 1; - } - - spurious_interrupt(); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(7); - else if (pending & STATUSF_IP2) /* int0 hardware line */ - ar7_cascade(); - else - spurious_interrupt(); -} diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c deleted file mode 100644 index 
ce8024c1a54e..000000000000 --- a/arch/mips/ar7/memory.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - */ -#include <linux/memblock.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/pfn.h> -#include <linux/proc_fs.h> -#include <linux/string.h> -#include <linux/swap.h> - -#include <asm/bootinfo.h> -#include <asm/page.h> -#include <asm/sections.h> - -#include <asm/mach-ar7/ar7.h> - -static int __init memsize(void) -{ - u32 size = (64 << 20); - u32 *addr = (u32 *)KSEG1ADDR(AR7_SDRAM_BASE + size - 4); - u32 *kernel_end = (u32 *)KSEG1ADDR(CPHYSADDR((u32)&_end)); - u32 *tmpaddr = addr; - - while (tmpaddr > kernel_end) { - *tmpaddr = (u32)tmpaddr; - size >>= 1; - tmpaddr -= size >> 2; - } - - do { - tmpaddr += size >> 2; - if (*tmpaddr != (u32)tmpaddr) - break; - size <<= 1; - } while (size < (64 << 20)); - - writel((u32)tmpaddr, &addr); - - return size; -} - -void __init prom_meminit(void) -{ - unsigned long pages; - - pages = memsize() >> PAGE_SHIFT; - memblock_add(PHYS_OFFSET, pages << PAGE_SHIFT); -} diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c deleted file mode 100644 index 215149a85d83..000000000000 --- a/arch/mips/ar7/platform.c +++ /dev/null @@ -1,722 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/platform_device.h> -#include <linux/mtd/physmap.h> -#include <linux/serial.h> -#include <linux/serial_8250.h> -#include <linux/ioport.h> -#include <linux/io.h> -#include <linux/vlynq.h> -#include <linux/leds.h> -#include <linux/string.h> -#include <linux/etherdevice.h> -#include <linux/phy.h> -#include <linux/phy_fixed.h> -#include <linux/gpio.h> -#include <linux/clk.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -/***************************************************************************** - * VLYNQ Bus - ****************************************************************************/ -struct plat_vlynq_data { - struct plat_vlynq_ops ops; - int gpio_bit; - int reset_bit; -}; - -static int vlynq_on(struct vlynq_device *dev) -{ - int ret; - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ret = gpio_request(pdata->gpio_bit, "vlynq"); - if (ret) - goto out; - - ar7_device_reset(pdata->reset_bit); - - ret = ar7_gpio_disable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = ar7_gpio_enable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = gpio_direction_output(pdata->gpio_bit, 0); - if (ret) - goto out_gpio_enabled; - - msleep(50); - - gpio_set_value(pdata->gpio_bit, 1); - - msleep(50); - - return 0; - -out_gpio_enabled: - ar7_gpio_disable(pdata->gpio_bit); -out_enabled: - ar7_device_disable(pdata->reset_bit); - gpio_free(pdata->gpio_bit); -out: - return ret; -} - -static void vlynq_off(struct vlynq_device *dev) -{ - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ar7_gpio_disable(pdata->gpio_bit); - gpio_free(pdata->gpio_bit); - ar7_device_disable(pdata->reset_bit); -} - -static struct resource vlynq_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ0, - .end = AR7_REGS_VLYNQ0 + 0xff, - }, - { - .name = "irq", - .flags = 
IORESOURCE_IRQ, - .start = 29, - .end = 29, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x04000000, - .end = 0x04ffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 80, - .end = 111, - }, -}; - -static struct resource vlynq_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ1, - .end = AR7_REGS_VLYNQ1 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 33, - .end = 33, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x0c000000, - .end = 0x0cffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 112, - .end = 143, - }, -}; - -static struct plat_vlynq_data vlynq_low_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 20, - .gpio_bit = 18, -}; - -static struct plat_vlynq_data vlynq_high_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 16, - .gpio_bit = 19, -}; - -static struct platform_device vlynq_low = { - .id = 0, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_low_data, - }, - .resource = vlynq_low_res, - .num_resources = ARRAY_SIZE(vlynq_low_res), -}; - -static struct platform_device vlynq_high = { - .id = 1, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_high_data, - }, - .resource = vlynq_high_res, - .num_resources = ARRAY_SIZE(vlynq_high_res), -}; - -/***************************************************************************** - * Flash - ****************************************************************************/ -static struct resource physmap_flash_resource = { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x10000000, - .end = 0x107fffff, -}; - -static const char *ar7_probe_types[] = { "ar7part", NULL }; - -static struct physmap_flash_data physmap_flash_data = { - .width = 2, - .part_probe_types = ar7_probe_types, -}; - -static struct platform_device physmap_flash = { - .name = "physmap-flash", - .dev = { - .platform_data = &physmap_flash_data, - }, - .resource = &physmap_flash_resource, - .num_resources = 1, -}; - -/***************************************************************************** - * Ethernet - ****************************************************************************/ -static struct resource cpmac_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC0, - .end = AR7_REGS_MAC0 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 27, - .end = 27, - }, -}; - -static struct resource cpmac_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC1, - .end = AR7_REGS_MAC1 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 41, - .end = 41, - }, -}; - -static struct fixed_phy_status fixed_phy_status __initdata = { - .link = 1, - .speed = 100, - .duplex = 1, -}; - -static struct plat_cpmac_data cpmac_low_data = { - .reset_bit = 17, - .power_bit = 20, - .phy_mask = 0x80000000, -}; - -static struct plat_cpmac_data cpmac_high_data = { - .reset_bit = 21, - .power_bit = 22, - .phy_mask = 0x7fffffff, -}; - -static u64 cpmac_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device cpmac_low = { - .id = 0, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_low_data, - }, - .resource = cpmac_low_res, - .num_resources = ARRAY_SIZE(cpmac_low_res), -}; - -static struct platform_device cpmac_high = { - .id = 1, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - 
.coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_high_data, - }, - .resource = cpmac_high_res, - .num_resources = ARRAY_SIZE(cpmac_high_res), -}; - -static void __init cpmac_get_mac(int instance, unsigned char *dev_addr) -{ - char name[5], *mac; - - sprintf(name, "mac%c", 'a' + instance); - mac = prom_getenv(name); - if (!mac && instance) { - sprintf(name, "mac%c", 'a'); - mac = prom_getenv(name); - } - - if (mac) { - if (!mac_pton(mac, dev_addr)) { - pr_warn("cannot parse mac address, using random address\n"); - eth_random_addr(dev_addr); - } - } else - eth_random_addr(dev_addr); -} - -/***************************************************************************** - * USB - ****************************************************************************/ -static struct resource usb_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_USB, - .end = AR7_REGS_USB + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 32, - .end = 32, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x03400000, - .end = 0x03401fff, - }, -}; - -static struct platform_device ar7_udc = { - .name = "ar7_udc", - .resource = usb_res, - .num_resources = ARRAY_SIZE(usb_res), -}; - -/***************************************************************************** - * LEDs - ****************************************************************************/ -static const struct gpio_led default_leds[] = { - { - .name = "status", - .gpio = 8, - .active_low = 1, - }, -}; - -static const struct gpio_led titan_leds[] = { - { .name = "status", .gpio = 8, .active_low = 1, }, - { .name = "wifi", .gpio = 13, .active_low = 1, }, -}; - -static const struct gpio_led dsl502t_leds[] = { - { - .name = "status", - .gpio = 9, - .active_low = 1, - }, - { - .name = "ethernet", - .gpio = 7, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, -}; - -static const struct gpio_led dg834g_leds[] = { - { - .name = "ppp", - .gpio = 6, - .active_low = 1, - }, - { - .name = "status", - .gpio = 7, - .active_low = 1, - }, - { - .name = "adsl", - .gpio = 8, - .active_low = 1, - }, - { - .name = "wifi", - .gpio = 12, - .active_low = 1, - }, - { - .name = "power", - .gpio = 14, - .active_low = 1, - .default_trigger = "default-on", - }, -}; - -static const struct gpio_led fb_sl_leds[] = { - { - .name = "1", - .gpio = 7, - }, - { - .name = "2", - .gpio = 13, - .active_low = 1, - }, - { - .name = "3", - .gpio = 10, - .active_low = 1, - }, - { - .name = "4", - .gpio = 12, - .active_low = 1, - }, - { - .name = "5", - .gpio = 9, - .active_low = 1, - }, -}; - -static const struct gpio_led fb_fon_leds[] = { - { - .name = "1", - .gpio = 8, - }, - { - .name = "2", - .gpio = 3, - .active_low = 1, - }, - { - .name = "3", - .gpio = 5, - }, - { - .name = "4", - .gpio = 4, - .active_low = 1, - }, - { - .name = "5", - .gpio = 11, - .active_low = 1, - }, -}; - -static const struct gpio_led gt701_leds[] = { - { - .name = "inet:green", - .gpio = 13, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, - { - .name = "inet:red", - .gpio = 9, - .active_low = 1, - }, - { - .name = "power:red", - .gpio = 7, - .active_low = 1, - }, - { - .name = "power:green", - .gpio = 8, - .active_low = 1, - .default_trigger = "default-on", - }, - { - .name = "ethernet", - .gpio = 10, - .active_low = 1, - }, -}; - -static struct gpio_led_platform_data ar7_led_data; - -static struct platform_device ar7_gpio_leds = { - .name = "leds-gpio", - .dev = { - .platform_data = 
&ar7_led_data, - } -}; - -static void __init detect_leds(void) -{ - char *prid, *usb_prod; - - /* Default LEDs */ - ar7_led_data.num_leds = ARRAY_SIZE(default_leds); - ar7_led_data.leds = default_leds; - - /* FIXME: the whole thing is unreliable */ - prid = prom_getenv("ProductID"); - usb_prod = prom_getenv("usb_prod"); - - /* If we can't get the product id from PROM, use the default LEDs */ - if (!prid) - return; - - if (strstr(prid, "Fritz_Box_FON")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_fon_leds); - ar7_led_data.leds = fb_fon_leds; - } else if (strstr(prid, "Fritz_Box_")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_sl_leds); - ar7_led_data.leds = fb_sl_leds; - } else if ((!strcmp(prid, "AR7RD") || !strcmp(prid, "AR7DB")) - && usb_prod != NULL && strstr(usb_prod, "DSL-502T")) { - ar7_led_data.num_leds = ARRAY_SIZE(dsl502t_leds); - ar7_led_data.leds = dsl502t_leds; - } else if (strstr(prid, "DG834")) { - ar7_led_data.num_leds = ARRAY_SIZE(dg834g_leds); - ar7_led_data.leds = dg834g_leds; - } else if (strstr(prid, "CYWM") || strstr(prid, "CYWL")) { - ar7_led_data.num_leds = ARRAY_SIZE(titan_leds); - ar7_led_data.leds = titan_leds; - } else if (strstr(prid, "GT701")) { - ar7_led_data.num_leds = ARRAY_SIZE(gt701_leds); - ar7_led_data.leds = gt701_leds; - } -} - -/***************************************************************************** - * Watchdog - ****************************************************************************/ -static struct resource ar7_wdt_res = { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = -1, /* Filled at runtime */ - .end = -1, /* Filled at runtime */ -}; - -static struct platform_device ar7_wdt = { - .name = "ar7_wdt", - .resource = &ar7_wdt_res, - .num_resources = 1, -}; - -/***************************************************************************** - * Init - ****************************************************************************/ -static int __init ar7_register_uarts(void) -{ -#ifdef CONFIG_SERIAL_8250 - static struct uart_port uart_port __initdata; - struct clk *bus_clk; - int res; - - memset(&uart_port, 0, sizeof(struct uart_port)); - - bus_clk = clk_get(NULL, "bus"); - if (IS_ERR(bus_clk)) - panic("unable to get bus clk"); - - uart_port.type = PORT_AR7; - uart_port.uartclk = clk_get_rate(bus_clk) / 2; - uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; - uart_port.regshift = 2; - - uart_port.line = 0; - uart_port.irq = AR7_IRQ_UART0; - uart_port.mapbase = AR7_REGS_UART0; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - - /* Only TNETD73xx have a second serial port */ - if (ar7_has_second_uart()) { - uart_port.line = 1; - uart_port.irq = AR7_IRQ_UART1; - uart_port.mapbase = UR8_REGS_UART1; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - } -#endif - - return 0; -} - -static void __init titan_fixup_devices(void) -{ - /* Set vlynq0 data */ - vlynq_low_data.reset_bit = 15; - vlynq_low_data.gpio_bit = 14; - - /* Set vlynq1 data */ - vlynq_high_data.reset_bit = 16; - vlynq_high_data.gpio_bit = 7; - - /* Set vlynq0 resources */ - vlynq_low_res[0].start = TITAN_REGS_VLYNQ0; - vlynq_low_res[0].end = TITAN_REGS_VLYNQ0 + 0xff; - vlynq_low_res[1].start = 33; - vlynq_low_res[1].end = 33; - vlynq_low_res[2].start = 0x0c000000; - vlynq_low_res[2].end = 0x0fffffff; - vlynq_low_res[3].start = 80; - vlynq_low_res[3].end = 111; - - /* Set vlynq1 resources */ - 
vlynq_high_res[0].start = TITAN_REGS_VLYNQ1; - vlynq_high_res[0].end = TITAN_REGS_VLYNQ1 + 0xff; - vlynq_high_res[1].start = 34; - vlynq_high_res[1].end = 34; - vlynq_high_res[2].start = 0x40000000; - vlynq_high_res[2].end = 0x43ffffff; - vlynq_high_res[3].start = 112; - vlynq_high_res[3].end = 143; - - /* Set cpmac0 data */ - cpmac_low_data.phy_mask = 0x40000000; - - /* Set cpmac1 data */ - cpmac_high_data.phy_mask = 0x80000000; - - /* Set cpmac0 resources */ - cpmac_low_res[0].start = TITAN_REGS_MAC0; - cpmac_low_res[0].end = TITAN_REGS_MAC0 + 0x7ff; - - /* Set cpmac1 resources */ - cpmac_high_res[0].start = TITAN_REGS_MAC1; - cpmac_high_res[0].end = TITAN_REGS_MAC1 + 0x7ff; -} - -static int __init ar7_register_devices(void) -{ - void __iomem *bootcr; - u32 val; - int res; - - res = ar7_gpio_init(); - if (res) - pr_warn("unable to register gpios: %d\n", res); - - res = ar7_register_uarts(); - if (res) - pr_err("unable to setup uart(s): %d\n", res); - - res = platform_device_register(&physmap_flash); - if (res) - pr_warn("unable to register physmap-flash: %d\n", res); - - if (ar7_is_titan()) - titan_fixup_devices(); - - ar7_device_disable(vlynq_low_data.reset_bit); - res = platform_device_register(&vlynq_low); - if (res) - pr_warn("unable to register vlynq-low: %d\n", res); - - if (ar7_has_high_vlynq()) { - ar7_device_disable(vlynq_high_data.reset_bit); - res = platform_device_register(&vlynq_high); - if (res) - pr_warn("unable to register vlynq-high: %d\n", res); - } - - if (ar7_has_high_cpmac()) { - res = fixed_phy_add(PHY_POLL, cpmac_high.id, - &fixed_phy_status); - if (!res) { - cpmac_get_mac(1, cpmac_high_data.dev_addr); - - res = platform_device_register(&cpmac_high); - if (res) - pr_warn("unable to register cpmac-high: %d\n", - res); - } else - pr_warn("unable to add cpmac-high phy: %d\n", res); - } else - cpmac_low_data.phy_mask = 0xffffffff; - - res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); - if (!res) { - cpmac_get_mac(0, cpmac_low_data.dev_addr); - res = platform_device_register(&cpmac_low); - if (res) - pr_warn("unable to register cpmac-low: %d\n", res); - } else - pr_warn("unable to add cpmac-low phy: %d\n", res); - - detect_leds(); - res = platform_device_register(&ar7_gpio_leds); - if (res) - pr_warn("unable to register leds: %d\n", res); - - res = platform_device_register(&ar7_udc); - if (res) - pr_warn("unable to register usb slave: %d\n", res); - - /* Register watchdog only if enabled in hardware */ - bootcr = ioremap(AR7_REGS_DCL, 4); - val = readl(bootcr); - iounmap(bootcr); - if (val & AR7_WDT_HW_ENA) { - if (ar7_has_high_vlynq()) - ar7_wdt_res.start = UR8_REGS_WDT; - else - ar7_wdt_res.start = AR7_REGS_WDT; - - ar7_wdt_res.end = ar7_wdt_res.start + 0x20; - res = platform_device_register(&ar7_wdt); - if (res) - pr_warn("unable to register watchdog: %d\n", res); - } - - return 0; -} -device_initcall(ar7_register_devices); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c deleted file mode 100644 index 5810d3993fc6..000000000000 --- a/arch/mips/ar7/prom.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Putting things on the screen/serial line using YAMONs facilities. 
- */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/serial_reg.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/string.h> -#include <linux/io.h> -#include <asm/bootinfo.h> -#include <asm/setup.h> - -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -#define MAX_ENTRY 80 - -struct env_var { - char *name; - char *value; -}; - -static struct env_var adam2_env[MAX_ENTRY]; - -char *prom_getenv(const char *name) -{ - int i; - - for (i = 0; (i < MAX_ENTRY) && adam2_env[i].name; i++) - if (!strcmp(name, adam2_env[i].name)) - return adam2_env[i].value; - - return NULL; -} -EXPORT_SYMBOL(prom_getenv); - -static void __init ar7_init_cmdline(int argc, char *argv[]) -{ - int i; - - for (i = 1; i < argc; i++) { - strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE); - if (i < (argc - 1)) - strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); - } -} - -struct psbl_rec { - u32 psbl_size; - u32 env_base; - u32 env_size; - u32 ffs_base; - u32 ffs_size; -}; - -static const char psp_env_version[] __initconst = "TIENV0.8"; - -struct psp_env_chunk { - u8 num; - u8 ctrl; - u16 csum; - u8 len; - char data[11]; -} __packed; - -struct psp_var_map_entry { - u8 num; - char *value; -}; - -static const struct psp_var_map_entry psp_var_map[] = { - { 1, "cpufrequency" }, - { 2, "memsize" }, - { 3, "flashsize" }, - { 4, "modetty0" }, - { 5, "modetty1" }, - { 8, "maca" }, - { 9, "macb" }, - { 28, "sysfrequency" }, - { 38, "mipsfrequency" }, -}; - -/* - -Well-known variable (num is looked up in table above for matching variable name) -Example: cpufrequency=211968000 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 01 |CTRL|CHECKSUM | 01 | _2 | _1 | _1 | _9 | _6 | _8 | _0 | _0 | _0 | \0 | FF -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in a single chunk -Example: NAME=VALUE -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 01 | _N | _A | _M | _E | _0 | _V | _A | _L | _U | _E | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in 2 chunks (len is the number of chunks) -Example: bootloaderVersion=1.3.7.15 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 02 | _b | _o | _o | _t | _l | _o | _a | _d | _e | _r | _V -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| _e | _r | _s | _i | _o | _n | \0 | _1 | _. | _3 | _. | _7 | _. 
| _1 | _5 | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Data is padded with 0xFF - -*/ - -#define PSP_ENV_SIZE 4096 - -static char psp_env_data[PSP_ENV_SIZE] = { 0, }; - -static char * __init lookup_psp_var_map(u8 num) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(psp_var_map); i++) - if (psp_var_map[i].num == num) - return psp_var_map[i].value; - - return NULL; -} - -static void __init add_adam2_var(char *name, char *value) -{ - int i; - - for (i = 0; i < MAX_ENTRY; i++) { - if (!adam2_env[i].name) { - adam2_env[i].name = name; - adam2_env[i].value = value; - return; - } else if (!strcmp(adam2_env[i].name, name)) { - adam2_env[i].value = value; - return; - } - } -} - -static int __init parse_psp_env(void *psp_env_base) -{ - int i, n; - char *name, *value; - struct psp_env_chunk *chunks = (struct psp_env_chunk *)psp_env_data; - - memcpy_fromio(chunks, psp_env_base, PSP_ENV_SIZE); - - i = 1; - n = PSP_ENV_SIZE / sizeof(struct psp_env_chunk); - while (i < n) { - if ((chunks[i].num == 0xff) || ((i + chunks[i].len) > n)) - break; - value = chunks[i].data; - if (chunks[i].num) { - name = lookup_psp_var_map(chunks[i].num); - } else { - name = value; - value += strlen(name) + 1; - } - if (name) - add_adam2_var(name, value); - i += chunks[i].len; - } - return 0; -} - -static void __init ar7_init_env(struct env_var *env) -{ - int i; - struct psbl_rec *psbl = (struct psbl_rec *)(KSEG1ADDR(0x14000300)); - void *psp_env = (void *)KSEG1ADDR(psbl->env_base); - - if (strcmp(psp_env, psp_env_version) == 0) { - parse_psp_env(psp_env); - } else { - for (i = 0; i < MAX_ENTRY; i++, env++) - if (env->name) - add_adam2_var(env->name, env->value); - } -} - -static void __init console_config(void) -{ -#ifdef CONFIG_SERIAL_8250_CONSOLE - char console_string[40]; - int baud = 0; - char parity = '\0', bits = '\0', flow = '\0'; - char *s, *p; - - if (strstr(arcs_cmdline, "console=")) - return; - - s = prom_getenv("modetty0"); - if (s) { - baud = simple_strtoul(s, &p, 10); - s = p; - if (*s == ',') - s++; - if (*s) - parity = *s++; - if (*s == ',') - s++; - if (*s) - bits = *s++; - if (*s == ',') - s++; - if (*s == 'h') - flow = 'r'; - } - - if (baud == 0) - baud = 38400; - if (parity != 'n' && parity != 'o' && parity != 'e') - parity = 'n'; - if (bits != '7' && bits != '8') - bits = '8'; - - if (flow == 'r') - sprintf(console_string, " console=ttyS0,%d%c%c%c", baud, - parity, bits, flow); - else - sprintf(console_string, " console=ttyS0,%d%c%c", baud, parity, - bits); - strlcat(arcs_cmdline, console_string, COMMAND_LINE_SIZE); -#endif -} - -void __init prom_init(void) -{ - ar7_init_cmdline(fw_arg0, (char **)fw_arg1); - ar7_init_env((struct env_var *)fw_arg2); - console_config(); -} - -#define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) -static inline unsigned int serial_in(int offset) -{ - return readl((void *)PORT(offset)); -} - -static inline void serial_out(int offset, int value) -{ - writel(value, (void *)PORT(offset)); -} - -void prom_putchar(char c) -{ - while ((serial_in(UART_LSR) & UART_LSR_TEMT) == 0) - ; - serial_out(UART_TX, c); -} diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c deleted file mode 100644 index 352d5dbc777c..000000000000 --- a/arch/mips/ar7/setup.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. 
- */ -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/pm.h> -#include <linux/time.h> - -#include <asm/reboot.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -static void ar7_machine_restart(char *command) -{ - u32 *softres_reg = ioremap(AR7_REGS_RESET + AR7_RESET_SOFTWARE, 1); - - writel(1, softres_reg); -} - -static void ar7_machine_halt(void) -{ - while (1) - ; -} - -static void ar7_machine_power_off(void) -{ - u32 *power_reg = (u32 *)ioremap(AR7_REGS_POWER, 1); - u32 power_state = readl(power_reg) | (3 << 30); - - writel(power_state, power_reg); - ar7_machine_halt(); -} - -const char *get_system_type(void) -{ - u16 chip_id = ar7_chip_id(); - u16 titan_variant_id = titan_chip_id(); - - switch (chip_id) { - case AR7_CHIP_7100: - return "TI AR7 (TNETD7100)"; - case AR7_CHIP_7200: - return "TI AR7 (TNETD7200)"; - case AR7_CHIP_7300: - return "TI AR7 (TNETD7300)"; - case AR7_CHIP_TITAN: - switch (titan_variant_id) { - case TITAN_CHIP_1050: - return "TI AR7 (TNETV1050)"; - case TITAN_CHIP_1055: - return "TI AR7 (TNETV1055)"; - case TITAN_CHIP_1056: - return "TI AR7 (TNETV1056)"; - case TITAN_CHIP_1060: - return "TI AR7 (TNETV1060)"; - } - fallthrough; - default: - return "TI AR7 (unknown)"; - } -} - -static int __init ar7_init_console(void) -{ - return 0; -} -console_initcall(ar7_init_console); - -/* - * Initializes basic routines and structures pointers, memory size (as - * given by the bios and saves the command line. - */ -void __init plat_mem_setup(void) -{ - unsigned long io_base; - - _machine_restart = ar7_machine_restart; - _machine_halt = ar7_machine_halt; - pm_power_off = ar7_machine_power_off; - - io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000); - if (!io_base) - panic("Can't remap IO base!"); - set_io_port_base(io_base); - - prom_meminit(); - - printk(KERN_INFO "%s, ID: 0x%04x, Revision: 0x%02x\n", - get_system_type(), ar7_chip_id(), ar7_chip_rev()); -} diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c deleted file mode 100644 index 72aa77d7087b..000000000000 --- a/arch/mips/ar7/time.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Setting up the clock on the MIPS boards. 
- */ - -#include <linux/init.h> -#include <linux/time.h> -#include <linux/err.h> -#include <linux/clk.h> - -#include <asm/time.h> -#include <asm/mach-ar7/ar7.h> - -void __init plat_time_init(void) -{ - struct clk *cpu_clk; - - /* Initialize ar7 clocks so the CPU clock frequency is correct */ - ar7_init_clocks(); - - cpu_clk = clk_get(NULL, "cpu"); - if (IS_ERR(cpu_clk)) { - printk(KERN_ERR "unable to get cpu clock\n"); - return; - } - - mips_hpt_frequency = clk_get_rate(cpu_clk) / 2; -} diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 96d28f211121..09dcd2c561d9 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -13,11 +13,6 @@ #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset)) #endif -#ifdef CONFIG_AR7 -#include <ar7.h> -#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) -#endif - #ifdef CONFIG_MACH_INGENIC #define INGENIC_UART_BASE_ADDR (0x10030000 + 0x1000 * CONFIG_ZBOOT_INGENIC_UART) #define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset)) diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi index acbbe8c4664c..c5c5a094c37d 100644 --- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi @@ -366,7 +366,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 9c0099919db7..504e895e916e 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -461,7 +461,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts index 129b6710b699..f9c262cc2e96 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc1", "mediatek,mt7621-soc"; - model = "GB-PC1"; + model = "GnuBee GB-PC1"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts index f810cd10f4f4..b281e13f22ed 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; - model = "GB-PC2"; + model = "GnuBee GB-PC2"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621.dtsi b/arch/mips/boot/dts/ralink/mt7621.dtsi index 7caed0d14f11..35a10258f235 100644 --- a/arch/mips/boot/dts/ralink/mt7621.dtsi +++ b/arch/mips/boot/dts/ralink/mt7621.dtsi @@ -300,14 +300,13 @@ compatible = "mediatek,mt7621-eth"; reg = <0x1e100000 0x10000>; - clocks = <&sysc MT7621_CLK_FE>, - <&sysc MT7621_CLK_ETH>; + clocks = <&sysc MT7621_CLK_FE>, <&sysc MT7621_CLK_ETH>; clock-names = "fe", "ethif"; #address-cells = <1>; #size-cells = <0>; - resets = <&sysc MT7621_RST_FE &sysc MT7621_RST_ETH>; + resets = <&sysc MT7621_RST_FE>, <&sysc MT7621_RST_ETH>; reset-names = "fe", "eth"; interrupt-parent = <&gic>; diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig deleted file mode 100644 index 329c60aa570a..000000000000 --- a/arch/mips/configs/ar7_defconfig +++ /dev/null @@ -1,119 +0,0 @@ -# 
CONFIG_LOCALVERSION_AUTO is not set -CONFIG_KERNEL_LZMA=y -CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_ELF_CORE is not set -# CONFIG_KALLSYMS is not set -# CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_AR7=y -CONFIG_HZ_100=y -CONFIG_KEXEC=y -# CONFIG_SECCOMP is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_BSD_DISKLABEL=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_MROUTE=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_CUBIC is not set -CONFIG_TCP_CONG_WESTWOOD=y -# CONFIG_TCP_CONG_HTCP is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_RAW=m -CONFIG_ATM=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y -CONFIG_NET_SCHED=y -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y -CONFIG_HAMRADIO=y -CONFIG_CFG80211=m -CONFIG_MAC80211=m -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=y -CONFIG_NETDEVICES=y -CONFIG_CPMAC=y -CONFIG_FIXED_PHY=y -CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_HW_RANDOM=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_AR7_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -# CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_SQUASHFS=y -# CONFIG_CRYPTO_HW is not set -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="rootfstype=squashfs,jffs2" diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 1843468f84a3..00329bb5de5a 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -177,7 +177,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index fdf374574105..65adb538030d 100644 --- a/arch/mips/configs/jazz_defconfig +++ 
b/arch/mips/configs/jazz_defconfig @@ -70,10 +70,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 83d9a8ff4270..38f17b658421 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -229,9 +229,6 @@ CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index ae1a7793e810..6f8046024557 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -317,11 +317,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index c07e30f63d8b..16a91eeff67f 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 0a5701020d3f..264aba29ea4f 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 5c5e2186210c..08e1c1f2f4de 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -310,10 +310,6 @@ CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 68b1e5d458cf..bc27d99c9436 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -71,8 +71,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #endif /* CONFIG_KPROBES */ #endif /* _ASM_KPROBES_H */ diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h deleted file mode 100644 index 1e8621a6afa3..000000000000 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ /dev/null @@ -1,191 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev 
<ejka@openwrt.org> - */ - -#ifndef __AR7_H__ -#define __AR7_H__ - -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/errno.h> - -#include <asm/addrspace.h> - -#define AR7_SDRAM_BASE 0x14000000 - -#define AR7_REGS_BASE 0x08610000 - -#define AR7_REGS_MAC0 (AR7_REGS_BASE + 0x0000) -#define AR7_REGS_GPIO (AR7_REGS_BASE + 0x0900) -/* 0x08610A00 - 0x08610BFF (512 bytes, 128 bytes / clock) */ -#define AR7_REGS_POWER (AR7_REGS_BASE + 0x0a00) -#define AR7_REGS_CLOCKS (AR7_REGS_POWER + 0x80) -#define UR8_REGS_CLOCKS (AR7_REGS_POWER + 0x20) -#define AR7_REGS_UART0 (AR7_REGS_BASE + 0x0e00) -#define AR7_REGS_USB (AR7_REGS_BASE + 0x1200) -#define AR7_REGS_RESET (AR7_REGS_BASE + 0x1600) -#define AR7_REGS_PINSEL (AR7_REGS_BASE + 0x160C) -#define AR7_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1800) -#define AR7_REGS_DCL (AR7_REGS_BASE + 0x1a00) -#define AR7_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1c00) -#define AR7_REGS_MDIO (AR7_REGS_BASE + 0x1e00) -#define AR7_REGS_IRQ (AR7_REGS_BASE + 0x2400) -#define AR7_REGS_MAC1 (AR7_REGS_BASE + 0x2800) - -#define AR7_REGS_WDT (AR7_REGS_BASE + 0x1f00) -#define UR8_REGS_WDT (AR7_REGS_BASE + 0x0b00) -#define UR8_REGS_UART1 (AR7_REGS_BASE + 0x0f00) - -/* Titan registers */ -#define TITAN_REGS_ESWITCH_BASE (0x08640000) -#define TITAN_REGS_MAC0 (TITAN_REGS_ESWITCH_BASE) -#define TITAN_REGS_MAC1 (TITAN_REGS_ESWITCH_BASE + 0x0800) -#define TITAN_REGS_MDIO (TITAN_REGS_ESWITCH_BASE + 0x02000) -#define TITAN_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1c00) -#define TITAN_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1300) - -#define AR7_RESET_PERIPHERAL 0x0 -#define AR7_RESET_SOFTWARE 0x4 -#define AR7_RESET_STATUS 0x8 - -#define AR7_RESET_BIT_CPMAC_LO 17 -#define AR7_RESET_BIT_CPMAC_HI 21 -#define AR7_RESET_BIT_MDIO 22 -#define AR7_RESET_BIT_EPHY 26 - -#define TITAN_RESET_BIT_EPHY1 28 - -/* GPIO control registers */ -#define AR7_GPIO_INPUT 0x0 -#define AR7_GPIO_OUTPUT 0x4 -#define AR7_GPIO_DIR 0x8 -#define AR7_GPIO_ENABLE 0xc -#define TITAN_GPIO_INPUT_0 0x0 -#define TITAN_GPIO_INPUT_1 0x4 -#define TITAN_GPIO_OUTPUT_0 0x8 -#define TITAN_GPIO_OUTPUT_1 0xc -#define TITAN_GPIO_DIR_0 0x10 -#define TITAN_GPIO_DIR_1 0x14 -#define TITAN_GPIO_ENBL_0 0x18 -#define TITAN_GPIO_ENBL_1 0x1c - -#define AR7_CHIP_7100 0x18 -#define AR7_CHIP_7200 0x2b -#define AR7_CHIP_7300 0x05 -#define AR7_CHIP_TITAN 0x07 -#define TITAN_CHIP_1050 0x0f -#define TITAN_CHIP_1055 0x0e -#define TITAN_CHIP_1056 0x0d -#define TITAN_CHIP_1060 0x07 - -/* Interrupts */ -#define AR7_IRQ_UART0 15 -#define AR7_IRQ_UART1 16 - -/* Clocks */ -#define AR7_AFE_CLOCK 35328000 -#define AR7_REF_CLOCK 25000000 -#define AR7_XTAL_CLOCK 24000000 - -/* DCL */ -#define AR7_WDT_HW_ENA 0x10 - -struct plat_cpmac_data { - int reset_bit; - int power_bit; - u32 phy_mask; - char dev_addr[6]; -}; - -struct plat_dsl_data { - int reset_bit_dsl; - int reset_bit_sar; -}; - -static inline int ar7_is_titan(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x24)) & 0xffff) == - AR7_CHIP_TITAN; -} - -static inline u16 ar7_chip_id(void) -{ - return ar7_is_titan() ? AR7_CHIP_TITAN : (readl((void *) - KSEG1ADDR(AR7_REGS_GPIO + 0x14)) & 0xffff); -} - -static inline u16 titan_chip_id(void) -{ - unsigned int val = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + - TITAN_GPIO_INPUT_1)); - return ((val >> 12) & 0x0f); -} - -static inline u8 ar7_chip_rev(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + (ar7_is_titan() ? 
0x24 : - 0x14))) >> 16) & 0xff; -} - -static inline int ar7_has_high_cpmac(void) -{ - u16 chip_id = ar7_chip_id(); - switch (chip_id) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - return 0; - case AR7_CHIP_7300: - return 1; - default: - return -ENXIO; - } -} -#define ar7_has_high_vlynq ar7_has_high_cpmac -#define ar7_has_second_uart ar7_has_high_cpmac - -static inline void ar7_device_enable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) | (1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_disable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) & ~(1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_reset(u32 bit) -{ - ar7_device_disable(bit); - ar7_device_enable(bit); -} - -static inline void ar7_device_on(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) | (1 << bit), power_reg); - msleep(20); -} - -static inline void ar7_device_off(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) & ~(1 << bit), power_reg); - msleep(20); -} - -int __init ar7_gpio_init(void); -void __init ar7_init_clocks(void); - -/* Board specific GPIO functions */ -int ar7_gpio_enable(unsigned gpio); -int ar7_gpio_disable(unsigned gpio); - -#endif /* __AR7_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/irq.h b/arch/mips/include/asm/mach-ar7/irq.h deleted file mode 100644 index 46bb730ea970..000000000000 --- a/arch/mips/include/asm/mach-ar7/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Shamelessly copied from asm-mips/mach-emma2rh/ - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_AR7_IRQ_H -#define __ASM_AR7_IRQ_H - -#define NR_IRQS 256 - -#include <asm/mach-generic/irq.h> - -#endif /* __ASM_AR7_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ar7/prom.h b/arch/mips/include/asm/mach-ar7/prom.h deleted file mode 100644 index 9e1d20b06f57..000000000000 --- a/arch/mips/include/asm/mach-ar7/prom.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Florian Fainelli <florian@openwrt.org> - */ - -#ifndef __PROM_H__ -#define __PROM_H__ - -extern char *prom_getenv(const char *name); -extern void prom_meminit(void); - -#endif /* __PROM_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/spaces.h b/arch/mips/include/asm/mach-ar7/spaces.h deleted file mode 100644 index a004d94dfbdd..000000000000 --- a/arch/mips/include/asm/mach-ar7/spaces.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. - */ -#ifndef _ASM_AR7_SPACES_H -#define _ASM_AR7_SPACES_H - -/* - * This handles the memory map. - * We handle pages at KSEG0 for kernels with 32 bit address space. 
- */ -#define PAGE_OFFSET _AC(0x94000000, UL) -#define PHYS_OFFSET _AC(0x14000000, UL) - -#include <asm/mach-generic/spaces.h> - -#endif /* __ASM_AR7_SPACES_H */ diff --git a/arch/mips/include/asm/mach-loongson32/dma.h b/arch/mips/include/asm/mach-loongson32/dma.h deleted file mode 100644 index e917b3ccb2c2..000000000000 --- a/arch/mips/include/asm/mach-loongson32/dma.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_DMA_H -#define __ASM_MACH_LOONGSON32_DMA_H - -#define LS1X_DMA_CHANNEL0 0 -#define LS1X_DMA_CHANNEL1 1 -#define LS1X_DMA_CHANNEL2 2 - -struct plat_ls1x_dma { - int nr_channels; -}; - -extern struct plat_ls1x_dma ls1b_dma_pdata; - -#endif /* __ASM_MACH_LOONGSON32_DMA_H */ diff --git a/arch/mips/include/asm/mach-loongson32/nand.h b/arch/mips/include/asm/mach-loongson32/nand.h deleted file mode 100644 index aaf5ed19d78d..000000000000 --- a/arch/mips/include/asm/mach-loongson32/nand.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_NAND_H -#define __ASM_MACH_LOONGSON32_NAND_H - -#include <linux/dmaengine.h> -#include <linux/mtd/partitions.h> - -struct plat_ls1x_nand { - struct mtd_partition *parts; - unsigned int nr_parts; - - int hold_cycle; - int wait_cycle; -}; - -extern struct plat_ls1x_nand ls1b_nand_pdata; - -bool ls1x_dma_filter_fn(struct dma_chan *chan, void *param); - -#endif /* __ASM_MACH_LOONGSON32_NAND_H */ diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h index 2cdcfb5f6012..f74292b13bc3 100644 --- a/arch/mips/include/asm/mach-loongson32/platform.h +++ b/arch/mips/include/asm/mach-loongson32/platform.h @@ -8,9 +8,6 @@ #include <linux/platform_device.h> -#include <dma.h> -#include <nand.h> - extern struct platform_device ls1x_uart_pdev; extern struct platform_device ls1x_eth0_pdev; extern struct platform_device ls1x_eth1_pdev; diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f5b2ef979b43..8f0a7263a9d6 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -66,7 +66,6 @@ copy_word: LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word - b process_entry done: #ifdef CONFIG_SMP diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 8075590a9f83..623eb4bc7b41 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -15,8 +15,6 @@ #include <platform.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> /* 8250/16550 compatible UART */ #define LS1X_UART(_id) \ diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c index fed8d432ef20..fe115bdcb22c 100644 --- a/arch/mips/loongson32/ls1b/board.c +++ b/arch/mips/loongson32/ls1b/board.c @@ -8,8 +8,6 @@ #include <linux/sizes.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> #include <platform.h> static const struct gpio_led ls1x_gpio_leds[] __initconst = { diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 105569c1b712..13009666204f 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -4,8 +4,8 @@ * Copyright (C) 2012 John Crispin 
<john@phrozen.org> */ -#include <linux/of_irq.h> #include <linux/of_pci.h> +#include <linux/pci.h> int (*ltq_pci_plat_arch_init)(struct pci_dev *dev) = NULL; int (*ltq_pci_plat_dev_init)(struct pci_dev *dev) = NULL; diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h index 3cecf14d51de..c0c5d1df7ad1 100644 --- a/arch/powerpc/include/asm/fb.h +++ b/arch/powerpc/include/asm/fb.h @@ -8,12 +8,7 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot, unsigned long vm_start, unsigned long vm_end, unsigned long offset) { - /* - * PowerPC's implementation of phys_mem_access_prot() does - * not use the file argument. Set it to NULL in preparation - * of later updates to the interface. - */ - return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot); + return __phys_mem_access_prot(PHYS_PFN(offset), vm_end - vm_start, prot); } #define pgprot_framebuffer pgprot_framebuffer diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index c8e4b4fd4e33..4525a9c68260 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -84,8 +84,6 @@ struct arch_optimized_insn { kprobe_opcode_t *insn; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4f6e7d7ee388..d31a5ec1550d 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,7 +10,7 @@ #include <linux/export.h> struct pt_regs; -struct pci_bus; +struct pci_bus; struct device_node; struct iommu_table; struct rtc_time; @@ -78,8 +78,8 @@ struct machdep_calls { unsigned char (*nvram_read_val)(int addr); void (*nvram_write_val)(int addr, unsigned char val); ssize_t (*nvram_write)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_size)(void); + ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); + ssize_t (*nvram_size)(void); void (*nvram_sync)(void); /* Exception handlers */ @@ -102,12 +102,11 @@ struct machdep_calls { */ long (*feature_call)(unsigned int feature, ...); - /* Get legacy PCI/IDE interrupt mapping */ + /* Get legacy PCI/IDE interrupt mapping */ int (*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel); - + /* Get access protection for /dev/mem */ - pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long pfn, + pgprot_t (*phys_mem_access_prot)(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index f5078a7dd85a..46a9c4491ed0 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -105,9 +105,7 @@ extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus); -struct file; -extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +extern pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 2bfb7dd3b49e..9224f23065ff 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ 
-120,9 +120,15 @@ static inline void mark_initmem_nx(void) { } int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); + struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); +static inline pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + return __phys_mem_access_prot(pfn, size, vma_prot); +} #define __HAVE_PHYS_MEM_ACCESS_PROT void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 3e28579f7c62..ebe259bdd462 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1280,13 +1280,19 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = { /* * A simple iommu_ops to allow less cruft in generic VFIO code. */ -static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, - struct device *dev) +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; int ret = -EINVAL; + /* At first attach the ownership is already set */ + if (!domain) + return 0; + if (!grp) return -ENODEV; @@ -1297,17 +1303,22 @@ static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, return ret; } -static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev) -{ - struct iommu_group *grp = iommu_group_get(dev); - struct iommu_table_group *table_group; +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; - table_group = iommu_group_get_iommudata(grp); - table_group->ops->release_ownership(table_group); -} +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; -static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { - .attach_dev = spapr_tce_blocking_iommu_attach_dev, +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + .ops = &spapr_tce_platform_domain_ops, }; static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) @@ -1322,22 +1333,6 @@ static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) -{ - struct iommu_domain *dom; - - if (type != IOMMU_DOMAIN_BLOCKED) - return NULL; - - dom = kzalloc(sizeof(*dom), GFP_KERNEL); - if (!dom) - return NULL; - - dom->ops = &spapr_tce_blocking_domain_ops; - - return dom; -} - static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) { struct pci_dev *pdev; @@ -1371,12 +1366,12 @@ static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) } static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, .capable = spapr_tce_iommu_capable, - .domain_alloc = spapr_tce_iommu_domain_alloc, .probe_device = spapr_tce_iommu_probe_device, .release_device = 
spapr_tce_iommu_release_device, .device_group = spapr_tce_iommu_device_group, - .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma, }; static struct attribute *spapr_tce_iommu_attrs[] = { diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 040255ddb569..d95a48eff412 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -521,8 +521,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) * PCI device, it tries to find the PCI device first and calls the * above routine */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot) { diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 9454b8395b6a..f38df72e64b8 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * @m: seq_file output target. + * @v: Unused. * * Base + size description of a range of RTAS-addressable memory set * aside for user space to use as work area(s) for certain RTAS diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1717554b04b1..3a440004b97d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,18 +35,18 @@ unsigned long long memory_limit; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); + return ppc_md.phys_mem_access_prot(pfn, size, vma_prot); if (!page_is_ram(pfn)) vma_prot = pgprot_noncached(vma_prot); return vma_prot; } -EXPORT_SYMBOL(phys_mem_access_prot); +EXPORT_SYMBOL(__phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG static DEFINE_MUTEX(linear_mapping_mutex); diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c index b37d52f40360..7fe34bee84d8 100644 --- a/arch/powerpc/platforms/pseries/rtas-work-area.c +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -184,6 +184,7 @@ machine_arch_initcall(pseries, rtas_work_area_allocator_init); /** * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas. + * @limit: Upper limit for memblock allocation. 
*/ void __init rtas_work_area_reserve_arena(const phys_addr_t limit) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index eaa15a20e6ae..95a2a06acc6a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -39,6 +39,7 @@ config RISCV select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_VDSO_DATA + select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK @@ -48,6 +49,7 @@ config RISCV select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU + select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG @@ -174,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=8) +config HAVE_SHADOW_CALL_STACK + def_bool $(cc-option,-fsanitize=shadow-call-stack) + # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769 + depends on $(ld-option,--no-relax-gp) + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -635,6 +642,15 @@ config THREAD_SIZE_ORDER Specify the Pages of thread stack size (from 4KB to 64KB), which also affects irq stack size, which is equal to thread stack size. +config RISCV_MISALIGNED + bool "Support misaligned load/store traps for kernel and userspace" + select SYSCTL_ARCH_UNALIGN_ALLOW + default y + help + Say Y here if you want the kernel to embed support for misaligned + load/store for both kernel and userspace. When disabled, misaligned + accesses will generate SIGBUS in userspace and panic in the kernel. 
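The option pairs with the generic per-task unaligned-access control (note the SYSCTL_ARCH_UNALIGN_ALLOW select and the unaligned_ctl hooks added elsewhere in this series), so userspace can opt out of emulation. A minimal userspace sketch, assuming a kernel built with RISCV_MISALIGNED=y; the misaligned load at the end is purely illustrative:

    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>

    int main(void)
    {
            /* Ask for SIGBUS instead of kernel emulation of this
             * task's misaligned accesses. */
            if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0) {
                    perror("PR_SET_UNALIGN");
                    return 1;
            }

            char buf[2 * sizeof(long)];
            memset(buf, 0, sizeof(buf));

            /* Deliberately misaligned load: with PR_UNALIGN_SIGBUS in
             * effect this may raise SIGBUS rather than be emulated. */
            long v = *(long *)(buf + 1);

            printf("loaded %ld\n", v);
            return 0;
    }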
+ endmenu # "Platform type" menu "Kernel features" @@ -902,6 +918,9 @@ config PORTABLE select MMU select OF +config ARCH_PROC_KCORE_TEXT + def_bool y + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug index e69de29bb2d1..eafe17ebf710 100644 --- a/arch/riscv/Kconfig.debug +++ b/arch/riscv/Kconfig.debug @@ -0,0 +1 @@ +source "arch/riscv/kernel/tests/Kconfig.debug" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4d06f3402674..a74be78678eb 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,10 @@ endif endif endif +ifeq ($(CONFIG_SHADOW_CALL_STACK),y) + KBUILD_LDFLAGS += --no-relax-gp +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 22b13947bd13..8e7fc0edf21d 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -17,6 +17,7 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.* loader loader.o loader.lds loader.bin diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1edf3cd886c5..905881282a7c 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -37,6 +37,13 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_PM=y CONFIG_CPU_IDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +CONFIG_CPUFREQ_DT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_ACPI=y @@ -95,6 +102,7 @@ CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y +CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -102,6 +110,11 @@ CONFIG_PCIE_XILINX=y CONFIG_PCIE_FU740=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_NVME=m @@ -124,8 +137,11 @@ CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y +CONFIG_RAVB=y CONFIG_STMMAC_ETH=m +CONFIG_MICREL_PHY=y CONFIG_MICROSEMI_PHY=y +CONFIG_CAN_RCAR_CANFD=m CONFIG_INPUT_MOUSEDEV=y CONFIG_KEYBOARD_SUN4I_LRADC=m CONFIG_SERIAL_8250=y @@ -136,16 +152,24 @@ CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_I2C_CHARDEV=m CONFIG_I2C_MV64XXX=m +CONFIG_I2C_RIIC=y CONFIG_SPI=y +CONFIG_SPI_RSPI=m CONFIG_SPI_SIFIVE=y CONFIG_SPI_SUN6I=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_GPIO_SIFIVE=y +CONFIG_CPU_THERMAL=y +CONFIG_DEVFREQ_THERMAL=y +CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y +CONFIG_RENESAS_RZG2LWDT=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_GPIO=y CONFIG_DRM=m CONFIG_DRM_RADEON=m CONFIG_DRM_NOUVEAU=m @@ -153,39 +177,69 @@ CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_RZ=m +CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SIMPLE_CARD=m CONFIG_USB=y +CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_RENESAS_USBHS=m CONFIG_USB_STORAGE=y 
CONFIG_USB_UAS=y CONFIG_USB_MUSB_HDRC=m CONFIG_USB_MUSB_SUNXI=m CONFIG_NOP_USB_XCEIV=m +CONFIG_USB_GADGET=y +CONFIG_USB_RENESAS_USBHS_UDC=m +CONFIG_USB_CONFIGFS=m +CONFIG_USB_CONFIGFS_SERIAL=y +CONFIG_USB_CONFIGFS_ACM=y +CONFIG_USB_CONFIGFS_OBEX=y +CONFIG_USB_CONFIGFS_NCM=y +CONFIG_USB_CONFIGFS_ECM=y +CONFIG_USB_CONFIGFS_ECM_SUBSET=y +CONFIG_USB_CONFIGFS_RNDIS=y +CONFIG_USB_CONFIGFS_EEM=y +CONFIG_USB_CONFIGFS_MASS_STORAGE=y +CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_STARFIVE=y +CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m +CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RENESAS_OSTM=y CONFIG_SUN8I_DE2_CCU=m CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_ARCH_R9A07G043=y +CONFIG_IIO=y +CONFIG_RZG2L_ADC=m +CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m +CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_LIBNVDIMM=y CONFIG_NVMEM_SUNXI_SID=y CONFIG_EXT4_FS=y diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index d5604d2073bc..7dad0cf9d701 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -66,6 +66,8 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, + u32 *cboz_size, u32 *cbop_size); #else static inline void acpi_init_rintc_map(void) { } static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) @@ -79,6 +81,10 @@ static inline int acpi_get_riscv_isa(struct acpi_table_header *table, return -EINVAL; } +static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, + u32 *cbom_size, u32 *cboz_size, + u32 *cbop_size) { } + #endif /* CONFIG_ACPI */ #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 61ba8ed43d8f..36b955c762ba 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage unsigned long get_overflow_stack(void); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 114bbadaef41..b0487b39e674 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -82,6 +82,47 @@ .endr .endm +#ifdef CONFIG_SMP +#ifdef CONFIG_32BIT +#define PER_CPU_OFFSET_SHIFT 2 +#else +#define PER_CPU_OFFSET_SHIFT 3 +#endif + +.macro asm_per_cpu dst sym tmp + REG_L \tmp, TASK_TI_CPU_NUM(tp) + slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + la \dst, __per_cpu_offset + add \dst, \dst, \tmp + REG_L \tmp, 0(\dst) + la \dst, \sym + add \dst, \dst, \tmp +.endm +#else /* CONFIG_SMP */ +.macro asm_per_cpu dst sym tmp + la \dst, \sym +.endm +#endif /* CONFIG_SMP */ + +.macro load_per_cpu dst ptr tmp + asm_per_cpu \dst \ptr \tmp + REG_L \dst, 0(\dst) +.endm + +#ifdef CONFIG_SHADOW_CALL_STACK +/* gp is used as the shadow call stack pointer instead */ +.macro load_global_pointer +.endm +#else +/* load __global_pointer 
to gp */ +.macro load_global_pointer +.option push +.option norelax + la gp, __global_pointer$ +.option pop +.endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + /* save all GPs except x1 ~ x5 */ .macro save_from_x6_to_x31 REG_S x6, PT_T1(sp) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 65f6eee4ab8d..224b4dc02b50 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,13 +15,261 @@ #include <asm/barrier.h> #include <asm/bitsperlong.h> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) #include <asm-generic/bitops/__ffs.h> -#include <asm-generic/bitops/ffz.h> -#include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/fls.h> + +#else +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> + +#if (BITS_PER_LONG == 64) +#define CTZW "ctzw " +#define CLZW "clzw " +#elif (BITS_PER_LONG == 32) +#define CTZW "ctz " +#define CLZW "clz " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "ctz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return word; + +legacy: + num = 0; +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/** + * __ffs - find first set bit in a long word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable__fls(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "clz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return BITS_PER_LONG - 1 - word; + +legacy: + num = BITS_PER_LONG - 1; +#if BITS_PER_LONG == 64 + if (!(word & (~0ul << 32))) { + num -= 32; + word <<= 32; + } +#endif + if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { + num -= 16; + word <<= 16; + } + if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { + num -= 8; + word <<= 8; + } + if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { + num -= 4; + word <<= 4; + } + if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { + num -= 2; + word <<= 2; + } + if (!(word & (~0ul << (BITS_PER_LONG - 1)))) + num -= 1; + return num; +} + +/** + * __fls - find last set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __fls(word) \ + (__builtin_constant_p(word) ? 
\ + (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \ + variable__fls(word)) + +static __always_inline int variable_ffs(int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CTZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return r + 1; + +legacy: + r = 1; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +/** + * ffs - find first set bit in a word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs routines. + * + * ffs(value) returns 0 if value is 0 or the position of the first set bit if + * value is nonzero. The first (least significant) bit is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +static __always_inline int variable_fls(unsigned int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CLZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return 32 - r; + +legacy: + r = 32; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** + * fls - find last set bit in a word + * @x: the word to search + * + * This is defined in a similar way as ffs, but returns the position of the most + * significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last set bit if + * value is nonzero. The last (most significant) bit is at position 32. + */ +#define fls(x) \ +({ \ + typeof(x) x_ = (x); \ + __builtin_constant_p(x_) ? \ + (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + : \ + variable_fls(x_); \ +}) + +#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ + +#include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/sched.h> -#include <asm-generic/bitops/ffs.h> #include <asm-generic/bitops/hweight.h> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..a418c3112cd6 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -7,7 +7,10 @@ #define _ASM_CPUFEATURE_H #include <linux/bitmap.h> +#include <linux/jump_label.h> #include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <asm/errno.h> /* * These are probed via a device_initcall(), via either the SBI or directly @@ -30,6 +33,104 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. 
*/ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); +void riscv_user_isa_enable(void); + +#ifdef CONFIG_RISCV_MISALIGNED +bool unaligned_ctl_available(void); +bool check_unaligned_access_emulated(int cpu); +void unaligned_emulation_finish(void); +#else +static inline bool unaligned_ctl_available(void) +{ + return false; +} + +static inline bool check_unaligned_access_emulated(int cpu) +{ + return false; +} + +static inline void unaligned_emulation_finish(void) {} +#endif + +unsigned long riscv_get_elf_hwcap(void); + +struct riscv_isa_ext_data { + const unsigned int id; + const char *name; + const char *property; +}; + +extern const struct riscv_isa_ext_data riscv_isa_ext[]; +extern const size_t riscv_isa_ext_count; +extern bool riscv_isa_fallback; + +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool +riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } + + return true; +l_no: + return false; +} + +static __always_inline bool +riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} + +static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} #endif diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index b3b2dfbdf945..06c236bfab53 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -14,7 +14,7 @@ #include <asm/auxvec.h> #include <asm/byteorder.h> #include <asm/cacheinfo.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> /* * These are used to set parameters in the core dumps. 
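The bitops.h hunk above routes __ffs/__fls/ffs/fls through Zbb's ctz/clz instructions via ALTERNATIVE() when the extension is available, and keeps the open-coded binary search as the legacy fallback. As a rough illustration of what that fallback computes, here is a minimal userspace model (a sketch only; legacy_fls is a hypothetical name, not a kernel symbol):

#include <assert.h>
#include <stdio.h>

/* Model of the non-Zbb fls path above: halve the search window on each
 * step, starting from the upper 16 bits of a 32-bit word. */
static int legacy_fls(unsigned int x)
{
	int r = 32;

	if (!x)
		return 0;
	if (!(x & 0xffff0000u)) { x <<= 16; r -= 16; }
	if (!(x & 0xff000000u)) { x <<= 8; r -= 8; }
	if (!(x & 0xf0000000u)) { x <<= 4; r -= 4; }
	if (!(x & 0xc0000000u)) { x <<= 2; r -= 2; }
	if (!(x & 0x80000000u)) r -= 1;
	return r;
}

int main(void)
{
	/* Cross-check against the compiler builtin used by the patch's
	 * __builtin_constant_p() fast path. */
	for (unsigned int i = 1; i; i <<= 1)
		assert(legacy_fls(i) == 32 - __builtin_clz(i));
	printf("fls(0x90) = %d\n", legacy_fls(0x90)); /* prints 8 */
	return 0;
}

On Zbb hardware the "j legacy" alternative is patched to a nop, so execution falls through to a single clz (or clzw) instruction and this branch-heavy path is never taken.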
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 6e4dee49d84b..7ab5e34318c8 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -8,4 +8,18 @@ void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); +#ifdef CONFIG_RISCV_MISALIGNED +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); +#else +static inline int handle_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +static inline int handle_misaligned_store(struct pt_regs *regs) +{ + return -1; +} +#endif + #endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index b55b434f0059..83ed25e43553 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -95,31 +95,31 @@ asm volatile(ALTERNATIVE( \ #endif /* - * dcache.ipa rs1 (invalidate, physical address) + * th.dcache.ipa rs1 (invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01010 rs1 000 00000 0001011 - * dache.iva rs1 (invalida, virtual address) + * th.dache.iva rs1 (invalida, virtual address) * 0000001 00110 rs1 000 00000 0001011 * - * dcache.cpa rs1 (clean, physical address) + * th.dcache.cpa rs1 (clean, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 - * dcache.cva rs1 (clean, virtual address) + * th.dcache.cva rs1 (clean, virtual address) * 0000001 00101 rs1 000 00000 0001011 * - * dcache.cipa rs1 (clean then invalidate, physical address) + * th.dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01011 rs1 000 00000 0001011 - * dcache.civa rs1 (... virtual address) + * th.dcache.civa rs1 (... 
virtual address) * 0000001 00111 rs1 000 00000 0001011 * - * sync.s (make sure all cache operations finished) + * th.sync.s (make sure all cache operations finished) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000000 11001 00000 000 00000 0001011 */ -#define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0255000b" -#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_INVAL_A0 ".long 0x0265000b" +#define THEAD_CLEAN_A0 ".long 0x0255000b" +#define THEAD_FLUSH_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 6fc51c1b34cf..06d30526ef3b 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,9 +8,6 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H -#include <asm/alternative-macros.h> -#include <asm/errno.h> -#include <linux/bits.h> #include <uapi/asm/hwcap.h> #define RISCV_ISA_EXT_a ('a' - 'a') @@ -69,76 +66,4 @@ #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA #endif -#ifndef __ASSEMBLY__ - -#include <linux/jump_label.h> - -unsigned long riscv_get_elf_hwcap(void); - -struct riscv_isa_ext_data { - const unsigned int id; - const char *name; - const char *property; -}; - -extern const struct riscv_isa_ext_data riscv_isa_ext[]; -extern const size_t riscv_isa_ext_count; -extern bool riscv_isa_fallback; - -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } - - return true; -l_no: - return false; -} - -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } - - return false; -l_yes: - return true; -} - -#endif - #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 78936f4ff513..5c48f48e79a6 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,6 +8,11 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 5 +#define RISCV_HWPROBE_MAX_KEY 6 + +static inline bool riscv_hwprobe_key_is_valid(__s64 key) +{ + return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY; +} #endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..e27179b26086 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -180,19 +180,19 @@ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ __RD(0), RS1(gaddr), RS2(vmid)) -#define CBO_inval(base) \ +#define CBO_INVAL(base) \ 
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(0)) -#define CBO_clean(base) \ +#define CBO_CLEAN(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(1)) -#define CBO_flush(base) \ +#define CBO_FLUSH(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) -#define CBO_zero(base) \ +#define CBO_ZERO(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h index e4042d297580..6441ded3b0cf 100644 --- a/arch/riscv/include/asm/irq_stack.h +++ b/arch/riscv/include/asm/irq_stack.h @@ -12,6 +12,9 @@ DECLARE_PER_CPU(ulong *, irq_stack_ptr); +asmlinkage void call_on_irq_stack(struct pt_regs *regs, + void (*func)(struct pt_regs *)); + #ifdef CONFIG_VMAP_STACK /* * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 5488ecc337b6..57e887bfa34c 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -33,8 +33,8 @@ #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so - * define the PAGE_OFFSET value for SV39. + * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so + * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 59ba1fbaf784..00f3369570a8 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -33,4 +33,7 @@ _PAGE_WRITE | _PAGE_EXEC | \ _PAGE_USER | _PAGE_GLOBAL)) +static const __maybe_unused int pgtable_l4_enabled; +static const __maybe_unused int pgtable_l5_enabled; + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 7a5097202e15..9a2c780a11e9 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -126,14 +126,18 @@ enum napot_cont_order { /* * [63:59] T-Head Memory Type definitions: - * - * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * bit[63] SO - Strong Order + * bit[62] C - Cacheable + * bit[61] B - Bufferable + * bit[60] SH - Shareable + * bit[59] Sec - Trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable - * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD 0UL -#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) +#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index f896708e8331..179bd4afece4 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -16,9 +16,9 @@ #define _PAGE_GLOBAL (1 << 5) /* Global */ #define _PAGE_ACCESSED (1 << 6) /* Set by hardware on any access */ #define _PAGE_DIRTY
(1 << 7) /* Set by hardware on any write */ -#define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#define _PAGE_SOFT (3 << 8) /* Reserved for software */ -#define _PAGE_SPECIAL _PAGE_SOFT +#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index b2ba3f79cfe9..294044429e8e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT +#include <asm/cpufeature.h> static __always_inline bool has_svnapot(void) { @@ -811,7 +812,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type - * bits 11 to XLEN-1: swap offset + * bits 12 to XLEN-1: swap offset */ #define __SWP_TYPE_SHIFT 7 #define __SWP_TYPE_BITS 5 @@ -914,7 +915,6 @@ extern uintptr_t _dtb_early_pa; #define dtb_early_pa _dtb_early_pa #endif /* CONFIG_XIP_KERNEL */ extern u64 satp_mode; -extern bool pgtable_l4_enabled; void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 441da1839c94..f19f861cda54 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,7 @@ #include <linux/const.h> #include <linux/cache.h> +#include <linux/prctl.h> #include <vdso/processor.h> @@ -82,6 +83,7 @@ struct thread_struct { unsigned long bad_cause; unsigned long vstate_ctrl; struct __riscv_v_ext_state vstate; + unsigned long align_ctl; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ @@ -94,6 +96,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ + .align_ctl = PR_UNALIGN_NOPRINT, \ } #define task_pt_regs(tsk) \ @@ -136,6 +139,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg); extern long riscv_v_vstate_ctrl_get_current(void); #endif /* CONFIG_RISCV_ISA_V */ +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + +#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 12dfda6bb924..0892f4421bc4 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -280,9 +280,6 @@ void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size); int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h new file mode 100644 index 000000000000..0e45db78b24b --- /dev/null +++ b/arch/riscv/include/asm/scs.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* Load init_shadow_call_stack to gp. */ +.macro scs_load_init_stack + la gp, init_shadow_call_stack + XIP_FIXUP_OFFSET gp +.endm + +/* Load the per-CPU IRQ shadow call stack to gp. 
*/ +.macro scs_load_irq_stack tmp + load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp +.endm + +/* Load task_scs_sp(current) to gp. */ +.macro scs_load_current + REG_L gp, TASK_TI_SCS_SP(tp) +.endm + +/* Load task_scs_sp(current) to gp, but only if tp has changed. */ +.macro scs_load_current_if_task_changed prev + beq \prev, tp, _skip_scs + scs_load_current +_skip_scs: +.endm + +/* Save gp to task_scs_sp(current). */ +.macro scs_save_current + REG_S gp, TASK_TI_SCS_SP(tp) +.endm + +#else /* CONFIG_SHADOW_CALL_STACK */ + +.macro scs_load_init_stack +.endm +.macro scs_load_irq_stack tmp +.endm +.macro scs_load_current +.endm +.macro scs_load_current_if_task_changed prev +.endm +.macro scs_save_current +.endm + +#endif /* CONFIG_SHADOW_CALL_STACK */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index a727be723c56..f90d8e42f3c7 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -9,7 +9,7 @@ #include <linux/jump_label.h> #include <linux/sched/task_stack.h> #include <asm/vector.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1833beb00489..574779900bfb 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -34,9 +34,6 @@ #ifndef __ASSEMBLY__ -extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; -extern unsigned long spin_shadow_stack; - #include <asm/processor.h> #include <asm/csr.h> @@ -60,8 +57,20 @@ struct thread_info { long user_sp; /* User stack pointer */ int cpu; unsigned long syscall_work; /* SYSCALL_WORK_ flags */ +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + void *scs_sp; +#endif }; +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_sp = init_shadow_call_stack, +#else +#define INIT_SCS +#endif + /* * macros/functions for gaining access to the thread information structure * @@ -71,6 +80,7 @@ struct thread_info { { \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ + INIT_SCS \ } void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 120bcf2ed8a8..1eb5682b2af6 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - flush_tlb_mm(tlb->mm); +#ifdef CONFIG_MMU + if (tlb->fullmm || tlb->need_flush_all) + flush_tlb_mm(tlb->mm); + else + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, + tlb_get_unmap_size(tlb)); +#endif } #endif /* _ASM_RISCV_TLB_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a09196f8de68..8f3418c5f172 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -11,6 +11,9 @@ #include <asm/smp.h> #include <asm/errata_list.h> +#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) +#define FLUSH_TLB_NO_ASID ((unsigned long)-1) + #ifdef CONFIG_MMU extern unsigned long asid_mask; @@ -32,9 +35,12 @@ static inline void local_flush_tlb_page(unsigned long addr) #if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int 
page_size); void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -51,14 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, local_flush_tlb_all(); } -#define flush_tlb_mm(mm) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_all(); + local_flush_tlb_all(); } +#define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#endif /* !CONFIG_SMP || !CONFIG_MMU */ + #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 14f5d27783b8..96b65a5396df 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -14,7 +14,7 @@ static inline void cpu_relax(void) __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); #endif -#ifdef __riscv_zihintpause +#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE /* * Reduce instruction retirement. * This assumes the PC changes. diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c5ee07b3df07..87aaef656257 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -15,7 +15,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/ptrace.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index d696d6610231..11a71b8533d5 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_IRELATIVE 58 /* Relocation types not used by the dynamic linker */ #define R_RISCV_BRANCH 16 @@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 -#define R_RISCV_LUI 46 #define R_RISCV_GPREL_I 47 #define R_RISCV_GPREL_S 48 #define R_RISCV_TPREL_I 49 @@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_SET16 55 #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 +#define R_RISCV_PLT32 59 +#define R_RISCV_SET_ULEB128 60 +#define R_RISCV_SUB_ULEB128 61 #endif /* _UAPI_ASM_RISCV_ELF_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index d43e306ce2f9..b659ffcfcdb4 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -29,6 +29,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZBA (1 << 3) #define RISCV_HWPROBE_EXT_ZBB (1 << 4) #define RISCV_HWPROBE_EXT_ZBS (1 << 5) +#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -36,6 +37,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0) #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) 
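Together with the RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE key defined just below, the RISCV_HWPROBE_EXT_ZICBOZ bit (reported under the existing RISCV_HWPROBE_KEY_IMA_EXT_0 key) lets userspace discover both the presence of Zicboz and its block size. A hedged usage sketch, assuming uapi headers new enough to carry these defines; there is no libc wrapper, so the syscall is invoked directly:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>
#include <asm/unistd.h>

int main(void)
{
	struct riscv_hwprobe pairs[2] = {
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE },
	};

	/* cpusetsize == 0 with cpus == NULL asks about all online CPUs. */
	if (syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0))
		return 1;

	if (pairs[0].value & RISCV_HWPROBE_EXT_ZICBOZ)
		printf("Zicboz block size: %llu bytes\n",
		       (unsigned long long)pairs[1].value);
	else
		printf("Zicboz not supported on every CPU\n");
	return 0;
}

Keys the kernel does not know are handed back with the key field set to -1, a value the new riscv_hwprobe_key_is_valid() helper rejects.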
+#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 95cf25d48405..fee22a3d1b53 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -57,9 +57,10 @@ obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o obj-y += probes/ +obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 56cb2c986c48..e619edc8b0cc 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -14,9 +14,10 @@ */ #include <linux/acpi.h> +#include <linux/efi.h> #include <linux/io.h> +#include <linux/memblock.h> #include <linux/pci.h> -#include <linux/efi.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - return (void __iomem *)memremap(phys, size, MEMREMAP_WB); + efi_memory_desc_t *md, *region = NULL; + pgprot_t prot; + + if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP))) + return NULL; + + for_each_efi_memory_desc(md) { + u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (phys < md->phys_addr || phys >= end) + continue; + + if (phys + size > end) { + pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n"); + return NULL; + } + region = md; + break; + } + + /* + * It is fine for AML to remap regions that are not represented in the + * EFI memory map at all, as it only describes normal memory, and MMIO + * regions that require a virtual mapping to make them accessible to + * the EFI runtime services. + */ + prot = PAGE_KERNEL_IO; + if (region) { + switch (region->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory\n"); + return NULL; + } + + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs. + * This particular use case only requires read access. + */ + fallthrough; + + case EFI_RUNTIME_SERVICES_CODE: + /* + * This would be unusual, but not problematic per se, + * as long as we take care not to create a writable + * mapping for executable code. + */ + prot = PAGE_KERNEL_RO; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + /* + * ACPI reclaim memory is used to pass firmware tables + * and other data that is intended for consumption by + * the OS only, which may decide it wants to reclaim + * that memory and use it for something else. We never + * do that, but we usually add it to the linear map + * anyway, in which case we should use the existing + * mapping. 
+ */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__va(phys); + fallthrough; + + default: + if (region->attribute & EFI_MEMORY_WB) + prot = PAGE_KERNEL; + else if ((region->attribute & EFI_MEMORY_WC) || + (region->attribute & EFI_MEMORY_WT)) + prot = pgprot_writecombine(PAGE_KERNEL); + } + } + + return ioremap_prot(phys, size, pgprot_val(prot)); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index d6a75aac1d27..a03129f40c46 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cpu_ops_sbi.h> +#include <asm/stacktrace.h> #include <asm/suspend.h> void asm_offsets(void); @@ -38,7 +39,11 @@ void asm_offsets(void) OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); +#ifdef CONFIG_SHADOW_CALL_STACK + OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); +#endif + OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); @@ -479,4 +484,8 @@ void asm_offsets(void) OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); + + DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); + OFFSET(STACKFRAME_FP, stackframe, fp); + OFFSET(STACKFRAME_RA, stackframe, ra); } diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index cfdecfbaad62..2b3d9398c113 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -9,7 +9,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -ENTRY(__riscv_copy_words_unaligned) +SYM_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned) 2: ret -END(__riscv_copy_words_unaligned) +SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. 
*/ /* Note: The size is truncated to a multiple of 8 */ -ENTRY(__riscv_copy_bytes_unaligned) +SYM_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 @@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned) 2: ret -END(__riscv_copy_bytes_unaligned) +SYM_FUNC_END(__riscv_copy_bytes_unaligned) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c17dacb1141c..d11d6320fb0d 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -125,13 +125,14 @@ old_interface: */ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) { - int rc; - for (; node; node = node->parent) { if (of_device_is_compatible(node, "riscv")) { - rc = riscv_of_processor_hartid(node, hartid); - if (!rc) - return 0; + *hartid = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hartid == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + return 0; } } @@ -202,9 +203,8 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f) +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) { - seq_puts(f, "isa\t\t: "); if (IS_ENABLED(CONFIG_32BIT)) seq_write(f, "rv32", 4); @@ -212,7 +212,7 @@ static void print_isa(struct seq_file *f) seq_write(f, "rv64", 4); for (int i = 0; i < riscv_isa_ext_count; i++) { - if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id)) + if (!__riscv_isa_extension_available(isa_bitmap, riscv_isa_ext[i].id)) continue; /* Only multi-letter extensions are split by underscores */ @@ -276,7 +276,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); - print_isa(m); + + /* + * For historical raisins, the isa: line is limited to the lowest common + * denominator of extensions supported across all harts. A true list of + * extensions supported on this hart is printed later in the hart isa: + * line. + */ + seq_puts(m, "isa\t\t: "); + print_isa(m, NULL); print_mmu(m); if (acpi_disabled) { @@ -292,6 +300,13 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); + + /* + * Print the ISA extensions specific to this hart, which may show + * additional extensions not present across all harts. 
+ */ + seq_puts(m, "hart isa\t: "); + print_isa(m, hart_isa[cpu_id].isa); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e3803822ab5a..b3785ffc1570 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,6 +8,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/cpuhotplug.h> #include <linux/ctype.h> #include <linux/log2.h> #include <linux/memory.h> @@ -29,6 +30,7 @@ #define MISALIGNED_ACCESS_JIFFIES_LG2 1 #define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) unsigned long elf_hwcap __read_mostly; @@ -93,10 +95,10 @@ static bool riscv_isa_extension_check(int id) return true; case RISCV_ISA_EXT_ZICBOZ: if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n"); + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); return false; } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("cboz-block-size present, but is not a power-of-2\n"); + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); return false; } return true; @@ -206,10 +208,11 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc switch (*ext) { case 's': /* - * Workaround for invalid single-letter 's' & 'u'(QEMU). + * Workaround for invalid single-letter 's' & 'u' (QEMU). * No need to set the bit in riscv_isa as 's' & 'u' are - * not valid ISA extensions. It works until multi-letter - * extension starting with "Su" appears. + * not valid ISA extensions. It works unless the first + * multi-letter extension in the ISA string begins with + * "Su" and is not prefixed with an underscore. */ if (ext[-1] != '_' && ext[1] == 'u') { ++isa; @@ -558,23 +561,21 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +static int check_unaligned_access(void *param) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; int ratio; unsigned long start_jiffies, now; - struct page *page; + struct page *page = param; void *dst; void *src; long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); - if (!page) { - pr_warn("Can't alloc pages to measure memcpy performance"); - return; - } + if (check_unaligned_access_emulated(cpu)) + return 0; /* Make an unaligned destination buffer. */ dst = (void *)((unsigned long)page_address(page) | 0x1); @@ -628,7 +629,7 @@ void check_unaligned_access(int cpu) pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", cpu); - goto out; + return 0; } if (word_cycles < byte_cycles) @@ -642,18 +643,90 @@ void check_unaligned_access(int cpu) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? 
"fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; + return 0; +} -out: - __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); } -static int check_unaligned_access_boot_cpu(void) +static int riscv_online_cpu(unsigned int cpu) { - check_unaligned_access(0); + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +/* Measure unaligned access on all CPUs present at boot in parallel. */ +static int check_unaligned_access_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), + GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* Setup hotplug callback for any new CPUs that come online. */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, NULL); + +out: + unaligned_emulation_finish(); + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); + +void riscv_user_isa_enable(void) +{ + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) + csr_set(CSR_SENVCFG, ENVCFG_CBZE); +} #ifdef CONFIG_RISCV_ALTERNATIVE /* diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 143a2bb3e697..54ca4564a926 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -9,10 +9,15 @@ #include <asm/asm.h> #include <asm/csr.h> +#include <asm/scs.h> #include <asm/unistd.h> +#include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/errata_list.h> +#include <linux/sizes.h> + + .section .irqentry.text, "ax" SYM_CODE_START(handle_exception) /* @@ -21,9 +26,9 @@ SYM_CODE_START(handle_exception) * register will contain 0, and we should continue on the current TP. 
*/ csrrw tp, CSR_SCRATCH, tp - bnez tp, _save_context + bnez tp, .Lsave_context -_restore_kernel_tpsp: +.Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH REG_S sp, TASK_TI_KERNEL_SP(tp) @@ -35,7 +40,7 @@ _restore_kernel_tpsp: REG_L sp, TASK_TI_KERNEL_SP(tp) #endif -_save_context: +.Lsave_context: REG_S sp, TASK_TI_USER_SP(tp) REG_L sp, TASK_TI_KERNEL_SP(tp) addi sp, sp, -(PT_SIZE_ON_STACK) @@ -73,10 +78,11 @@ _save_context: csrw CSR_SCRATCH, x0 /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer + + /* Load the kernel shadow call stack pointer if coming from userspace */ + scs_load_current_if_task_changed s5 + move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -123,6 +129,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) + /* Save the kernel shadow call stack pointer */ + scs_save_current + /* * Save TP into the scratch register , so we can find the kernel data * structures again. @@ -170,67 +179,15 @@ SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) - /* - * Takes the psuedo-spinlock for the shadow stack, in case multiple - * harts are concurrently overflowing their kernel stacks. We could - * store any value here, but since we're overflowing the kernel stack - * already we only have SP to use as a scratch register. So we just - * swap in the address of the spinlock, as that's definately non-zero. - * - * Pairs with a store_release in handle_bad_stack(). - */ -1: la sp, spin_shadow_stack - REG_AMOSWAP_AQ sp, sp, (sp) - bnez sp, 1b - - la sp, shadow_stack - addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + /* we reach here from kernel context, sscratch must be 0 */ + csrrw x31, CSR_SCRATCH, x31 + asm_per_cpu sp, overflow_stack, x31 + li x31, OVERFLOW_STACK_SIZE + add sp, sp, x31 + /* zero out x31 again and restore x31 */ + xor x31, x31, x31 + csrrw x31, CSR_SCRATCH, x31 - //save caller register to shadow stack - addi sp, sp, -(PT_SIZE_ON_STACK) - REG_S x1, PT_RA(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - la ra, restore_caller_reg - tail get_overflow_stack - -restore_caller_reg: - //save per-cpu overflow stack - REG_S a0, -8(sp) - //restore caller register from shadow_stack - REG_L x1, PT_RA(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - //load per-cpu overflow stack - REG_L sp, -8(sp) addi sp, sp, -(PT_SIZE_ON_STACK) //save context to overflow stack @@ -268,6 +225,43 @@ SYM_CODE_START(ret_from_fork) tail syscall_exit_to_user_mode SYM_CODE_END(ret_from_fork) +#ifdef CONFIG_IRQ_STACKS +/* + * void call_on_irq_stack(struct pt_regs *regs, + * void (*func)(struct pt_regs *)); + * + * Calls func(regs) using the per-CPU IRQ stack. 
+ */ +SYM_FUNC_START(call_on_irq_stack) + /* Create a frame record to save ra and s0 (fp) */ + addi sp, sp, -STACKFRAME_SIZE_ON_STACK + REG_S ra, STACKFRAME_RA(sp) + REG_S s0, STACKFRAME_FP(sp) + addi s0, sp, STACKFRAME_SIZE_ON_STACK + + /* Switch to the per-CPU shadow call stack */ + scs_save_current + scs_load_irq_stack t0 + + /* Switch to the per-CPU IRQ stack and call the handler */ + load_per_cpu t0, irq_stack_ptr, t1 + li t1, IRQ_STACK_SIZE + add sp, t0, t1 + jalr a1 + + /* Switch back to the thread shadow call stack */ + scs_load_current + + /* Switch back to the thread stack and restore ra and s0 */ + addi sp, s0, -STACKFRAME_SIZE_ON_STACK + REG_L ra, STACKFRAME_RA(sp) + REG_L s0, STACKFRAME_FP(sp) + addi sp, sp, STACKFRAME_SIZE_ON_STACK + + ret +SYM_FUNC_END(call_on_irq_stack) +#endif /* CONFIG_IRQ_STACKS */ + /* * Integer register context switch * The callee-saved registers must be saved and restored. @@ -297,6 +291,8 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + /* Save the kernel shadow call stack pointer */ + scs_save_current /* Restore context from next->thread */ REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) @@ -314,6 +310,8 @@ SYM_FUNC_START(__switch_to) REG_L s11, TASK_THREAD_S11_RA(a4) /* The offset of thread_info in task_struct is zero. */ move tp, a1 + /* Switch to the next shadow call stack */ + scs_load_current ret SYM_FUNC_END(__switch_to) @@ -324,7 +322,7 @@ SYM_FUNC_END(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -SYM_CODE_START(excp_vect_table) +SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -342,12 +340,11 @@ SYM_CODE_START(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -excp_vect_table_end: -SYM_CODE_END(excp_vect_table) +SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU -SYM_CODE_START(__user_rt_sigreturn) +SYM_DATA_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn ecall -SYM_CODE_END(__user_rt_sigreturn) +SYM_DATA_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index dd2205473de7..2c543f130f93 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -19,7 +19,7 @@ #include <asm/csr.h> #include <asm/asm-offsets.h> -ENTRY(__fstate_save) +SYM_FUNC_START(__fstate_save) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -60,9 +60,9 @@ ENTRY(__fstate_save) sw t0, TASK_THREAD_FCSR_F0(a0) csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_save) +SYM_FUNC_END(__fstate_save) -ENTRY(__fstate_restore) +SYM_FUNC_START(__fstate_restore) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -103,4 +103,125 @@ ENTRY(__fstate_restore) fscsr t0 csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_restore) +SYM_FUNC_END(__fstate_restore) + +#define get_f32(which) fmv.x.s a0, which; j 2f +#define put_f32(which) fmv.s.x which, a1; j 2f +#if __riscv_xlen == 64 +# define get_f64(which) fmv.x.d a0, which; j 2f +# define put_f64(which) fmv.d.x which, a1; j 2f +#else +# define get_f64(which) fsd which, 0(a1); j 2f +# define put_f64(which) fld which, 0(a1); j 2f +#endif + +.macro fp_access_prologue + /* + * Compute jump offset to store the correct FP register since we don't + * have indirect FP register access + */ + sll t0, a0, 3 + la t2, 1f + add t0, t0, t2 + li t1, SR_FS + csrs 
CSR_STATUS, t1 + jr t0 +1: +.endm + +.macro fp_access_epilogue +2: + csrc CSR_STATUS, t1 + ret +.endm + +#define fp_access_body(__access_func) \ + __access_func(f0); \ + __access_func(f1); \ + __access_func(f2); \ + __access_func(f3); \ + __access_func(f4); \ + __access_func(f5); \ + __access_func(f6); \ + __access_func(f7); \ + __access_func(f8); \ + __access_func(f9); \ + __access_func(f10); \ + __access_func(f11); \ + __access_func(f12); \ + __access_func(f13); \ + __access_func(f14); \ + __access_func(f15); \ + __access_func(f16); \ + __access_func(f17); \ + __access_func(f18); \ + __access_func(f19); \ + __access_func(f20); \ + __access_func(f21); \ + __access_func(f22); \ + __access_func(f23); \ + __access_func(f24); \ + __access_func(f25); \ + __access_func(f26); \ + __access_func(f27); \ + __access_func(f28); \ + __access_func(f29); \ + __access_func(f30); \ + __access_func(f31) + + +#ifdef CONFIG_RISCV_MISALIGNED + +/* + * Disable the compressed instruction set to keep a constant offset between FP + * load/store/move instructions + */ +.option norvc +/* + * put_f32_reg - Set a FP register from a register containing the value + * a0 = FP register index to be set + * a1 = value to be loaded in the FP register + */ +SYM_FUNC_START(put_f32_reg) + fp_access_prologue + fp_access_body(put_f32) + fp_access_epilogue +SYM_FUNC_END(put_f32_reg) + +/* + * get_f32_reg - Get a FP register value and return it + * a0 = FP register index to be retrieved + */ +SYM_FUNC_START(get_f32_reg) + fp_access_prologue + fp_access_body(get_f32) + fp_access_epilogue +SYM_FUNC_END(get_f32_reg) + +/* + * put_f64_reg - Set a 64-bit FP register from a value or a pointer. + * a0 = FP register index to be set + * a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we + * load the value to a pointer). + */ +SYM_FUNC_START(put_f64_reg) + fp_access_prologue + fp_access_body(put_f64) + fp_access_epilogue +SYM_FUNC_END(put_f64_reg) + +/* + * get_f64_reg - Get a 64-bit FP register value and return it or store it to + * a pointer. + * a0 = FP register index to be retrieved + * a1 = If xlen == 32, pointer which should be loaded with the FP register value, + * or unused if xlen == 64, in which case the FP register value is returned + * through a0 + */ +SYM_FUNC_START(get_f64_reg) + fp_access_prologue + fp_access_body(get_f64) + fp_access_epilogue +SYM_FUNC_END(get_f64_reg) + +#endif /* CONFIG_RISCV_MISALIGNED */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 3710ea5d160f..b77397432403 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,11 +14,12 @@ #include <asm/cpu_ops_sbi.h> #include <asm/hwcap.h> #include <asm/image.h> +#include <asm/scs.h> #include <asm/xip_fixup.h> #include "efi-header.S" __HEAD -ENTRY(_start) +SYM_CODE_START(_start) /* * Image header expected by Linux boot-loaders. The image header data * structure is described in asm/image.h. */ @@ -110,10 +111,7 @@ relocate_enable_mmu: csrw CSR_TVEC, a0 /* Reload the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Switch to kernel page tables.
A full fence is necessary in order to @@ -134,10 +132,7 @@ secondary_start_sbi: csrw CSR_IP, zero /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -159,6 +154,7 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) + scs_load_current .Lsecondary_start_common: @@ -168,12 +164,12 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a0 call relocate_enable_mmu #endif - call setup_trap_vector + call .Lsetup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ .align 2 -setup_trap_vector: +.Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception csrw CSR_TVEC, a0 @@ -191,9 +187,9 @@ setup_trap_vector: wfi j .Lsecondary_park -END(_start) +SYM_CODE_END(_start) -ENTRY(_start_kernel) +SYM_CODE_START(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -210,7 +206,7 @@ ENTRY(_start_kernel) * not implement PMPs, so we set up a quick trap handler to just skip * touching the PMPs on any trap. */ - la a0, pmp_done + la a0, .Lpmp_done csrw CSR_TVEC, a0 li a0, -1 @@ -218,7 +214,7 @@ ENTRY(_start_kernel) li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) csrw CSR_PMPCFG0, a0 .align 2 -pmp_done: +.Lpmp_done: /* * The hartid in a0 is expected later on, and we have no firmware @@ -228,10 +224,7 @@ pmp_done: #endif /* CONFIG_RISCV_M_MODE */ /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -282,12 +275,12 @@ pmp_done: /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop - ble a4, a3, clear_bss_done -clear_bss: + ble a4, a3, .Lclear_bss_done +.Lclear_bss: REG_S zero, (a3) add a3, a3, RISCV_SZPTR - blt a3, a4, clear_bss -clear_bss_done: + blt a3, a4, .Lclear_bss +.Lclear_bss_done: #endif la a2, boot_cpu_hartid XIP_FIXUP_OFFSET a2 @@ -298,6 +291,7 @@ clear_bss_done: la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp addi sp, sp, -PT_SIZE_ON_STACK + scs_load_init_stack #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start XIP_FIXUP_OFFSET a0 @@ -311,11 +305,12 @@ clear_bss_done: call relocate_enable_mmu #endif /* CONFIG_MMU */ - call setup_trap_vector + call .Lsetup_trap_vector /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE addi sp, sp, -PT_SIZE_ON_STACK + scs_load_current #ifdef CONFIG_KASAN call kasan_early_init @@ -353,10 +348,10 @@ clear_bss_done: tail .Lsecondary_start_common #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ -END(_start_kernel) +SYM_CODE_END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE -ENTRY(reset_regs) +SYM_CODE_START_LOCAL(reset_regs) li sp, 0 li gp, 0 li tp, 0 @@ -454,5 +449,5 @@ ENTRY(reset_regs) .Lreset_regs_done_vector: #endif /* CONFIG_RISCV_ISA_V */ ret -END(reset_regs) +SYM_CODE_END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index d698dd7df637..d040dcf4add4 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -21,7 +21,7 @@ * * Always returns 0 */ -ENTRY(__hibernate_cpu_resume) +SYM_FUNC_START(__hibernate_cpu_resume) /* switch to hibernated image's page table. */ csrw CSR_SATP, s0 sfence.vma @@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume) mv a0, zero ret -END(__hibernate_cpu_resume) +SYM_FUNC_END(__hibernate_cpu_resume) /* * Prepare to restore the image. 
@@ -42,7 +42,7 @@ END(__hibernate_cpu_resume) * a1: satp of temporary page tables. * a2: cpu_resume. */ -ENTRY(hibernate_restore_image) +SYM_FUNC_START(hibernate_restore_image) mv s0, a0 mv s1, a1 mv s2, a2 @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L a1, relocated_restore_code jr a1 -END(hibernate_restore_image) +SYM_FUNC_END(hibernate_restore_image) /* * The below code will be executed from a 'safe' page. @@ -58,7 +58,7 @@ END(hibernate_restore_image) * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() * to restore the CPU context. */ -ENTRY(hibernate_core_restore_code) +SYM_FUNC_START(hibernate_core_restore_code) /* switch to temp page table. */ csrw satp, s1 sfence.vma @@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code) bnez s4, .Lcopy jr s2 -END(hibernate_core_restore_code) +SYM_FUNC_END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 9cc0a7669271..9ceda02507ca 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -9,6 +9,7 @@ #include <linux/irqchip.h> #include <linux/irqdomain.h> #include <linux/module.h> +#include <linux/scs.h> #include <linux/seq_file.h> #include <asm/sbi.h> #include <asm/smp.h> @@ -34,6 +35,24 @@ EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode); #ifdef CONFIG_IRQ_STACKS #include <asm/irq_stack.h> +DECLARE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); + +#ifdef CONFIG_SHADOW_CALL_STACK +DEFINE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); +#endif + +static void init_irq_scs(void) +{ + int cpu; + + if (!scs_is_enabled()) + return; + + for_each_possible_cpu(cpu) + per_cpu(irq_shadow_call_stack_ptr, cpu) = + scs_alloc(cpu_to_node(cpu)); +} + DEFINE_PER_CPU(ulong *, irq_stack_ptr); #ifdef CONFIG_VMAP_STACK @@ -61,40 +80,22 @@ static void init_irq_stacks(void) #endif /* CONFIG_VMAP_STACK */ #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +static void ___do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + void do_softirq_own_stack(void) { -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "call __do_softirq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + if (on_thread_stack()) + call_on_irq_stack(NULL, ___do_softirq); + else __do_softirq(); } #endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else +static void init_irq_scs(void) {} static void init_irq_stacks(void) {} #endif /* CONFIG_IRQ_STACKS */ @@ -106,6 +107,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) void __init init_IRQ(void) { + init_irq_scs(); init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index 059c5e216ae7..de0a4b35d01e 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate) * s1: (const) Phys address to jump to after relocation * s2: (const) Phys address of the FDT image * s3: (const) The hartid of the current hart - * s4: Pointer to the destination 
address for the relocation - * s5: (const) Number of words per page - * s6: (const) 1, used for subtraction - * s7: (const) kernel_map.va_pa_offset, used when switching MMU off - * s8: (const) Physical address of the main loop - * s9: (debug) indirection page counter - * s10: (debug) entry counter - * s11: (debug) copied words counter + * s4: (const) kernel_map.va_pa_offset, used when switching MMU off + * s5: Pointer to the destination address for the relocation + * s6: (const) Physical address of the main loop */ mv s0, a0 mv s1, a1 mv s2, a2 mv s3, a3 - mv s4, zero - li s5, (PAGE_SIZE / RISCV_SZPTR) - li s6, 1 - mv s7, a4 - mv s8, zero - mv s9, zero - mv s10, zero - mv s11, zero + mv s4, a4 + mv s5, zero + mv s6, zero /* Disable / cleanup interrupts */ csrw CSR_SIE, zero @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) * the start of the loop below so that we jump there in * any case. */ - la s8, 1f - sub s8, s8, s7 - csrw CSR_STVEC, s8 + la s6, 1f + sub s6, s6, s4 + csrw CSR_STVEC, s6 + + /* + * With C-extension, here we get 42 Bytes and the next + * .align directive would pad zeros here up to 44 Bytes. + * So manually put a nop here to avoid zeros padding. + */ + nop /* Process entries in a loop */ .align 2 1: - addi s10, s10, 1 REG_L t0, 0(s0) /* t0 = *image->entry */ addi s0, s0, RISCV_SZPTR /* image->entry++ */ /* IND_DESTINATION entry ? -> save destination address */ andi t1, t0, 0x1 beqz t1, 2f - andi s4, t0, ~0x1 + andi s5, t0, ~0x1 j 1b 2: @@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x2 beqz t1, 2f andi s0, t0, ~0x2 - addi s9, s9, 1 csrw CSR_SATP, zero - jalr zero, s8, 0 + jr s6 2: /* IND_DONE entry ? -> jump to done label */ @@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x8 beqz t1, 1b /* Unknown entry type, ignore it */ andi t0, t0, ~0x8 - mv t3, s5 /* i = num words per page */ + li t3, (PAGE_SIZE / RISCV_SZPTR) /* i = num words per page */ 3: /* copy loop */ REG_L t1, (t0) /* t1 = *src_ptr */ - REG_S t1, (s4) /* *dst_ptr = *src_ptr */ + REG_S t1, (s5) /* *dst_ptr = *src_ptr */ addi t0, t0, RISCV_SZPTR /* stc_ptr++ */ - addi s4, s4, RISCV_SZPTR /* dst_ptr++ */ - sub t3, t3, s6 /* i-- */ - addi s11, s11, 1 /* c++ */ + addi s5, s5, RISCV_SZPTR /* dst_ptr++ */ + addi t3, t3, -0x1 /* i-- */ beqz t3, 1b /* copy done ? 
*/ j 3b @@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate) */ fence.i - jalr zero, a2, 0 + jr a2 SYM_CODE_END(riscv_kexec_relocate) riscv_kexec_relocate_end: diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 669b8697aa38..58dd96a2a153 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,7 +82,7 @@ .endm #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) SAVE_ABI addi a0, t0, -FENTRY_RA_OFFSET @@ -91,8 +91,7 @@ ENTRY(ftrace_caller) mv a1, ra mv a3, sp -ftrace_call: - .global ftrace_call +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,16 +101,15 @@ ftrace_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_call: - .global ftrace_graph_call +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI jr t0 -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) SAVE_ALL addi a0, t0, -FENTRY_RA_OFFSET @@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller) mv a1, ra mv a3, sp -ftrace_regs_call: - .global ftrace_regs_call +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,12 +128,11 @@ ftrace_regs_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call +SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ALL jr t0 -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8818a8fa9ff3..b4dd9ed6849e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) /* * On implementing the frame pointer test, the ideal way is to compare the * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return.
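The conversions in these assembly files all follow one pattern: ENTRY()/END()/ENDPROC() are the deprecated linkage.h annotations, while SYM_FUNC_START()/SYM_FUNC_END() are the generic replacements, which among other things record symbol type and size in the object file. A minimal sketch of the new form for a C-callable leaf routine (the routine name here is illustrative, not part of the patch):

	#include <linux/linkage.h>

	/* was: ENTRY(demo_leaf) */
	SYM_FUNC_START(demo_leaf)
		mv	a0, a1		/* ordinary C-ABI function body */
		ret
	/* was: ENDPROC(demo_leaf) */
	SYM_FUNC_END(demo_leaf)

Code that does not follow the C calling convention, such as riscv_kexec_relocate above and arch_rethook_trampoline later in the series, uses the SYM_CODE_START()/SYM_CODE_END() variants instead.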
@@ -76,25 +76,25 @@ ENTRY(return_to_handler) mv a2, a0 RESTORE_RET_ABI_STATE jalr a2 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(MCOUNT_NAME) +SYM_FUNC_START(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return REG_L t1, 0(t0) - bne t1, t4, do_ftrace_graph_caller + bne t1, t4, .Ldo_ftrace_graph_caller la t3, ftrace_graph_entry REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub - bne t2, t6, do_ftrace_graph_caller + bne t2, t6, .Ldo_ftrace_graph_caller #endif la t3, ftrace_trace_function REG_L t5, 0(t3) - bne t5, t4, do_trace + bne t5, t4, .Ldo_trace ret #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME) * A pseudo representation for the function graph tracer: * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ -do_ftrace_graph_caller: +.Ldo_ftrace_graph_caller: addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -118,7 +118,7 @@ do_ftrace_graph_caller: * A pseudo representation for the function tracer: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ -do_trace: +.Ldo_trace: REG_L a1, -SZREG(s0) mv a0, ra @@ -126,6 +126,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(MCOUNT_NAME) +SYM_FUNC_END(MCOUNT_NAME) #endif EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7c651d55fcbd..56a8c78e9e21 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -7,6 +7,9 @@ #include <linux/elf.h> #include <linux/err.h> #include <linux/errno.h> +#include <linux/hashtable.h> +#include <linux/kernel.h> +#include <linux/log2.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> #include <linux/sizes.h> @@ -14,6 +17,38 @@ #include <asm/alternative.h> #include <asm/sections.h> +struct used_bucket { + struct list_head head; + struct hlist_head *bucket; +}; + +struct relocation_head { + struct hlist_node node; + struct list_head *rel_entry; + void *location; +}; + +struct relocation_entry { + struct list_head head; + Elf_Addr value; + unsigned int type; +}; + +struct relocation_handlers { + int (*reloc_handler)(struct module *me, void *location, Elf_Addr v); + int (*accumulate_handler)(struct module *me, void *location, + long buffer); +}; + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations); +void process_accumulated_relocations(struct module *me); +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v); + +struct hlist_head *relocation_hashtable; + +struct list_head used_buckets_list; + /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -27,68 +62,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) #endif } -static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +static int riscv_insn_rmw(void *location, u32 keep, u32 set) +{ + u16 *parcel = location; + u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= keep; + insn |= set; + + parcel[0] = cpu_to_le16(insn); + parcel[1] = cpu_to_le16(insn >> 16); + return 0; +} + +static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) +{ + u16 *parcel = location; + u16 insn = le16_to_cpu(*parcel); + + insn &= keep; + insn |= set; + + *parcel = cpu_to_le16(insn); + return 0; +} + +static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value 
%016llx out of range for 32-bit field\n", me->name, (long long)v); return -EINVAL; } - *location = v; + *(u32 *)location = v; return 0; } -static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) +static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location = v; return 0; } -static int apply_r_riscv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); u32 imm4_1 = (offset & 0x1e) << (11 - 4); - *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1); } -static int apply_r_riscv_jal_rela(struct module *me, u32 *location, +static int apply_r_riscv_jal_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); u32 imm10_1 = (offset & 0x7fe) << (30 - 10); - *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1; - return 0; + return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1); } -static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); u16 imm4_3 = (offset & 0x18) << (12 - 5); u16 imm2_1 = (offset & 0x6) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe383) | - imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe383, + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1); } -static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -98,16 +155,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, u16 imm4 = (offset & 0x10) << (12 - 5); u16 imm3_1 = (offset & 0xe) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe003) | - imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe003, + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1); } -static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( @@ -116,23 +171,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, 
(offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* * v is the lo12 value to fill. It is calculated before calling this * handler. */ - *location = (*location & 0xfffff) | ((v & 0xfff) << 20); - return 0; + return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20); } -static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* @@ -142,15 +194,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, u32 imm11_5 = (v & 0xfe0) << (31 - 11); u32 imm4_0 = (v & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_hi20_rela(struct module *me, void *location, Elf_Addr v) { - s32 hi20; - if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -158,22 +207,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = ((s32)v + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000); } -static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ s32 hi20 = ((s32)v + 0x800) & 0xfffff000; s32 lo12 = ((s32)v - hi20); - *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); - return 0; + + return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20); } -static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ @@ -181,20 +228,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, s32 lo12 = ((s32)v - hi20); u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_got_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; /* Always emit the got entry */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_got_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_got_entry(me, v) - location; } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", @@ -202,22 +247,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_plt_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void 
*)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_plt_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_plt_entry(me, v) - location; } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -228,15 +270,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_call_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { @@ -248,18 +289,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_relax_rela(struct module *me, u32 *location, +static int apply_r_riscv_relax_rela(struct module *me, void *location, Elf_Addr v) { return 0; } -static int apply_r_riscv_align_rela(struct module *me, u32 *location, +static int apply_r_riscv_align_rela(struct module *me, void *location, Elf_Addr v) { pr_err( @@ -268,91 +308,446 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, return -EINVAL; } -static int apply_r_riscv_add16_rela(struct module *me, u32 *location, +static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location += (u8)v; + return 0; +} + +static int apply_r_riscv_add16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location += (u16)v; return 0; } -static int apply_r_riscv_add32_rela(struct module *me, u32 *location, +static int apply_r_riscv_add32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location += (u32)v; return 0; } -static int apply_r_riscv_add64_rela(struct module *me, u32 *location, +static int apply_r_riscv_add64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location += (u64)v; return 0; } -static int apply_r_riscv_sub16_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location -= (u8)v; + return 0; +} + +static int apply_r_riscv_sub16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location -= (u16)v; return 0; } -static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location -= (u32)v; return 0; } -static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location -= (u64)v; return 0; } -static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, - Elf_Addr v) = { - [R_RISCV_32] = 
apply_r_riscv_32_rela, - [R_RISCV_64] = apply_r_riscv_64_rela, - [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, - [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, - [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, - [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, - [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, - [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, - [R_RISCV_HI20] = apply_r_riscv_hi20_rela, - [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, - [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, - [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, - [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, - [R_RISCV_CALL] = apply_r_riscv_call_rela, - [R_RISCV_RELAX] = apply_r_riscv_relax_rela, - [R_RISCV_ALIGN] = apply_r_riscv_align_rela, - [R_RISCV_ADD16] = apply_r_riscv_add16_rela, - [R_RISCV_ADD32] = apply_r_riscv_add32_rela, - [R_RISCV_ADD64] = apply_r_riscv_add64_rela, - [R_RISCV_SUB16] = apply_r_riscv_sub16_rela, - [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, - [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, +static int dynamic_linking_not_supported(struct module *me, void *location, + Elf_Addr v) +{ + pr_err("%s: Dynamic linking not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int tls_not_supported(struct module *me, void *location, Elf_Addr v) +{ + pr_err("%s: Thread local storage not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte - (value & 0x3f)) & 0x3f; + return 0; +} + +static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location = (u8)v; + return 0; +} + +static int apply_r_riscv_set16_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u16 *)location = (u16)v; + return 0; +} + +static int apply_r_riscv_set32_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = (u32)v; + return 0; +} + +static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = v - (uintptr_t)location; + return 0; +} + +static int apply_r_riscv_plt32_rela(struct module *me, void *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - location; + + if (!riscv_insn_valid_32bit_offset(offset)) { + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = (void *)module_emit_plt_entry(me, v) - location; + } else { + pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + } + + *(u32 *)location = (u32)offset; + return 0; +} + +static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location = v; + return 0; +} + +static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location -= v; + return 0; +} + +static int apply_6_bit_accumulation(struct module *me, void *location, long buffer) +{ + u8 *byte = location; + u8 value = buffer; + + if (buffer > 0x3f) { + pr_err("%s: value %ld out of range for 6-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + + *byte = (*byte & 0xc0) | 
(value & 0x3f); + return 0; +} + +static int apply_8_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U8_MAX) { + pr_err("%s: value %ld out of range for 8-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u8 *)location = (u8)buffer; + return 0; +} + +static int apply_16_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U16_MAX) { + pr_err("%s: value %ld out of range for 16-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u16 *)location = (u16)buffer; + return 0; +} + +static int apply_32_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U32_MAX) { + pr_err("%s: value %ld out of range for 32-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u32 *)location = (u32)buffer; + return 0; +} + +static int apply_64_bit_accumulation(struct module *me, void *location, long buffer) +{ + *(u64 *)location = (u64)buffer; + return 0; +} + +static int apply_uleb128_accumulation(struct module *me, void *location, long buffer) +{ + /* + * ULEB128 is a variable length encoding. Encode the buffer into + * the ULEB128 data format. + */ + u8 *p = location; + + while (buffer != 0) { + u8 value = buffer & 0x7f; + + buffer >>= 7; + value |= (!!buffer) << 7; + + *p++ = value; + } + return 0; +} + +/* + * Relocations defined in the riscv-elf-psabi-doc. + * This handles static linking only. + */ +static const struct relocation_handlers reloc_handlers[] = { + [R_RISCV_32] = { .reloc_handler = apply_r_riscv_32_rela }, + [R_RISCV_64] = { .reloc_handler = apply_r_riscv_64_rela }, + [R_RISCV_RELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_COPY] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_JUMP_SLOT] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + /* 12-15 undefined */ + [R_RISCV_BRANCH] = { .reloc_handler = apply_r_riscv_branch_rela }, + [R_RISCV_JAL] = { .reloc_handler = apply_r_riscv_jal_rela }, + [R_RISCV_CALL] = { .reloc_handler = apply_r_riscv_call_rela }, + [R_RISCV_CALL_PLT] = { .reloc_handler = apply_r_riscv_call_plt_rela }, + [R_RISCV_GOT_HI20] = { .reloc_handler = apply_r_riscv_got_hi20_rela }, + [R_RISCV_TLS_GOT_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TLS_GD_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_PCREL_HI20] = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela }, + [R_RISCV_PCREL_LO12_I] = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela }, + [R_RISCV_PCREL_LO12_S] = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela }, + [R_RISCV_HI20] = { .reloc_handler = apply_r_riscv_hi20_rela }, + [R_RISCV_LO12_I] = { .reloc_handler = apply_r_riscv_lo12_i_rela }, + [R_RISCV_LO12_S] = { .reloc_handler = apply_r_riscv_lo12_s_rela }, + [R_RISCV_TPREL_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_I] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_S] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_ADD] = { .reloc_handler = tls_not_supported }, + [R_RISCV_ADD8] = { 
.reloc_handler = apply_r_riscv_add8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_ADD16] = { .reloc_handler = apply_r_riscv_add16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_ADD32] = { .reloc_handler = apply_r_riscv_add32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_ADD64] = { .reloc_handler = apply_r_riscv_add64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + [R_RISCV_SUB8] = { .reloc_handler = apply_r_riscv_sub8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SUB16] = { .reloc_handler = apply_r_riscv_sub16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SUB32] = { .reloc_handler = apply_r_riscv_sub32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_SUB64] = { .reloc_handler = apply_r_riscv_sub64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + /* 41-42 reserved for future standard use */ + [R_RISCV_ALIGN] = { .reloc_handler = apply_r_riscv_align_rela }, + [R_RISCV_RVC_BRANCH] = { .reloc_handler = apply_r_riscv_rvc_branch_rela }, + [R_RISCV_RVC_JUMP] = { .reloc_handler = apply_r_riscv_rvc_jump_rela }, + /* 46-50 reserved for future standard use */ + [R_RISCV_RELAX] = { .reloc_handler = apply_r_riscv_relax_rela }, + [R_RISCV_SUB6] = { .reloc_handler = apply_r_riscv_sub6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET6] = { .reloc_handler = apply_r_riscv_set6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET8] = { .reloc_handler = apply_r_riscv_set8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SET16] = { .reloc_handler = apply_r_riscv_set16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SET32] = { .reloc_handler = apply_r_riscv_set32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_32_PCREL] = { .reloc_handler = apply_r_riscv_32_pcrel_rela }, + [R_RISCV_IRELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_PLT32] = { .reloc_handler = apply_r_riscv_plt32_rela }, + [R_RISCV_SET_ULEB128] = { .reloc_handler = apply_r_riscv_set_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + [R_RISCV_SUB_ULEB128] = { .reloc_handler = apply_r_riscv_sub_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + /* 62-191 reserved for future standard use */ + /* 192-255 nonstandard ABI extensions */ }; +void process_accumulated_relocations(struct module *me) +{ + /* + * Only ADD/SUB/SET/ULEB128 should end up here. + * + * Each bucket may have more than one relocation location. All + * relocations for a location are stored in a list in a bucket. + * + * Relocations are applied to a temp variable before being stored to the + * provided location to check for overflow. This also allows ULEB128 to + * properly decide how many entries are needed before storing to + * location. The final value is stored into location using the handler + * for the last relocation to an address. 
+ * + * Three layers of indexing: + * - Each of the buckets in use + * - Groups of relocations in each bucket by location address + * - Each relocation entry for a location address + */ + struct used_bucket *bucket_iter; + struct relocation_head *rel_head_iter; + struct relocation_entry *rel_entry_iter; + int curr_type; + void *location; + long buffer; + + list_for_each_entry(bucket_iter, &used_buckets_list, head) { + hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + buffer = 0; + location = rel_head_iter->location; + list_for_each_entry(rel_entry_iter, + rel_head_iter->rel_entry, head) { + curr_type = rel_entry_iter->type; + reloc_handlers[curr_type].reloc_handler( + me, &buffer, rel_entry_iter->value); + kfree(rel_entry_iter); + } + reloc_handlers[curr_type].accumulate_handler( + me, location, buffer); + kfree(rel_head_iter); + } + kfree(bucket_iter); + } + + kfree(relocation_hashtable); +} + +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v) +{ + struct relocation_entry *entry; + struct relocation_head *rel_head; + struct hlist_head *current_head; + struct used_bucket *bucket; + unsigned long hash; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + INIT_LIST_HEAD(&entry->head); + entry->type = type; + entry->value = v; + + hash = hash_min((uintptr_t)location, hashtable_bits); + + current_head = &relocation_hashtable[hash]; + + /* Find matching location (if any) */ + bool found = false; + struct relocation_head *rel_head_iter; + + hlist_for_each_entry(rel_head_iter, current_head, node) { + if (rel_head_iter->location == location) { + found = true; + rel_head = rel_head_iter; + break; + } + } + + if (!found) { + rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + rel_head->rel_entry = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + INIT_LIST_HEAD(rel_head->rel_entry); + rel_head->location = location; + INIT_HLIST_NODE(&rel_head->node); + if (!current_head->first) { + bucket = + kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + INIT_LIST_HEAD(&bucket->head); + bucket->bucket = current_head; + list_add(&bucket->head, &used_buckets_list); + } + hlist_add_head(&rel_head->node, current_head); + } + + /* Add relocation to head of discovered rel_head */ + list_add_tail(&entry->head, rel_head->rel_entry); + + return 0; +} + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +{ + /* Can safely assume that bits is not greater than sizeof(long) */ + unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + unsigned int hashtable_bits = ilog2(hashtable_size); + + /* + * Double size of hashtable if num_relocations * 1.25 is greater than + * hashtable_size. 
+ */ + int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size)); + + hashtable_bits += should_double_size; + + hashtable_size <<= should_double_size; + + relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + __hash_init(relocation_hashtable, hashtable_size); + + INIT_LIST_HEAD(&used_buckets_list); + + return hashtable_bits; +} + int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me) { Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; - int (*handler)(struct module *me, u32 *location, Elf_Addr v); + int (*handler)(struct module *me, void *location, Elf_Addr v); Elf_Sym *sym; - u32 *location; + void *location; unsigned int i, type; Elf_Addr v; int res; + unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); + unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + for (i = 0; i < num_relocations; i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; @@ -370,8 +765,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, type = ELF_RISCV_R_TYPE(rel[i].r_info); - if (type < ARRAY_SIZE(reloc_handlers_rela)) - handler = reloc_handlers_rela[type]; + if (type < ARRAY_SIZE(reloc_handlers)) + handler = reloc_handlers[type].reloc_handler; else handler = NULL; @@ -427,11 +822,16 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } } - res = handler(me, location, v); + if (reloc_handlers[type].accumulate_handler) + res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + else + res = handler(me, location, v); if (res) return res; } + process_accumulated_relocations(me); + return 0; } diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S index 21bac92a170a..f2cd83d9b0f0 100644 --- a/arch/riscv/kernel/probes/rethook_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(arch_rethook_trampoline) +SYM_CODE_START(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(arch_rethook_trampoline) +SYM_CODE_END(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index d3099d67816d..6c166029079c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, unsigned long val) { if (index == 0) - return false; + return true; else if (index <= 31) *((unsigned long *)regs + index) = val; else diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 194f166b2cc4..4b3dc8beaf77 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -3,6 +3,7 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/uprobes.h> +#include <asm/insn.h> #include "decode-insn.h" @@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn) #endif } +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn); +} + unsigned long 
uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e32d737e039f..4f21d970a129 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,6 +25,7 @@ #include <asm/thread_info.h> #include <asm/cpuidle.h> #include <asm/vector.h> +#include <asm/cpufeature.h> register unsigned long gp_in_global __asm__("gp"); @@ -41,6 +42,23 @@ void arch_cpu_idle(void) cpu_do_idle(); } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + tsk->thread.align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); +} + void __show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c672c8ba9a2a..5a62ed1da453 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,6 +11,7 @@ #include <linux/reboot.h> #include <asm/sbi.h> #include <asm/smp.h> +#include <asm/tlbflush.h> /* default SBI version is 0.1 */ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; @@ -377,31 +378,14 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) EXPORT_SYMBOL(sbi_remote_fence_i); /** - * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote - * harts for the specified virtual address range. - * @cpu_mask: A cpu mask containing all the target harts. - * @start: Start of the virtual address - * @size: Total size of the virtual address range. - * - * Return: 0 on success, appropriate linux error code otherwise. - */ -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size) -{ - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - cpu_mask, start, size, 0, 0); -} -EXPORT_SYMBOL(sbi_remote_sfence_vma); - -/** * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given - * remote harts for a virtual address range belonging to a specific ASID. + * remote harts for a virtual address range belonging to a specific ASID or not. * * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. - * @asid: The value of address space identifier (ASID). + * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID + * for flushing all address spaces. * * Return: 0 on success, appropriate linux error code otherwise. 
*/ @@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long size, unsigned long asid) { - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - cpu_mask, start, size, asid, 0); + if (asid == FLUSH_TLB_NO_ASID) + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); + else + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 0624f44d43ec..535a837de55d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -25,6 +25,7 @@ #include <asm/acpi.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> @@ -289,10 +290,13 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); init_rt_signal_env(); apply_boot_alternatives(); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) riscv_noncoherent_supported(); riscv_set_dma_cache_alignment(); + + riscv_user_isa_enable(); } static int __init topology_init(void) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 21a4d0e111bc..88b6220b2608 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -384,30 +384,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; - /* Are we from a system call? */ - if (regs->cause == EXC_SYSCALL) { - /* Avoid additional syscall restarting via ret_from_exception */ - regs->cause = -1UL; - /* If so, check system call restarting.. */ - switch (regs->a0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->a0 = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { - regs->a0 = -EINTR; - break; - } - fallthrough; - case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; - } - } - rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ @@ -421,36 +397,67 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) void arch_do_signal_or_restart(struct pt_regs *regs) { + unsigned long continue_addr = 0, restart_addr = 0; + int retval = 0; struct ksignal ksig; + bool syscall = (regs->cause == EXC_SYSCALL); - if (get_signal(&ksig)) { - /* Actually deliver the signal */ - handle_signal(&ksig, regs); - return; - } + /* If we were from a system call, check for system call restarting */ + if (syscall) { + continue_addr = regs->epc; + restart_addr = continue_addr - 4; + retval = regs->a0; - /* Did we come from a system call? */ - if (regs->cause == EXC_SYSCALL) { /* Avoid additional syscall restarting via ret_from_exception */ regs->cause = -1UL; - /* Restart the system call - no handlers present */ - switch (regs->a0) { + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; case -ERESTART_RESTARTBLOCK: - regs->a0 = regs->orig_a0; - regs->a7 = __NR_restart_syscall; - regs->epc -= 0x4; + regs->a0 = regs->orig_a0; + regs->epc = restart_addr; break; } } /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. 
+ */ + if (get_signal(&ksig)) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->epc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ksig.ka.sa.sa_flags & SA_RESTART)))) { + regs->a0 = -EINTR; + regs->epc = continue_addr; + } + + /* Actually deliver the signal */ + handle_signal(&ksig, regs); + return; + } + + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK) + regs->a7 = __NR_restart_syscall; + + /* * If there is no signal to deliver, we just put the saved * sigmask back. */ diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..d162bf339beb 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -25,6 +25,8 @@ #include <linux/of.h> #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> + +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/cpufeature.h> #include <asm/irq.h> @@ -246,13 +248,14 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); - check_unaligned_access(curr_cpuid); if (has_vector()) { if (riscv_v_setup_vsize()) elf_hwcap &= ~COMPAT_HWCAP_ISA_V; } + riscv_user_isa_enable(); + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case. diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index f7960c7c5f9e..2d54f309c140 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -16,7 +16,7 @@ .altmacro .option norelax -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) /* Save registers (except A0 and T0-T6) */ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) @@ -57,14 +57,11 @@ ENTRY(__cpu_suspend_enter) /* Return to C code */ ret -END(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) SYM_TYPED_FUNC_START(__cpu_resume_enter) /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer #ifdef CONFIG_MMU /* Save A0 and A1 */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index b651ec698a91..c712037dbe10 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -145,26 +145,38 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, for_each_cpu(cpu, cpus) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; - if (riscv_isa_extension_available(isainfo->isa, ZBA)) - pair->value |= RISCV_HWPROBE_EXT_ZBA; - else - missing |= RISCV_HWPROBE_EXT_ZBA; - - if (riscv_isa_extension_available(isainfo->isa, ZBB)) - pair->value |= RISCV_HWPROBE_EXT_ZBB; - else - missing |= RISCV_HWPROBE_EXT_ZBB; - - if (riscv_isa_extension_available(isainfo->isa, ZBS)) - pair->value |= RISCV_HWPROBE_EXT_ZBS; - else - missing |= RISCV_HWPROBE_EXT_ZBS; +#define EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_EXT_##ext; \ + } while (false) + + /* + * Only use EXT_KEY() for extensions which can be exposed to userspace, + * regardless of the kernel's configuration, as no 
other checks, besides + * presence in the hart_isa bitmap, are made. + */ + EXT_KEY(ZBA); + EXT_KEY(ZBB); + EXT_KEY(ZBS); + EXT_KEY(ZICBOZ); +#undef EXT_KEY } /* Now turn off reporting features if any CPU is missing it. */ pair->value &= ~missing; } +static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +{ + struct riscv_hwprobe pair; + + hwprobe_isa_ext0(&pair, cpus); + return (pair.value & ext); +} + static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -215,6 +227,12 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) + pair->value = riscv_cboz_block_size; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug new file mode 100644 index 000000000000..5dba64e8e977 --- /dev/null +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "arch/riscv/kernel Testing and Coverage" + +config AS_HAS_ULEB128 + def_bool $(as-instr,.reloc label$(comma) R_RISCV_SET_ULEB128$(comma) 127\n.reloc label$(comma) R_RISCV_SUB_ULEB128$(comma) 127\nlabel:\n.word 0) + +menuconfig RUNTIME_KERNEL_TESTING_MENU + bool "arch/riscv/kernel runtime Testing" + def_bool y + help + Enable riscv kernel runtime testing. + +if RUNTIME_KERNEL_TESTING_MENU + +config RISCV_MODULE_LINKING_KUNIT + bool "KUnit test riscv module linking at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test riscv module linking at boot. This will + enable a module called "test_module_linking". + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. 
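Stepping back to the sys_riscv.c hunk above: the new RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE key is queried from userspace like any other hwprobe pair. A minimal sketch of such a query, assuming uapi headers from a kernel with this series and using the raw syscall since a libc wrapper may not exist:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/unistd.h>		/* __NR_riscv_hwprobe */
	#include <asm/hwprobe.h>	/* struct riscv_hwprobe, RISCV_HWPROBE_* */

	int main(void)
	{
		struct riscv_hwprobe pair = {
			.key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
		};

		/* cpusetsize == 0 and cpus == NULL ask about all online harts */
		if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0) {
			perror("riscv_hwprobe");
			return 1;
		}

		if (pair.key < 0)
			printf("Zicboz block size not reported\n");
		else
			printf("Zicboz block size: %llu bytes\n",
			       (unsigned long long)pair.value);
		return 0;
	}

Because unknown keys do not fail the whole call but come back with key set to -1 and value set to 0, the same binary keeps working on kernels that predate this key.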
+ +endif # RUNTIME_KERNEL_TESTING_MENU + +endmenu # "arch/riscv/kernel Testing and Coverage" diff --git a/arch/riscv/kernel/tests/Makefile b/arch/riscv/kernel/tests/Makefile new file mode 100644 index 000000000000..7d6c76cffe20 --- /dev/null +++ b/arch/riscv/kernel/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RISCV_MODULE_LINKING_KUNIT) += module_test/ diff --git a/arch/riscv/kernel/tests/module_test/Makefile b/arch/riscv/kernel/tests/module_test/Makefile new file mode 100644 index 000000000000..d7a6fd8943de --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/Makefile @@ -0,0 +1,15 @@ +obj-m += test_module_linking.o + +test_sub := test_sub6.o test_sub8.o test_sub16.o test_sub32.o test_sub64.o + +test_set := test_set6.o test_set8.o test_set16.o test_set32.o + +test_module_linking-objs += $(test_sub) + +test_module_linking-objs += $(test_set) + +ifeq ($(CONFIG_AS_HAS_ULEB128),y) +test_module_linking-objs += test_uleb128.o +endif + +test_module_linking-objs += test_module_linking_main.o diff --git a/arch/riscv/kernel/tests/module_test/test_module_linking_main.c b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c new file mode 100644 index 000000000000..8df5fa5b834e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <kunit/test.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module linking"); + +extern int test_set32(void); +extern int test_set16(void); +extern int test_set8(void); +extern int test_set6(void); +extern long test_sub64(void); +extern int test_sub32(void); +extern int test_sub16(void); +extern int test_sub8(void); +extern int test_sub6(void); + +#ifdef CONFIG_AS_HAS_ULEB128 +extern int test_uleb_basic(void); +extern int test_uleb_large(void); +#endif + +#define CHECK_EQ(lhs, rhs) KUNIT_ASSERT_EQ(test, lhs, rhs) + +void run_test_set(struct kunit *test); +void run_test_sub(struct kunit *test); +void run_test_uleb(struct kunit *test); + +void run_test_set(struct kunit *test) +{ + int val32 = test_set32(); + int val16 = test_set16(); + int val8 = test_set8(); + int val6 = test_set6(); + + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +void run_test_sub(struct kunit *test) +{ + int val64 = test_sub64(); + int val32 = test_sub32(); + int val16 = test_sub16(); + int val8 = test_sub8(); + int val6 = test_sub6(); + + CHECK_EQ(val64, 0); + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +#ifdef CONFIG_AS_HAS_ULEB128 +void run_test_uleb(struct kunit *test) +{ + int val_uleb = test_uleb_basic(); + int val_uleb2 = test_uleb_large(); + + CHECK_EQ(val_uleb, 0); + CHECK_EQ(val_uleb2, 0); +} +#endif + +static struct kunit_case __refdata riscv_module_linking_test_cases[] = { + KUNIT_CASE(run_test_set), + KUNIT_CASE(run_test_sub), +#ifdef CONFIG_AS_HAS_ULEB128 + KUNIT_CASE(run_test_uleb), +#endif + {} +}; + +static struct kunit_suite riscv_module_linking_test_suite = { + .name = "riscv_module_linking", + .test_cases = riscv_module_linking_test_cases, +}; + +kunit_test_suites(&riscv_module_linking_test_suite); diff --git a/arch/riscv/kernel/tests/module_test/test_set16.S b/arch/riscv/kernel/tests/module_test/test_set16.S new file mode 100644 index 000000000000..2be0e441a12e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set16.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */
+/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set16 +test_set16: + lw a0, set16 + la t0, set16 +#ifdef CONFIG_32BIT + slli t0, t0, 16 + srli t0, t0, 16 +#else + slli t0, t0, 48 + srli t0, t0, 48 +#endif + sub a0, a0, t0 + ret +.data +set16: + .reloc set16, R_RISCV_SET16, set16 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set32.S b/arch/riscv/kernel/tests/module_test/test_set32.S new file mode 100644 index 000000000000..de0444537e67 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set32 +test_set32: + lw a0, set32 + la t0, set32 +#ifndef CONFIG_32BIT + slli t0, t0, 32 + srli t0, t0, 32 +#endif + sub a0, a0, t0 + ret +.data +set32: + .reloc set32, R_RISCV_SET32, set32 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set6.S b/arch/riscv/kernel/tests/module_test/test_set6.S new file mode 100644 index 000000000000..c39ce4c219eb --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set6.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set6 +test_set6: + lw a0, set6 + la t0, set6 +#ifdef CONFIG_32BIT + slli t0, t0, 26 + srli t0, t0, 26 +#else + slli t0, t0, 58 + srli t0, t0, 58 +#endif + sub a0, a0, t0 + ret +.data +set6: + .reloc set6, R_RISCV_SET6, set6 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set8.S b/arch/riscv/kernel/tests/module_test/test_set8.S new file mode 100644 index 000000000000..a656173f6f99 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set8.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set8 +test_set8: + lw a0, set8 + la t0, set8 +#ifdef CONFIG_32BIT + slli t0, t0, 24 + srli t0, t0, 24 +#else + slli t0, t0, 56 + srli t0, t0, 56 +#endif + sub a0, a0, t0 + ret +.data +set8: + .reloc set8, R_RISCV_SET8, set8 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub16.S b/arch/riscv/kernel/tests/module_test/test_sub16.S new file mode 100644 index 000000000000..80f731d599ba --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub16.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub16 +test_sub16: + lh a0, sub16 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub16: + .reloc sub16, R_RISCV_ADD16, second + .reloc sub16, R_RISCV_SUB16, first + .half 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub32.S b/arch/riscv/kernel/tests/module_test/test_sub32.S new file mode 100644 index 000000000000..a341686e12df --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub32 +test_sub32: + lw a0, sub32 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub32: + .reloc sub32, R_RISCV_ADD32, second + .reloc sub32, R_RISCV_SUB32, first + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub6.S b/arch/riscv/kernel/tests/module_test/test_sub6.S new file mode 100644 index 000000000000..e8b61c1ec527 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub6.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. 
+ */ + +.text +.global test_sub6 +test_sub6: + lb a0, sub6 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub6: + .reloc sub6, R_RISCV_SET6, second + .reloc sub6, R_RISCV_SUB6, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub64.S b/arch/riscv/kernel/tests/module_test/test_sub64.S new file mode 100644 index 000000000000..a59e8afa88fd --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub64.S @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub64 +test_sub64: +#ifdef CONFIG_32BIT + lw a0, sub64 +#else + ld a0, sub64 +#endif + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub64: + .reloc sub64, R_RISCV_ADD64, second + .reloc sub64, R_RISCV_SUB64, first + .word 0 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub8.S b/arch/riscv/kernel/tests/module_test/test_sub8.S new file mode 100644 index 000000000000..ac5d0ec98de3 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub8.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub8 +test_sub8: + lb a0, sub8 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub8: + .reloc sub8, R_RISCV_ADD8, second + .reloc sub8, R_RISCV_SUB8, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S new file mode 100644 index 000000000000..90f22049d553 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_uleb_basic +test_uleb_basic: + ld a0, second + addi a0, a0, -127 + ret + +.global test_uleb_large +test_uleb_large: + ld a0, fourth + addi a0, a0, -0x07e8 + ret + +.data +first: + .space 127 +second: + .reloc second, R_RISCV_SET_ULEB128, second + .reloc second, R_RISCV_SUB_ULEB128, first + .dword 0 +third: + .space 1000 +fourth: + .reloc fourth, R_RISCV_SET_ULEB128, fourth + .reloc fourth, R_RISCV_SUB_ULEB128, third + .dword 0 diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index fae8f610d867..a1b9be3c4332 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -36,7 +36,21 @@ int show_unhandled_signals = 1; static DEFINE_SPINLOCK(die_lock); -static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) +static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) +{ + const void __user *uaddr = (__force const void __user *)insns; + + if (!user_mode(regs)) + return get_kernel_nofault(*val, insns); + + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; + + return copy_from_user_nofault(val, uaddr, sizeof(*val)); +} + +static void dump_instr(const char *loglvl, struct pt_regs *regs) { char str[sizeof("0000 ") * 12 + 2 + 1], *p = str; const u16 *insns = (u16 *)instruction_pointer(regs); @@ -45,7 +59,7 @@ static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) int i; for (i = -10; i < 2; i++) { - bad = get_kernel_nofault(val, &insns[i]); + bad = copy_code(regs, &val, &insns[i]); if (!bad) { p += sprintf(p, i == 0 ? "(%04hx) " : "%04hx ", val); } else { @@ -74,7 +88,7 @@ void die(struct pt_regs *regs, const char *str) print_modules(); if (regs) { show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + dump_instr(KERN_EMERG, regs); } cause = regs ? 
regs->cause : -1; @@ -107,6 +121,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); + dump_instr(KERN_EMERG, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -181,14 +196,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -#ifndef CONFIG_RISCV_M_MODE -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); -DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); -#else -int handle_misaligned_load(struct pt_regs *regs); -int handle_misaligned_store(struct pt_regs *regs); asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { @@ -231,7 +238,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs irqentry_nmi_exit(regs, state); } } -#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, @@ -360,34 +366,10 @@ static void noinstr handle_riscv_irq(struct pt_regs *regs) asmlinkage void noinstr do_irq(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "move a0, %[regs] \n" - "call handle_riscv_irq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp), [regs] "r" (regs) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + + if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack()) + call_on_irq_stack(regs, handle_riscv_irq); + else handle_riscv_irq(regs); irqentry_exit(regs, state); @@ -410,48 +392,14 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_VMAP_STACK -/* - * Extra stack space that allows us to provide panic messages when the kernel - * has overflowed its stack. - */ -static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); -/* - * A temporary stack for use by handle_kernel_stack_overflow. This is used so - * we can call into C code to get the per-hart overflow stack. Usage of this - * stack must be protected by spin_shadow_stack. - */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); - -/* - * A pseudo spinlock to protect the shadow stack from being used by multiple - * harts concurrently. This isn't a real spinlock because the lock side must - * be taken without a valid stack and only a single register, it's only taken - * while in the process of panicing anyway so the performance and error - * checking a proper spinlock gives us doesn't matter. 
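The do_irq() rewrite above replaces an open-coded inline-assembly stack switch with call_on_irq_stack(). A toy user-space model of the dispatch; the stand-in macro, the opaque pt_regs, and the no-op stack switch are all assumptions for illustration:

#include <stdbool.h>
#include <stdio.h>

#define IS_ENABLED(option) (option)     /* stand-in for the kconfig macro */
#define CONFIG_IRQ_STACKS 1             /* assumed configuration */

struct pt_regs;                         /* opaque in this sketch */

static void handle_irq(struct pt_regs *regs)
{
        (void)regs;
        printf("handling irq\n");
}

/* The real helper switches sp to the per-CPU IRQ stack in assembly
 * before making the call; this sketch only forwards it. */
static void call_on_irq_stack(struct pt_regs *regs,
                              void (*handler)(struct pt_regs *))
{
        handler(regs);
}

void do_irq_sketch(struct pt_regs *regs, bool on_thread_stack)
{
        if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack)
                call_on_irq_stack(regs, handle_irq);
        else
                handle_irq(regs);
}

Unlike the old #ifdef block, both branches are compiled and type-checked in every configuration; the disabled one is simply eliminated as dead code.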
- */ -unsigned long spin_shadow_stack; - -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); - /* - * We're done with the shadow stack by this point, as we're on the - * overflow stack. Tell any other concurrent overflowing harts that - * they can proceed with panicing by releasing the pseudo-spinlock. - * - * This pairs with an amoswap.aq in handle_kernel_stack_overflow. - */ - smp_store_release(&spin_shadow_stack, 0); - console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 378f5b151443..5eba37147caa 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -6,12 +6,16 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/perf_event.h> #include <linux/irq.h> #include <linux/stringify.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> +#include <asm/entry-common.h> +#include <asm/hwprobe.h> +#include <asm/cpufeature.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -151,53 +155,134 @@ #define PRECISION_S 0 #define PRECISION_D 1 -#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ -static inline type load_##type(const type *addr) \ -{ \ - type val; \ - asm (#insn " %0, %1" \ - : "=&r" (val) : "m" (*addr)); \ - return val; \ +#ifdef CONFIG_FPU + +#define FP_GET_RD(insn) (insn >> 7 & 0x1F) + +extern void put_f32_reg(unsigned long fp_reg, unsigned long value); + +static int set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + + put_f32_reg(fp_reg, val); + regs->status |= SR_FS_DIRTY; + + return 0; } -#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ -static inline void store_##type(type *addr, type val) \ -{ \ - asm volatile (#insn " %0, %1\n" \ - : : "r" (val), "m" (*addr)); \ +extern void put_f64_reg(unsigned long fp_reg, unsigned long value); + +static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + unsigned long value; + +#if __riscv_xlen == 32 + value = (unsigned long) &val; +#else + value = val; +#endif + put_f64_reg(fp_reg, value); + regs->status |= SR_FS_DIRTY; + + return 0; } -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) -#if defined(CONFIG_64BIT) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#if __riscv_xlen == 32 +extern void get_f64_reg(unsigned long fp_reg, u64 *value); + +static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + u64 val; + + get_f64_reg(fp_reg, &val); + regs->status |= SR_FS_DIRTY; + + return val; +} #else -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) 
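Every branch of the misaligned-access emulation keys off fixed bit fields of the trapped instruction. For reference, a standalone sketch of the field extraction that macros like FP_GET_RD() above perform, plus the length rule that get_insn() applies below; the free-standing form is an assumption, the bit positions are from the ISA:

#include <stdint.h>

/* Uncompressed RISC-V encoding: rd = bits 11:7, rs1 = bits 19:15,
 * rs2 = bits 24:20. */
static unsigned int insn_rd(uint32_t insn)  { return (insn >> 7)  & 0x1f; }
static unsigned int insn_rs1(uint32_t insn) { return (insn >> 15) & 0x1f; }
static unsigned int insn_rs2(uint32_t insn) { return (insn >> 20) & 0x1f; }

/* An instruction whose two lowest bits are 0b11 is a full 32-bit
 * instruction; anything else is a 16-bit compressed one. */
static unsigned int insn_len(uint16_t first_parcel)
{
        return (first_parcel & 0x3) == 0x3 ? 4 : 2;
}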
-static inline u64 load_u64(const u64 *addr) +extern unsigned long get_f64_reg(unsigned long fp_reg); + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - return load_u32((u32 *)addr) - + ((u64)load_u32((u32 *)addr + 1) << 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f64_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } -static inline void store_u64(u64 *addr, u64 val) +#endif + +extern unsigned long get_f32_reg(unsigned long fp_reg); + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - store_u32((u32 *)addr, val); - store_u32((u32 *)addr + 1, val >> 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f32_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } + +#else /* CONFIG_FPU */ +static void set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) {} + +static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {} + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + #endif -static inline ulong get_insn(ulong mepc) +#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs)) +#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs)) +#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs)) + +#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs)) +#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) +#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) + +#ifdef CONFIG_RISCV_M_MODE +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + u8 val; + + asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); + *r_val = val; + + return 0; +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); + + return 0; +} + +static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) { register ulong __mepc asm ("a2") = mepc; ulong val, rvc_mask = 3, tmp; @@ -226,23 +311,119 @@ static inline ulong get_insn(ulong mepc) : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), [xlen_minus_16] "i" (XLEN_MINUS_16)); - return val; + *r_insn = val; + + return 0; +} +#else +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + if (user_mode(regs)) { + return __get_user(*r_val, addr); + } else { + *r_val = *addr; + return 0; + } +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + if (user_mode(regs)) { + return __put_user(val, addr); + } else { + *addr = val; + return 0; + } } +#define __read_insn(regs, insn, insn_addr) \ +({ \ + int __ret; \ + \ + if (user_mode(regs)) { \ + __ret = __get_user(insn, insn_addr); \ + } else { \ + insn = *insn_addr; \ + __ret = 0; \ + } \ + \ + __ret; \ +}) + +static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) +{ + ulong insn = 0; + + if (epc & 0x2) { + ulong tmp = 0; + u16 __user *insn_addr = (u16 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + /* __get_user() uses regular "lw" which sign extend the loaded + * value make sure to clear higher order bits in case we "or" it + * below with the upper 16 bits half. 
+ */ + insn &= GENMASK(15, 0); + if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn_addr++; + if (__read_insn(regs, tmp, insn_addr)) + return -EFAULT; + *r_insn = (tmp << 16) | insn; + + return 0; + } else { + u32 __user *insn_addr = (u32 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn &= GENMASK(15, 0); + *r_insn = insn; + + return 0; + } +} +#endif + union reg_data { u8 data_bytes[8]; ulong data_ulong; u64 data_u64; }; +static bool unaligned_ctl __read_mostly; + +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ + int handle_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); + unsigned long insn; + unsigned long addr = regs->badaddr; int i, fp = 0, shift = 0, len = 0; + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; + regs->epc = 0; if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { @@ -305,13 +486,21 @@ int handle_misaligned_load(struct pt_regs *regs) return -1; } + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + val.data_u64 = 0; - for (i = 0; i < len; i++) - val.data_bytes[i] = load_u8((void *)(addr + i)); + for (i = 0; i < len; i++) { + if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + return -1; + } - if (fp) - return -1; - SET_RD(insn, regs, val.data_ulong << shift >> shift); + if (!fp) + SET_RD(insn, regs, val.data_ulong << shift >> shift); + else if (len == 8) + set_f64_rd(insn, regs, val.data_u64); + else + set_f32_rd(insn, regs, val.data_ulong); regs->epc = epc + INSN_LEN(insn); @@ -322,9 +511,20 @@ int handle_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); - int i, len = 0; + unsigned long insn; + unsigned long addr = regs->badaddr; + int i, len = 0, fp = 0; + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; regs->epc = 0; @@ -336,6 +536,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { len = 8; #endif + } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2(insn, regs); + } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2(insn, regs); } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { len = 2; #if defined(CONFIG_64BIT) @@ -354,15 +562,88 @@ int handle_misaligned_store(struct pt_regs *regs) ((insn >> SH_RD) & 0x1f)) { len = 4; val.data_ulong = GET_RS2C(insn, regs); + } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2C(insn, regs); +#if !defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_FSW) == 
INSN_MATCH_C_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2C(insn, regs); +#endif } else { regs->epc = epc; return -1; } - for (i = 0; i < len; i++) - store_u8((void *)(addr + i), val.data_bytes[i]); + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + + for (i = 0; i < len; i++) { + if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + return -1; + } regs->epc = epc + INSN_LEN(insn); return 0; } + +bool check_unaligned_access_emulated(int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + unsigned long tmp_var, tmp_val; + bool misaligned_emu_detected; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + __asm__ __volatile__ ( + " "REG_L" %[tmp], 1(%[ptr])\n" + : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); + + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + /* + * If unaligned_ctl is already set, this means that we detected that all + * CPUS uses emulated misaligned access at boot time. If that changed + * when hotplugging the new cpu, this is something we don't handle. + */ + if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); + while (true) + cpu_relax(); + } + + return misaligned_emu_detected; +} + +void unaligned_emulation_finish(void) +{ + int cpu; + + /* + * We can only support PR_UNALIGN controls if all CPUs have misaligned + * accesses emulated since tasks requesting such control can run on any + * CPU. + */ + for_each_present_cpu(cpu) { + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_EMULATED) { + return; + } + } + unaligned_ctl = true; +} + +bool unaligned_ctl_available(void) +{ + return unaligned_ctl; +} diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index e8aa7c380007..9b517fe1b8a8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -36,7 +36,7 @@ CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY endif # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 82f97d67c23e..8f884227e8bc 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -8,7 +8,7 @@ .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -ENTRY(__vdso_flush_icache) +SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc #ifdef CONFIG_SMP li a7, __NR_riscv_flush_icache @@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache) #endif ret .cfi_endproc -ENDPROC(__vdso_flush_icache) +SYM_FUNC_END(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S index bb0c05e2ffba..9c1bd531907f 100644 --- a/arch/riscv/kernel/vdso/getcpu.S +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -8,11 +8,11 @@ .text /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ -ENTRY(__vdso_getcpu) +SYM_FUNC_START(__vdso_getcpu) .cfi_startproc /* For now, just do the syscall. 
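unaligned_ctl above is what makes the PR_SET_UNALIGN prctl meaningful on RISC-V: it is only enabled once every present CPU is known to emulate misaligned accesses. From user space a task opts into strict alignment like this; a usage sketch that assumes such a kernel, and on other configurations the call simply fails:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        /* Request SIGBUS on misaligned accesses instead of
         * transparent in-kernel emulation. */
        if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS, 0, 0, 0))
                perror("PR_SET_UNALIGN");
        return 0;
}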
*/ li a7, __NR_getcpu ecall ret .cfi_endproc -ENDPROC(__vdso_getcpu) +SYM_FUNC_END(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index d40bec6ac078..cadf725ef798 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -37,7 +37,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, /* This is something we can handle, fill out the pairs. */ while (p < end) { - if (p->key <= RISCV_HWPROBE_MAX_KEY) { + if (riscv_hwprobe_key_is_valid(p->key)) { p->value = avd->all_cpu_hwprobe_values[p->key]; } else { diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 10438c7c626a..3dc022aa8931 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -7,10 +7,10 @@ #include <asm/unistd.h> .text -ENTRY(__vdso_rt_sigreturn) +SYM_FUNC_START(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn ecall .cfi_endproc -ENDPROC(__vdso_rt_sigreturn) +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S index 4e704146c77a..77e57f830521 100644 --- a/arch/riscv/kernel/vdso/sys_hwprobe.S +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -5,11 +5,11 @@ #include <asm/unistd.h> .text -ENTRY(riscv_hwprobe) +SYM_FUNC_START(riscv_hwprobe) .cfi_startproc li a7, __NR_riscv_hwprobe ecall ret .cfi_endproc -ENDPROC(riscv_hwprobe) +SYM_FUNC_END(riscv_hwprobe) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 82ce64900f3d..cbe2a179331d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -23,35 +23,31 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } - .note : { *(.note.*) } :text :note .dynamic : { *(.dynamic) } :text :dynamic + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } + + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - /* - * This linker script is used both with -r and with -shared. - * For the layouts to match, we need to skip more than enough - * space for the dynamic symbol table, etc. If this amount is - * insufficient, ld -shared will error; simply increase it here. + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. */ - . = 0x800; + . = ALIGN(16); .text : { *(.text .text.*) } :text . 
= ALIGN(4); .alternative : { - __alt_start = .; *(.alternative) - __alt_end = .; - } - - .data : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) } } diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 74bb27440527..a944294f6f23 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -14,7 +14,7 @@ #include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/spinlock.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_aia_imsic.h> struct aia_hgei_control { diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 48ae0d4b3932..225a435d9c9a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/kvm_host.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/sbi.h> long kvm_arch_dev_ioctl(struct file *filp, diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 44bc324aeeb0..23c0e82b5103 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,7 +12,7 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/insn-def.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 08ba48a395aa..030904d82b58 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index c6ebce6126b5..f8c9fa0c03c5 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -13,7 +13,7 @@ #include <linux/uaccess.h> #include <linux/kvm_host.h> #include <asm/cacheflush.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b430cbb69521..b339a2682f25 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S index d7a256eb53f4..b22de1231144 100644 --- a/arch/riscv/lib/clear_page.S +++ b/arch/riscv/lib/clear_page.S @@ -29,41 +29,41 @@ SYM_FUNC_START(clear_page) lw a1, riscv_cboz_block_size add a2, a0, a2 .Lzero_loop: - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, 
a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 bltu a0, a2, .Lzero_loop ret diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 1a40d01a9543..44e009ec5fef 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,8 +7,7 @@ #include <asm/asm.h> /* void *memcpy(void *, const void *, size_t) */ -ENTRY(__memcpy) -WEAK(memcpy) +SYM_FUNC_START(__memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -105,6 +104,7 @@ WEAK(memcpy) bltu a1, a3, 5b 6: ret -END(__memcpy) +SYM_FUNC_END(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 838ff2022fe3..cb3e2e7ef0ba 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -7,7 +7,6 @@ #include <asm/asm.h> SYM_FUNC_START(__memmove) -SYM_FUNC_START_WEAK(memmove) /* * Returns * a0 - dest @@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove) */ /* Return if nothing to do */ - beq a0, a1, return_from_memmove - beqz a2, return_from_memmove + beq a0, a1, .Lreturn_from_memmove + beqz a2, .Lreturn_from_memmove /* * Register Uses @@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove) * small enough not to bother. */ andi t0, a2, -(2 * SZREG) - beqz t0, byte_copy + beqz t0, .Lbyte_copy /* * Now solve for t5 and t6. @@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove) */ xor t0, a0, a1 andi t1, t0, (SZREG - 1) - beqz t1, coaligned_copy + beqz t1, .Lcoaligned_copy /* Fall through to misaligned fixup copy */ -misaligned_fixup_copy: - bltu a1, a0, misaligned_fixup_copy_reverse +.Lmisaligned_fixup_copy: + bltu a1, a0, .Lmisaligned_fixup_copy_reverse -misaligned_fixup_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lmisaligned_fixup_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -153,10 +152,10 @@ misaligned_fixup_copy_forward: mv t3, t6 /* Fix the dest pointer in case the loop was broken */ add a1, t3, a5 /* Restore the src pointer */ - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -misaligned_fixup_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lmisaligned_fixup_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse: mv t4, t5 /* Fix the dest pointer in case the loop was broken */ add a4, t4, a5 /* Restore the src pointer */ - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * Simple copy loops for SZREG co-aligned memory locations. * These also make calls to do byte copies for any unaligned * data at their terminations. 
*/ -coaligned_copy: - bltu a1, a0, coaligned_copy_reverse +.Lcoaligned_copy: + bltu a1, a0, .Lcoaligned_copy_reverse -coaligned_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lcoaligned_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward 1: REG_L t1, ( 0 * SZREG)(a1) @@ -235,10 +234,10 @@ coaligned_copy_forward: REG_S t1, (-1 * SZREG)(t3) bne t3, t6, 1b - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -coaligned_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lcoaligned_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse 1: REG_L t1, (-1 * SZREG)(a4) @@ -247,7 +246,7 @@ coaligned_copy_reverse: REG_S t1, ( 0 * SZREG)(t4) bne t4, t5, 1b - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * These are basically sub-functions within the function. They @@ -258,7 +257,7 @@ coaligned_copy_reverse: * up from where they were left and we avoid code duplication * without any overhead except the call in and return jumps. */ -byte_copy_until_aligned_forward: +.Lbyte_copy_until_aligned_forward: beq t3, t5, 2f 1: lb t1, 0(a1) @@ -269,7 +268,7 @@ byte_copy_until_aligned_forward: 2: jalr zero, 0x0(t0) /* Return to multibyte copy loop */ -byte_copy_until_aligned_reverse: +.Lbyte_copy_until_aligned_reverse: beq t4, t6, 2f 1: lb t1, -1(a4) @@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse: * These will byte copy until they reach the end of data to copy. * At that point, they will call to return from memmove. */ -byte_copy: - bltu a1, a0, byte_copy_reverse +.Lbyte_copy: + bltu a1, a0, .Lbyte_copy_reverse -byte_copy_forward: +.Lbyte_copy_forward: beq t3, t4, 2f 1: lb t1, 0(a1) @@ -299,7 +298,7 @@ byte_copy_forward: 2: ret -byte_copy_reverse: +.Lbyte_copy_reverse: beq t4, t3, 2f 1: lb t1, -1(a4) @@ -309,10 +308,10 @@ byte_copy_reverse: bne t4, t3, 1b 2: -return_from_memmove: +.Lreturn_from_memmove: ret -SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) SYM_FUNC_ALIAS(__pi_memmove, __memmove) SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 34c5360c6705..35f358e70bdb 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,8 +8,7 @@ #include <asm/asm.h> /* void *memset(void *, int, size_t) */ -ENTRY(__memset) -WEAK(memset) +SYM_FUNC_START(__memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -110,4 +109,5 @@ WEAK(memset) bltu t0, a3, 5b 6: ret -END(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 09b47ebacf2e..3ab438f30d13 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -10,8 +10,7 @@ _asm_extable 100b, \lbl .endm -ENTRY(__asm_copy_to_user) -ENTRY(__asm_copy_from_user) +SYM_FUNC_START(__asm_copy_to_user) /* Enable access to user memory */ li t6, SR_SUM @@ -181,13 +180,13 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 sub a0, t5, a0 ret -ENDPROC(__asm_copy_to_user) -ENDPROC(__asm_copy_from_user) +SYM_FUNC_END(__asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) -ENTRY(__clear_user) +SYM_FUNC_START(__clear_user) /* Enable access to user memory */ li t6, SR_SUM @@ -233,5 +232,5 @@ ENTRY(__clear_user) csrc CSR_STATUS, t6 sub a0, a3, a0 ret -ENDPROC(__clear_user) +SYM_FUNC_END(__clear_user) 
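All of these string and uaccess routines now follow the same linkage pattern: the body is defined once under a double-underscore name and the standard symbol is attached afterwards as a (possibly weak) alias, instead of nesting ENTRY/WEAK labels inside one body. A toy C analog using GNU attributes; the kernel expresses this with SYM_FUNC_ALIAS_WEAK in assembly, not C:

void *__memcpy(void *dst, const void *src, unsigned long n)
{
        char *d = dst;
        const char *s = src;

        while (n--)
                *d++ = *s++;
        return dst;
}

/* Weak alias: an instrumented build (KASAN and friends) can supply
 * its own memcpy, which then takes precedence at link time while
 * __memcpy remains callable directly. */
void *memcpy(void *dst, const void *src, unsigned long n)
        __attribute__((weak, alias("__memcpy")));

The same split is why SYM_FUNC_END now closes only __memmove and the memmove alias is declared afterwards: an alias is just a second name for the same address, which keeps each function's size annotation well-defined.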
EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 9c454f90fd3d..3a4dfc8babcf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -36,3 +36,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c new file mode 100644 index 000000000000..a993ad11d0ec --- /dev/null +++ b/arch/riscv/mm/cache-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#include <asm/dma-noncoherent.h> + +struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init; + +void +riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) +{ + if (!ops) + return; + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index f1387272a551..55a34f2020a8 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -3,7 +3,9 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/of.h> +#include <asm/acpi.h> #include <asm/cacheflush.h> #ifdef CONFIG_SMP @@ -124,13 +126,24 @@ void __init riscv_init_cbo_blocksizes(void) unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; struct device_node *node; + struct acpi_table_header *rhct; + acpi_status status; + + if (acpi_disabled) { + for_each_of_cpu_node(node) { + /* set block-size for cbom and/or cboz extension if available */ + cbo_get_block_size(node, "riscv,cbom-block-size", + &cbom_block_size, &cbom_hartid); + cbo_get_block_size(node, "riscv,cboz-block-size", + &cboz_block_size, &cboz_hartid); + } + } else { + status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); + if (ACPI_FAILURE(status)) + return; - for_each_of_cpu_node(node) { - /* set block-size for cbom and/or cboz extension if available */ - cbo_get_block_size(node, "riscv,cbom-block-size", - &cbom_block_size, &cbom_hartid); - cbo_get_block_size(node, "riscv,cboz-block-size", - &cboz_block_size, &cboz_hartid); + acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL); + acpi_put_table((struct acpi_table_header *)rhct); } if (cbom_block_size) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b76e7e192eb1..4e4e469b8dd6 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init; int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; EXPORT_SYMBOL_GPL(dma_cache_alignment); -struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { - .wback = NULL, - .inv = NULL, - .wback_inv = NULL, -}; - static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); @@ -31,7 +25,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) return; } #endif - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) @@ -45,7 +39,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_wback_inv(phys_addr_t 
paddr, size_t size) @@ -59,7 +53,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, vaddr, size, riscv_cbom_block_size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) @@ -131,7 +125,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } #endif - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, @@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void) if (!noncoherent_supported) dma_cache_alignment = 1; } - -void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) -{ - if (!ops) - return; - - noncoherent_cache_ops = *ops; -} -EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d9a4e8702864..2e011cbddf3a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,10 +49,12 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); +#ifdef CONFIG_64BIT bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); +#endif phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); @@ -664,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { - if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) - return PGDIR_SIZE; - - if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (pgtable_l5_enabled && + !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (pgtable_l4_enabled && + !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (IS_ENABLED(CONFIG_64BIT) && + !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 161d0b34c2cb..fc5fc4f785c4 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/pagewalk.h> #include <linux/pgtable.h> +#include <linux/vmalloc.h> #include <asm/tlbflush.h> #include <asm/bitops.h> #include <asm/set_memory.h> @@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) return new_val; } -static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgd_t val = READ_ONCE(*pgd); - - if (pgd_leaf(val)) { - val = __pgd(set_pageattr_masks(pgd_val(val), walk)); - set_pgd(pgd, val); - } - - return 0; -} - static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next, } static const struct mm_walk_ops pageattr_ops = { - .pgd_entry = pageattr_pgd_entry, .p4d_entry = pageattr_p4d_entry, .pud_entry = pageattr_pud_entry, .pmd_entry = pageattr_pmd_entry, @@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = { 
.walk_lock = PGWALK_RDLOCK, }; +#ifdef CONFIG_64BIT +static int __split_linear_mapping_pmd(pud_t *pudp, + unsigned long vaddr, unsigned long end) +{ + pmd_t *pmdp; + unsigned long next; + + pmdp = pmd_offset(pudp, vaddr); + + do { + next = pmd_addr_end(vaddr, end); + + if (next - vaddr >= PMD_SIZE && + vaddr <= (vaddr & PMD_MASK) && end >= next) + continue; + + if (pmd_leaf(*pmdp)) { + struct page *pte_page; + unsigned long pfn = _pmd_pfn(*pmdp); + pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK); + pte_t *ptep_new; + int i; + + pte_page = alloc_page(GFP_KERNEL); + if (!pte_page) + return -ENOMEM; + + ptep_new = (pte_t *)page_address(pte_page); + for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new) + set_pte(ptep_new, pfn_pte(pfn + i, prot)); + + smp_wmb(); + + set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE)); + } + } while (pmdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp; + unsigned long next; + int ret; + + pudp = pud_offset(p4dp, vaddr); + + do { + next = pud_addr_end(vaddr, end); + + if (next - vaddr >= PUD_SIZE && + vaddr <= (vaddr & PUD_MASK) && end >= next) + continue; + + if (pud_leaf(*pudp)) { + struct page *pmd_page; + unsigned long pfn = _pud_pfn(*pudp); + pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK); + pmd_t *pmdp_new; + int i; + + pmd_page = alloc_page(GFP_KERNEL); + if (!pmd_page) + return -ENOMEM; + + pmdp_new = (pmd_t *)page_address(pmd_page); + for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new) + set_pmd(pmdp_new, + pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot)); + + smp_wmb(); + + set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pmd(pudp, vaddr, next); + if (ret) + return ret; + } while (pudp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp; + unsigned long next; + int ret; + + p4dp = p4d_offset(pgdp, vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + /* + * If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't + * need to split, we'll change the protections on the whole P4D. + */ + if (next - vaddr >= P4D_SIZE && + vaddr <= (vaddr & P4D_MASK) && end >= next) + continue; + + if (p4d_leaf(*p4dp)) { + struct page *pud_page; + unsigned long pfn = _p4d_pfn(*p4dp); + pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK); + pud_t *pudp_new; + int i; + + pud_page = alloc_page(GFP_KERNEL); + if (!pud_page) + return -ENOMEM; + + /* + * Fill the pud level with leaf puds that have the same + * protections as the leaf p4d. + */ + pudp_new = (pud_t *)page_address(pud_page); + for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new) + set_pud(pudp_new, + pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot)); + + /* + * Make sure the pud filling is not reordered with the + * p4d store which could result in seeing a partially + * filled pud level. 
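Each split helper above follows the same publish-after-initialize protocol: fully populate the new lower-level table, issue smp_wmb(), and only then store the entry that makes the table reachable, so a concurrent walker can never observe a half-filled level. A generic user-space analog using C11 release semantics; the kernel relies on smp_wmb() plus the page-table store rather than an atomic:

#include <stdatomic.h>
#include <stddef.h>

struct level { long entries[512]; };

static _Atomic(struct level *) visible;

static void publish(struct level *newl)
{
        for (size_t i = 0; i < 512; i++)
                newl->entries[i] = (long)i;     /* fill completely first */

        /* Release store: any reader that observes the pointer also
         * observes every entry written above. */
        atomic_store_explicit(&visible, newl, memory_order_release);
}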
+ */ + smp_wmb(); + + set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pud(p4dp, vaddr, next); + if (ret) + return ret; + } while (p4dp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + unsigned long next; + int ret; + + do { + next = pgd_addr_end(vaddr, end); + /* We never use PGD mappings for the linear mapping */ + ret = __split_linear_mapping_p4d(pgdp, vaddr, next); + if (ret) + return ret; + } while (pgdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int split_linear_mapping(unsigned long start, unsigned long end) +{ + return __split_linear_mapping_pgd(pgd_offset_k(start), start, end); +} +#endif /* CONFIG_64BIT */ + static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { int ret; unsigned long start = addr; unsigned long end = start + PAGE_SIZE * numpages; + unsigned long __maybe_unused lm_start; + unsigned long __maybe_unused lm_end; struct pageattr_masks masks = { .set_mask = set_mask, .clear_mask = clear_mask @@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, return 0; mmap_write_lock(&init_mm); + +#ifdef CONFIG_64BIT + /* + * We are about to change the permissions of a kernel mapping, we must + * apply the same changes to its linear mapping alias, which may imply + * splitting a huge mapping. + */ + + if (is_vmalloc_or_module_addr((void *)start)) { + struct vm_struct *area = NULL; + int i, page_start; + + area = find_vm_area((void *)start); + page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT; + + for (i = page_start; i < page_start + numpages; ++i) { + lm_start = (unsigned long)page_address(area->pages[i]); + lm_end = lm_start + PAGE_SIZE; + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + } else if (is_kernel_mapping(start) || is_linear_mapping(start)) { + lm_start = (unsigned long)lm_alias(start); + lm_end = (unsigned long)lm_alias(end); + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + +unlock: + mmap_write_unlock(&init_mm); + + /* + * We can't use flush_tlb_kernel_range() here as we may have split a + * hugepage that is larger than that, so let's flush everything. 
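A caller-level view of what __set_memory() now guarantees: changing permissions on a vmalloc'ed or kernel range also splits and updates the linear-map alias of every backing page before the TLB flush. A kernel-context usage sketch with a hypothetical caller; set_memory_ro() and vmalloc() are the real APIs:

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/set_memory.h>

static int protect_one_page(void)
{
        void *buf = vmalloc(PAGE_SIZE);

        if (!buf)
                return -ENOMEM;

        /* Also remaps the page's linear-map alias read-only. */
        return set_memory_ro((unsigned long)buf, 1);
}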
+ */ + flush_tlb_all(); +#else + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + mmap_write_unlock(&init_mm); flush_tlb_kernel_range(start, end); +#endif return ret; } @@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = __pgprot(0), - .clear_mask = __pgprot(_PAGE_PRESENT) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + __pgprot(0), __pgprot(_PAGE_PRESENT)); } int set_direct_map_default_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = PAGE_KERNEL, - .clear_mask = __pgprot(0) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + PAGE_KERNEL, __pgprot(0)); } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c index c5fc5ec96f6d..370a422ede11 100644 --- a/arch/riscv/mm/pmem.c +++ b/arch/riscv/mm/pmem.c @@ -17,7 +17,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); @@ -29,6 +29,6 @@ void arch_invalidate_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 20a9f991a6d7..657c27bc07a7 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -129,55 +129,55 @@ static struct ptd_mm_info efi_ptd_info = { /* Page Table Entry */ struct prot_bits { u64 mask; - u64 val; const char *set; const char *clear; }; static const struct prot_bits pte_bits[] = { { +#ifdef CONFIG_64BIT + .mask = _PAGE_NAPOT, + .set = "N", + .clear = ".", + }, { + .mask = _PAGE_MTMASK_SVPBMT, + .set = "MT(%s)", + .clear = " .. ", + }, { +#endif .mask = _PAGE_SOFT, - .val = _PAGE_SOFT, - .set = "RSW", - .clear = " ", + .set = "RSW(%d)", + .clear = " .. 
", }, { .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, .set = "D", .clear = ".", }, { .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, .set = "A", .clear = ".", }, { .mask = _PAGE_GLOBAL, - .val = _PAGE_GLOBAL, .set = "G", .clear = ".", }, { .mask = _PAGE_USER, - .val = _PAGE_USER, .set = "U", .clear = ".", }, { .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, .set = "X", .clear = ".", }, { .mask = _PAGE_WRITE, - .val = _PAGE_WRITE, .set = "W", .clear = ".", }, { .mask = _PAGE_READ, - .val = _PAGE_READ, .set = "R", .clear = ".", }, { .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, .set = "V", .clear = ".", } @@ -208,15 +208,30 @@ static void dump_prot(struct pg_state *st) unsigned int i; for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { - const char *s; + char s[7]; + unsigned long val; - if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) - s = pte_bits[i].set; - else - s = pte_bits[i].clear; + val = st->current_prot & pte_bits[i].mask; + if (val) { + if (pte_bits[i].mask == _PAGE_SOFT) + sprintf(s, pte_bits[i].set, val >> 8); +#ifdef CONFIG_64BIT + else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { + if (val == _PAGE_NOCACHE_SVPBMT) + sprintf(s, pte_bits[i].set, "NC"); + else if (val == _PAGE_IO_SVPBMT) + sprintf(s, pte_bits[i].set, "IO"); + else + sprintf(s, pte_bits[i].set, "??"); + } +#endif + else + sprintf(s, "%s", pte_bits[i].set); + } else { + sprintf(s, "%s", pte_bits[i].clear); + } - if (s) - pt_dump_seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } @@ -384,6 +399,9 @@ static int __init ptdump_init(void) kernel_ptd_info.base_addr = KERN_VIRT_START; + pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD"; + pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD"; + for (i = 0; i < ARRAY_SIZE(pg_level); i++) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 77be59aadc73..e6659d7368b3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -3,33 +3,56 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/sched.h> +#include <linux/hugetlb.h> #include <asm/sbi.h> #include <asm/mmu_context.h> static inline void local_flush_tlb_all_asid(unsigned long asid) { - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); } static inline void local_flush_tlb_page_asid(unsigned long addr, unsigned long asid) { - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); } -static inline void local_flush_tlb_range(unsigned long start, - unsigned long size, unsigned long stride) +/* + * Flush entire TLB if number of entries to be flushed is greater + * than the threshold below. 
+ */ +static unsigned long tlb_flush_all_threshold __read_mostly = 64; + +static void local_flush_tlb_range_threshold_asid(unsigned long start, + unsigned long size, + unsigned long stride, + unsigned long asid) { - if (size <= stride) - local_flush_tlb_page(start); - else - local_flush_tlb_all(); + unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); + int i; + + if (nr_ptes_in_range > tlb_flush_all_threshold) { + local_flush_tlb_all_asid(asid); + return; + } + + for (i = 0; i < nr_ptes_in_range; ++i) { + local_flush_tlb_page_asid(start, asid); + start += stride; + } } static inline void local_flush_tlb_range_asid(unsigned long start, @@ -37,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, { if (size <= stride) local_flush_tlb_page_asid(start, asid); - else + else if (size == FLUSH_TLB_MAX_SIZE) local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_threshold_asid(start, size, stride, asid); } static void __ipi_flush_tlb_all(void *info) @@ -51,7 +76,7 @@ void flush_tlb_all(void) if (riscv_use_ipi_for_rfence()) on_each_cpu(__ipi_flush_tlb_all, NULL, 1); else - sbi_remote_sfence_vma(NULL, 0, -1); + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } struct flush_tlb_range_data { @@ -68,68 +93,62 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __ipi_flush_tlb_range(void *info) -{ - struct flush_tlb_range_data *d = info; - - local_flush_tlb_range(d->start, d->size, d->stride); -} - static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; - struct cpumask *cmask = mm_cpumask(mm); - unsigned int cpuid; + const struct cpumask *cmask; + unsigned long asid = FLUSH_TLB_NO_ASID; bool broadcast; - if (cpumask_empty(cmask)) - return; + if (mm) { + unsigned int cpuid; - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; - - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { - local_flush_tlb_range_asid(start, size, stride, asid); - } + cmask = mm_cpumask(mm); + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; } else { - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = 0; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range, - &ftd, 1); - } else - sbi_remote_sfence_vma(cmask, start, size); - } else { - local_flush_tlb_range(start, size, stride); - } + cmask = cpu_online_mask; + broadcast = true; } - put_cpu(); + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); + } else { + 
local_flush_tlb_range_asid(start, size, stride, asid); + } + + if (mm) + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range(mm, start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) @@ -140,8 +159,40 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + unsigned long stride_size; + + if (!is_vm_hugetlb_page(vma)) { + stride_size = PAGE_SIZE; + } else { + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + } + + __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); } + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 9e6476719abb..280b0eb352b8 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -81,6 +81,14 @@ ifdef CONFIG_CFI_CLANG PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif +ifdef CONFIG_RELOCATABLE +PURGATORY_CFLAGS_REMOVE += -fPIE +endif + +ifdef CONFIG_SHADOW_CALL_STACK +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_SCS) +endif + CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0194f4554130..5bcf3af903da 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,15 +7,11 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ - -.macro size, sym:req - .size \sym, . 
- \sym -.endm +#include <linux/linkage.h> .text -.globl purgatory_start -purgatory_start: +SYM_CODE_START(purgatory_start) lla sp, .Lstack mv s0, a0 /* The hartid of the current hart */ @@ -28,8 +24,7 @@ purgatory_start: mv a1, s1 ld a2, riscv_kernel_entry jr a2 - -size purgatory_start +SYM_CODE_END(purgatory_start) .align 4 .rept 256 @@ -39,9 +34,6 @@ size purgatory_start .data -.globl riscv_kernel_entry -riscv_kernel_entry: - .quad 0 -size riscv_kernel_entry +SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b0d67ac8695f..3bec98d20283 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -236,6 +236,7 @@ config S390 select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select TTY + select USER_STACKTRACE_SUPPORT select VIRT_CPU_ACCOUNTING select ZONE_DMA # Note: keep the above list sorted alphabetically diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 7b7521762633..2ab4872fbee1 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/page-states.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); int __bootdata_preserved(__kaslr_enabled); +int __bootdata_preserved(cmma_flag) = 1; unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; unsigned long memory_limit; @@ -295,6 +297,12 @@ void parse_boot_command_line(void) if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; + if (!strcmp(param, "cmma")) { + rc = kstrtobool(val, &enabled); + if (!rc && !enabled) + cmma_flag = 0; + } + #if IS_ENABLED(CONFIG_KVM) if (!strcmp(param, "prot_virt")) { rc = kstrtobool(val, &enabled); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 8826c4f18645..8104e0e3d188 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/string.h> #include <linux/elf.h> +#include <asm/page-states.h> #include <asm/boot_data.h> #include <asm/sections.h> #include <asm/maccess.h> @@ -57,6 +58,48 @@ static void detect_facilities(void) machine.has_nx = 1; } +static int cmma_test_essa(void) +{ + unsigned long reg1, reg2, tmp = 0; + int rc = 1; + psw_t old; + + /* Test ESSA_GET_STATE */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" + " la %[rc],0\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [tmp] "=&d" (tmp), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [cmd] "i" (ESSA_GET_STATE) + : "cc", "memory"); + return rc; +} + +static void cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) { + cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; +} + static void setup_lpp(void) { S390_lowcore.current_pid = 0; @@ -306,6 +349,7 @@ void startup_kernel(void) setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); + cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); 
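cmma_init() above leaves cmma_flag with three possible values, which later code (see the ESSA helpers in page-states.h below) uses to pick between the plain and NO-DAT stable page states. A sketch of the decision table; the enum and helper names are invented for illustration:

enum cmma_mode {
        CMMA_OFF        = 0,    /* cmma=off on the command line, or the
                                 * ESSA probe took a program check */
        CMMA_DAT        = 1,    /* ESSA works, NO-DAT facility absent */
        CMMA_NODAT      = 2,    /* facility 147: NO-DAT states usable */
};

static int cmma_usable(int flag)    { return flag != CMMA_OFF; }
static int cmma_has_nodat(int flag) { return flag >= CMMA_NODAT; }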
setup_ident_map_size(max_physmem_end); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 3075d65e112c..e3a4500a5a75 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -2,6 +2,7 @@ #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/facility.h> #include <asm/sections.h> @@ -70,6 +71,10 @@ static void kasan_populate_shadow(void) crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pte, 1); /* * Current memory layout: @@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val) table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); crst_table_init(table, val); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void) if (!pte_leftover) { pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; + __arch_set_page_dat(pte, 1); } else { pte = pte_leftover; pte_leftover = NULL; @@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit) unsigned long asce_bits; int i; + /* + * Mark whole memory as no-dat. This must be done before any + * page tables are allocated, or kernel image builtin pages + * are marked as dat tables. + */ + for_each_physmem_online_range(i, &start, &end) + __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); + if (asce_limit == _REGION1_SIZE) { asce_type = _REGION2_ENTRY_EMPTY; asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit) crst_table_init((unsigned long *)swapper_pg_dir, asce_type); crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); /* * To allow prefixing the lowcore must be mapped with 4KB pages. 
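The vmem.c hunks above make the ordering in setup_vmem() explicit: all online memory is first put into the stable no-dat state, and only afterwards are the frames that actually back DAT tables (swapper_pg_dir, invalid_pg_dir, the crst and pte allocations) promoted back to stable. Reversing the order would declare frames already holding live DAT tables as unused for address translation. A minimal sketch of the pattern follows, assuming the __arch_set_page_nodat()/__arch_set_page_dat() helpers from asm/page-states.h; the range array and function name are invented for illustration:

    #include <asm/page-states.h>  /* __arch_set_page_nodat(), __arch_set_page_dat() */
    #include <asm/pgalloc.h>      /* CRST_ALLOC_ORDER */

    /* Hypothetical stand-in for the decompressor's physmem range iterator. */
    struct phys_range {
            unsigned long start;  /* physical start address, page aligned */
            unsigned long end;    /* physical end address, page aligned */
    };

    static void sketch_cmma_boot_setup(struct phys_range *ranges, int nr,
                                       unsigned long *crst_table)
    {
            int i;

            /* Step 1: default every online frame to stable no-dat ... */
            for (i = 0; i < nr; i++)
                    __arch_set_page_nodat((void *)ranges[i].start,
                                          (ranges[i].end - ranges[i].start) >> PAGE_SHIFT);

            /* Step 2: ... then mark the frames that hold a region/segment table. */
            __arch_set_page_dat(crst_table, 1UL << CRST_ALLOC_ORDER);
    }

Both helpers degrade to no-ops when cmma_flag is zero, so the sketch is safe to call whether or not the machine supports ESSA.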
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 21b9e5290c04..01f1682a73b7 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -73,8 +73,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #define flush_insn_slot(p) do { } while (0) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 829d68e2c685..bb1b4bef1878 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,6 @@ typedef struct { cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - struct list_head pgtable_list; struct list_head gmap_list; unsigned long gmap_asce; unsigned long asce; @@ -39,7 +38,6 @@ typedef struct { #define INIT_MM_CONTEXT(name) \ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 757fe6f0d802..929af18b0908 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk, unsigned long asce_type, init_entry; spin_lock_init(&mm->context.lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index c33c4deb545f..08fcbd628120 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,6 +7,9 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H +#include <asm/sections.h> +#include <asm/page.h> + #define ESSA_GET_STATE 0 #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 @@ -18,4 +21,60 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT +extern int __bootdata_preserved(cmma_flag); + +static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) +{ + unsigned long rc; + + asm volatile( + " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0" + : [rc] "=d" (rc) + : [paddr] "d" (paddr), + [cmd] "i" (cmd)); + return rc; +} + +static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd) +{ + unsigned long paddr = __pa(addr) & PAGE_MASK; + + while (num_pages--) { + essa(paddr, cmd); + paddr += PAGE_SIZE; + } +} + +static inline void __set_page_unused(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_UNUSED); +} + +static inline void __set_page_stable_dat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE); +} + +static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT); +} + +static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + if (cmma_flag < 2) + __set_page_stable_dat(addr, num_pages); + else + __set_page_stable_nodat(addr, num_pages); +} + +static inline void __arch_set_page_dat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + __set_page_stable_dat(addr, num_pages); +} + #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 
cfec0743314e..73b9c3bf377f 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); -void arch_set_page_dat(struct page *page, int order); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b248694e0024..e91cd6bbc330 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -159,13 +159,6 @@ struct zpci_dev { unsigned long *dma_table; int tlb_refresh; - spinlock_t iommu_bitmap_lock; - unsigned long *iommu_bitmap; - unsigned long *lazy_bitmap; - unsigned long iommu_size; - unsigned long iommu_pages; - unsigned int next_bit; - struct iommu_device iommu_dev; /* IOMMU core handle */ char res_name[16]; @@ -180,10 +173,6 @@ struct zpci_dev { struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; - /* software counters */ - atomic64_t allocated_pages; - atomic64_t mapped_pages; - atomic64_t unmapped_pages; u8 version; enum pci_bus_speed max_bus_speed; diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d6189ed14f84..f0c677ddd270 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -50,6 +50,9 @@ struct clp_fh_list_entry { #define CLP_UTIL_STR_LEN 64 #define CLP_PFIP_NR_SEGMENTS 4 +/* PCI function type numbers */ +#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */ + extern bool zpci_unique_uid; struct clp_rsp_slpc_pci { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 7119c04c51c5..42d7cc4262ca 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -82,117 +82,16 @@ enum zpci_ioat_dtype { #define ZPCI_TABLE_VALID_MASK 0x20 #define ZPCI_TABLE_PROT_MASK 0x200 -static inline unsigned int calc_rtx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_sx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_px(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; -} - -static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) -{ - *entry &= ZPCI_PTE_FLAG_MASK; - *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); -} - -static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) -{ - *entry &= ZPCI_RTE_FLAG_MASK; - *entry |= (sto & ZPCI_RTE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_RTX; -} - -static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) -{ - *entry &= ZPCI_STE_FLAG_MASK; - *entry |= (pto & ZPCI_STE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_SX; -} - -static inline void validate_rt_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry &= ~ZPCI_TABLE_OFFSET_MASK; - *entry |= ZPCI_TABLE_VALID; - *entry |= ZPCI_TABLE_LEN_RTX; -} - -static inline void validate_st_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_VALID; -} - -static inline void invalidate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_INVALID; -} - -static inline void validate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= 
ZPCI_PTE_VALID; -} - -static inline void entry_set_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_PROTECTED; -} - -static inline void entry_clr_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_UNPROTECTED; -} - -static inline int reg_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; -} - -static inline int pt_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; -} - -static inline unsigned long *get_rt_sto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) - return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); - else - return NULL; - -} - -static inline unsigned long *get_st_pto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) - return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); - else - return NULL; -} - -/* Prototypes */ -void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(gfp_t gfp); -void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp); -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); - -extern const struct dma_map_ops s390_pci_dma_ops; +struct zpci_iommu_ctrs { + atomic64_t mapped_pages; + atomic64_t unmapped_pages; + atomic64_t global_rpcits; + atomic64_t sync_map_rpcits; + atomic64_t sync_rpcits; +}; + +struct zpci_dev; +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev); #endif diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 376b4b23bdaa..502d655fe6ae 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); struct page *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 25cadc2b9cff..df316436d2e1 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { } void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -void cmma_init(void); -void cmma_init_nodat(void); - extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 78f7b729b65f..31ec4f545e03 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -6,6 +6,13 @@ #include <linux/ptrace.h> #include <asm/switch_to.h> +struct stack_frame_user { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned long empty2[4]; +}; + enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 383b1f91442c..d1455a601adc 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - /* - * 
page_table_free_rcu takes care of the allocation bit masks - * of the 2K table fragments in the 4K page table page, - * then calls tlb_remove_table. - */ - page_table_free_rcu(tlb, (unsigned long *) pte, address); + if (mm_alloc_pgste(tlb->mm)) + gmap_unlink(tlb->mm, (unsigned long *)pte, address); + tlb_remove_ptdesc(tlb, pte); } /* @@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb_remove_table(tlb, p4d); + tlb_remove_ptdesc(tlb, p4d); } /* @@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; - tlb_remove_table(tlb, pud); + tlb_remove_ptdesc(tlb, pud); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ff1f02b54771..eb43e5922a25 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc); decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); +decompressor_handled_param(cmma); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index c27321cb0969..dfa77da2fd2e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -15,7 +15,10 @@ #include <linux/export.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <linux/sysfs.h> +#include <asm/stacktrace.h> #include <asm/irq.h> #include <asm/cpu_mf.h> #include <asm/lowcore.h> @@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, } } +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + perf_callchain_store(entry, instruction_pointer(regs)); + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (entry->nr < entry->max_stack) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + perf_callchain_store(entry, ip); + /* Sanity check: ABI requires SP to be aligned 8 bytes. 
*/ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} + /* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0787010139f7..94f440e38303 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,9 +6,12 @@ */ #include <linux/stacktrace.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> +#include <asm/ptrace.h> void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, return -EINVAL; return 0; } + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + if (!consume_entry(cookie, instruction_pointer(regs))) + return; + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (1) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + if (!consume_entry(cookie, ip)) + break; + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 20786f6883b2..6f96b5a71c63 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -18,7 +18,7 @@ #include <linux/ksm.h> #include <linux/mman.h> #include <linux/pgtable.h> - +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/page.h> @@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void) page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return NULL; - arch_set_page_dat(page, CRST_ALLOC_ORDER); + __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER); return page; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7eca10c32caa..43e612bc2bcd 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -164,14 +164,10 @@ void __init mem_init(void) pv_init(); kfence_split_mapping(); - /* Setup guest page hinting */ - cmma_init(); /* this will put all low memory onto the freelists */ memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ - - cmma_init_nodat(); } void free_initmem(void) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a31acb2c4ef2..01f9b39e65f5 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -7,212 +7,18 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> #include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <asm/asm-extable.h> -#include <asm/facility.h> #include <asm/page-states.h> +#include <asm/sections.h> +#include <asm/page.h> -static int cmma_flag = 1; - -static int __init cmma(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - cmma_flag = enabled; - return 1; -} -__setup("cmma=", cmma); - -static inline int cmma_test_essa(void) -{ - unsigned long tmp = 0; - int rc = -EOPNOTSUPP; - - /* test ESSA_GET_STATE */ - asm volatile( - " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - "0: la %[rc],0\n" - "1:\n" - EX_TABLE(0b,1b) - : [rc] "+&d" (rc), [tmp] "+&d" (tmp) - : [cmd] "i" (ESSA_GET_STATE)); - return rc; -} - -void __init cmma_init(void) -{ - if (!cmma_flag) - return; - if (cmma_test_essa()) { - cmma_flag = 0; - return; - } - if (test_facility(147)) - cmma_flag = 2; -} - -static inline void set_page_unused(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_UNUSED)); -} - -static inline void set_page_stable_dat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE)); -} - -static inline void set_page_stable_nodat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE_NODAT)); -} - -static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) || pmd_large(*pmd)) - continue; - page = phys_to_page(pmd_val(*pmd)); - set_bit(PG_arch_1, &page->flags); - } while (pmd++, addr = next, addr != end); -} - -static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pud_t *pud; - int i; - - pud = pud_offset(p4d, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none(*pud) || pud_large(*pud)) - continue; - if (!pud_folded(*pud)) { - page = phys_to_page(pud_val(*pud)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pmd(pud, addr, next); - } while (pud++, addr = next, addr != end); -} - -static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - p4d_t *p4d; - int i; - - p4d = p4d_offset(pgd, addr); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(*p4d)) - continue; - if (!p4d_folded(*p4d)) { - page = phys_to_page(p4d_val(*p4d)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pud(p4d, addr, next); - } while (p4d++, addr = next, addr != end); -} - -static void mark_kernel_pgd(void) -{ - unsigned long addr, next, max_addr; - struct page *page; - pgd_t *pgd; - int i; - - addr = 
0; - /* - * Figure out maximum virtual address accessible with the - * kernel ASCE. This is required to keep the page table walker - * from accessing non-existent entries. - */ - max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2; - max_addr = 1UL << (max_addr * 11 + 31); - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, max_addr); - if (pgd_none(*pgd)) - continue; - if (!pgd_folded(*pgd)) { - page = phys_to_page(pgd_val(*pgd)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_p4d(pgd, addr, next); - } while (pgd++, addr = next, addr != max_addr); -} - -void __init cmma_init_nodat(void) -{ - struct page *page; - unsigned long start, end, ix; - int i; - - if (cmma_flag < 2) - return; - /* Mark pages used in kernel page tables */ - mark_kernel_pgd(); - page = virt_to_page(&swapper_pg_dir); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - page = virt_to_page(&invalid_pg_dir); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - - /* Set all kernel pages not used for page tables to stable/no-dat */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { - page = pfn_to_page(start); - for (ix = start; ix < end; ix++, page++) { - if (__test_and_clear_bit(PG_arch_1, &page->flags)) - continue; /* skip page table pages */ - if (!list_empty(&page->lru)) - continue; /* skip free pages */ - set_page_stable_nodat(page, 0); - } - } -} +int __bootdata_preserved(cmma_flag); void arch_free_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_unused(page, order); + __set_page_unused(page_to_virt(page), 1UL << order); } void arch_alloc_page(struct page *page, int order) @@ -220,14 +26,7 @@ void arch_alloc_page(struct page *page, int order) if (!cmma_flag) return; if (cmma_flag < 2) - set_page_stable_dat(page, order); + __set_page_stable_dat(page_to_virt(page), 1UL << order); else - set_page_stable_nodat(page, order); -} - -void arch_set_page_dat(struct page *page, int order) -{ - if (!cmma_flag) - return; - set_page_stable_dat(page, order); + __set_page_stable_nodat(page_to_virt(page), 1UL << order); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 5488ae17318e..008e487c94a6 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/mm.h> #include <asm/mmu_context.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/tlb.h> @@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); + unsigned long *table; if (!ptdesc) return NULL; - arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); - return (unsigned long *) ptdesc_to_virt(ptdesc); + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); + return table; } void crst_table_free(struct mm_struct *mm, unsigned long *table) @@ -130,11 +133,6 @@ err_p4d: return -ENOMEM; } -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - return atomic_fetch_xor(bits, v) ^ bits; -} - #ifdef CONFIG_PGSTE struct page *page_table_alloc_pgste(struct mm_struct *mm) @@ -145,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (ptdesc) { table = (u64 *)ptdesc_to_virt(ptdesc); - arch_set_page_dat(virt_to_page(table), 0); + __arch_set_page_dat(table, 1); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); 
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } @@ -159,125 +157,11 @@ void page_table_free_pgste(struct page *page) #endif /* CONFIG_PGSTE */ -/* - * A 2KB-pgtable is either upper or lower half of a normal page. - * The second half of the page may be unused or used as another - * 2KB-pgtable. - * - * Whenever possible the parent page for a new 2KB-pgtable is picked - * from the list of partially allocated pages mm_context_t::pgtable_list. - * In case the list is empty a new parent page is allocated and added to - * the list. - * - * When a parent page gets fully allocated it contains 2KB-pgtables in both - * upper and lower halves and is removed from mm_context_t::pgtable_list. - * - * When 2KB-pgtable is freed from to fully allocated parent page that - * page turns partially allocated and added to mm_context_t::pgtable_list. - * - * If 2KB-pgtable is freed from the partially allocated parent page that - * page turns unused and gets removed from mm_context_t::pgtable_list. - * Furthermore, the unused parent page is released. - * - * As follows from the above, no unallocated or fully allocated parent - * pages are contained in mm_context_t::pgtable_list. - * - * The upper byte (bits 24-31) of the parent page _refcount is used - * for tracking contained 2KB-pgtables and has the following format: - * - * PP AA - * 01234567 upper byte (bits 24-31) of struct page::_refcount - * || || - * || |+--- upper 2KB-pgtable is allocated - * || +---- lower 2KB-pgtable is allocated - * |+------- upper 2KB-pgtable is pending for removal - * +-------- lower 2KB-pgtable is pending for removal - * - * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why - * using _refcount is possible). - * - * When 2KB-pgtable is allocated the corresponding AA bit is set to 1. - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still unallocated; - * - removed from mm_context_t::pgtable_list in case both hales of the - * parent page are allocated; - * These operations are protected with mm_context_t::lock. - * - * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0 - * and the corresponding PP bit is set to 1 in a single atomic operation. - * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually - * exclusive and may never be both set to 1! - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still allocated; - * - removed from mm_context_t::pgtable_list in case the second half of - * the parent page is unallocated; - * These operations are protected with mm_context_t::lock. - * - * It is important to understand that mm_context_t::lock only protects - * mm_context_t::pgtable_list and AA bits, but not the parent page itself - * and PP bits. - * - * Releasing the parent page happens whenever the PP bit turns from 1 to 0, - * while both AA bits and the second PP bit are already unset. Then the - * parent page does not contain any 2KB-pgtable fragment anymore, and it has - * also been removed from mm_context_t::pgtable_list. It is safe to release - * the page therefore. - * - * PGSTE memory spaces use full 4KB-pgtables and do not need most of the - * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable - * while the PP bits are never used, nor such a page is added to or removed - * from mm_context_t::pgtable_list. 
- * - * pte_free_defer() overrides those rules: it takes the page off pgtable_list, - * and prevents both 2K fragments from being reused. pte_free_defer() has to - * guarantee that its pgtable cannot be reused before the RCU grace period - * has elapsed (which page_table_free_rcu() does not actually guarantee). - * But for simplicity, because page->rcu_head overlays page->lru, and because - * the RCU callback might not be called before the mm_context_t has been freed, - * pte_free_defer() in this implementation prevents both fragments from being - * reused, and delays making the call to RCU until both fragments are freed. - */ unsigned long *page_table_alloc(struct mm_struct *mm) { - unsigned long *table; struct ptdesc *ptdesc; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.lock); - if (!list_empty(&mm->context.pgtable_list)) { - ptdesc = list_first_entry(&mm->context.pgtable_list, - struct ptdesc, pt_list); - mask = atomic_read(&ptdesc->_refcount) >> 24; - /* - * The pending removal bits must also be checked. - * Failure to do so might lead to an impossible - * value of (i.e 0x13 or 0x23) written to _refcount. - * Such values violate the assumption that pending and - * allocation bits are mutually exclusive, and the rest - * of the code unrails as result. That could lead to - * a whole bunch of races and corruptions. - */ - mask = (mask | (mask >> 4)) & 0x03U; - if (mask != 0x03U) { - table = (unsigned long *) ptdesc_to_virt(ptdesc); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&ptdesc->_refcount, - 0x01U << (bit + 24)); - list_del_init(&ptdesc->pt_list); - } - } - spin_unlock_bh(&mm->context.lock); - if (table) - return table; - } - /* Allocate a fresh page */ + unsigned long *table; + ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; @@ -285,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pagetable_free(ptdesc); return NULL; } - arch_set_page_dat(ptdesc_page(ptdesc), 0); - /* Initialize page table */ - table = (unsigned long *) ptdesc_to_virt(ptdesc); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - INIT_LIST_HEAD(&ptdesc->pt_list); - atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); - memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); - } else { - /* Return the first 2K fragment of the page */ - atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24); - memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); - spin_lock_bh(&mm->context.lock); - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.lock); - } + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1); + /* pt_list is used by gmap only */ + INIT_LIST_HEAD(&ptdesc->pt_list); + memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); + memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; } -static void page_table_release_check(struct page *page, void *table, - unsigned int half, unsigned int mask) +static void pagetable_pte_dtor_free(struct ptdesc *ptdesc) { - char msg[128]; - - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return; - if (!mask && list_empty(&page->lru)) - return; - snprintf(msg, sizeof(msg), - "Invalid pgtable %p release half 0x%02x mask 0x%02x", - table, half, mask); - dump_page(page, msg); -} - -static void pte_free_now(struct rcu_head *head) -{ - struct ptdesc *ptdesc; - - ptdesc = 
container_of(head, struct ptdesc, pt_rcu_head); pagetable_pte_dtor(ptdesc); pagetable_free(ptdesc); } void page_table_free(struct mm_struct *mm, unsigned long *table) { - unsigned int mask, bit, half; struct ptdesc *ptdesc = virt_to_ptdesc(table); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release - * will happen outside of the critical section from this - * function or from __tlb_remove_table() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to head of list, to make - * this freed half available for immediate reuse. - */ - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. */ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - half = 0x01U << bit; - } else { - half = 0x03U; - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - } - - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) +void __tlb_remove_table(void *table) { - struct mm_struct *mm; - unsigned int bit, mask; struct ptdesc *ptdesc = virt_to_ptdesc(table); + struct page *page = ptdesc_page(ptdesc); - mm = tlb->mm; - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) ((unsigned long)table | 0x03U); - tlb_remove_ptdesc(tlb, table); + if (compound_order(page) == CRST_ALLOC_ORDER) { + /* pmd, pud, or p4d */ + pagetable_free(ptdesc); return; } - bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release will happen - * outside of the critical section from __tlb_remove_table() or from - * page_table_free() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to end of list, to make - * this freed half available for reuse once its pending - * bit has been cleared by __tlb_remove_table(). - */ - list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. 
*/ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - table = (unsigned long *) ((unsigned long) table | (0x01U << bit)); - tlb_remove_table(tlb, table); + pagetable_pte_dtor_free(ptdesc); } -void __tlb_remove_table(void *_table) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pte_free_now(struct rcu_head *head) { - unsigned int mask = (unsigned long) _table & 0x03U, half = mask; - void *table = (void *)((unsigned long) _table ^ mask); - struct ptdesc *ptdesc = virt_to_ptdesc(table); + struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head); - switch (half) { - case 0x00U: /* pmd, pud, or p4d */ - pagetable_free(ptdesc); - return; - case 0x01U: /* lower 2K of a 4K page table */ - case 0x02U: /* higher 2K of a 4K page table */ - mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - break; - case 0x03U: /* 4K page table with pgstes */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - break; - } - - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) { - struct page *page; + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); - page = virt_to_page(pgtable); - SetPageActive(page); - page_table_free(mm, (unsigned long *)pgtable); + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); /* - * page_table_free() does not do the pgste gmap_unlink() which - * page_table_free_rcu() does: warn us if pgste ever reaches here. + * THPs are not allowed for KVM guests. Warn if pgste ever reaches here. + * Turn to the generic pte_free_defer() version once gmap is removed. */ WARN_ON_ONCE(mm_has_pgste(mm)); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 2e8a1064f103..186a020857cf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val) if (!table) return NULL; crst_table_init(table, val); - if (slab_is_available()) - arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void) if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + __arch_set_page_dat(pte, 1); return pte; } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 5ae31ca9dd44..0547a10406e7 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,7 +3,7 @@ # Makefile for the s390 PCI subsystem. 
# -obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6fab5c085565..676ac74026a8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; - fib.pal = limit; + /* Work around off by one in ISM virt device */ + if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) + fib.pal = limit + (1 << 12); + else + fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); @@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) int zpci_fmb_enable_device(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_iommu_ctrs *ctrs; struct zpci_fib fib = {0}; u8 cc, status; @@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) WARN_ON((u64) zdev->fmb & 0xf); /* reset software counters */ - atomic64_set(&zdev->allocated_pages, 0); - atomic64_set(&zdev->mapped_pages, 0); - atomic64_set(&zdev->unmapped_pages, 0); + ctrs = zpci_get_iommu_ctrs(zdev); + if (ctrs) { + atomic64_set(&ctrs->mapped_pages, 0); + atomic64_set(&ctrs->unmapped_pages, 0); + atomic64_set(&ctrs->global_rpcits, 0); + atomic64_set(&ctrs->sync_map_rpcits, 0); + atomic64_set(&ctrs->sync_rpcits, 0); + } + fib.fmb_addr = virt_to_phys(zdev->fmb); fib.gd = zdev->gisa; @@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev) pdev->no_vf_scan = 1; pdev->dev.groups = zpci_attr_groups; - pdev->dev.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { @@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(zdev->dma_table), &status); - else - rc = zpci_dma_init_device(zdev); if (rc) { zpci_disable_device(zdev); return rc; @@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) { - rc = zpci_dma_exit_device(zdev); - if (rc) - return rc; - } if (zdev_enabled(zdev)) { rc = zpci_disable_device(zdev); if (rc) @@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); @@ -1109,10 +1110,6 @@ static int __init pci_base_init(void) if (rc) goto out_irq; - rc = zpci_dma_init(); - if (rc) - goto out_dma; - rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1122,8 +1119,6 @@ static int __init pci_base_init(void) return 0; out_find: - zpci_dma_exit(); -out_dma: zpci_irq_exit(); out_irq: zpci_mem_exit(); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 32245b970a0c..daa5d7450c7d 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) return rc; - rc = zpci_dma_init_device(zdev); - if (rc) { - zpci_disable_device(zdev); - return rc; - } } if (!zdev->has_resources) { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index ca6bd98eec13..6dde2263c79d 100644 --- 
a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = { }; static char *pci_sw_names[] = { - "Allocated pages", "Mapped pages", "Unmapped pages", + "Global RPCITs", + "Sync Map RPCITs", + "Sync RPCITs", }; static void pci_fmb_show(struct seq_file *m, char *name[], int length, @@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length, static void pci_sw_counter_show(struct seq_file *m) { - struct zpci_dev *zdev = m->private; - atomic64_t *counter = &zdev->allocated_pages; + struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private); + atomic64_t *counter; int i; + if (!ctrs) + return; + + counter = &ctrs->mapped_pages; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c deleted file mode 100644 index 99209085c75b..000000000000 --- a/arch/s390/pci/pci_dma.c +++ /dev/null @@ -1,746 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <linux/iommu-helper.h> -#include <linux/dma-map-ops.h> -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <asm/pci_dma.h> - -static struct kmem_cache *dma_region_table_cache; -static struct kmem_cache *dma_page_table_cache; -static int s390_iommu_strict; -static u64 s390_iommu_aperture; -static u32 s390_iommu_aperture_factor = 1; - -static int zpci_refresh_global(struct zpci_dev *zdev) -{ - return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, - zdev->iommu_pages * PAGE_SIZE); -} - -unsigned long *dma_alloc_cpu_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_region_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID; - return table; -} - -static void dma_free_cpu_table(void *table) -{ - kmem_cache_free(dma_region_table_cache, table); -} - -static unsigned long *dma_alloc_page_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_page_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID; - return table; -} - -static void dma_free_page_table(void *table) -{ - kmem_cache_free(dma_page_table_cache, table); -} - -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) -{ - unsigned long old_rte, rte; - unsigned long *sto; - - rte = READ_ONCE(*rtep); - if (reg_entry_isvalid(rte)) { - sto = get_rt_sto(rte); - } else { - sto = dma_alloc_cpu_table(gfp); - if (!sto) - return NULL; - - set_rt_sto(&rte, virt_to_phys(sto)); - validate_rt_entry(&rte); - entry_clr_protected(&rte); - - old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); - if (old_rte != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_cpu_table(sto); - sto = get_rt_sto(old_rte); - } - } - return sto; -} - -static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) -{ - unsigned long old_ste, ste; - unsigned long *pto; - - ste = READ_ONCE(*step); - if (reg_entry_isvalid(ste)) { - pto = get_st_pto(ste); - } else { - pto = dma_alloc_page_table(gfp); - if (!pto) - return NULL; - set_st_pto(&ste, virt_to_phys(pto)); - validate_st_entry(&ste); - entry_clr_protected(&ste); 
- - old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); - if (old_ste != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_page_table(pto); - pto = get_st_pto(old_ste); - } - } - return pto; -} - -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp) -{ - unsigned long *sto, *pto; - unsigned int rtx, sx, px; - - rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx], gfp); - if (!sto) - return NULL; - - sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx], gfp); - if (!pto) - return NULL; - - px = calc_px(dma_addr); - return &pto[px]; -} - -void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) -{ - unsigned long pte; - - pte = READ_ONCE(*ptep); - if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(&pte); - } else { - set_pt_pfaa(&pte, page_addr); - validate_pt_entry(&pte); - } - - if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(&pte); - else - entry_clr_protected(&pte); - - xchg(ptep, pte); -} - -static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long *entry; - int i, rc = 0; - - if (!nr_pages) - return -EINVAL; - - if (!zdev->dma_table) - return -EINVAL; - - for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) { - rc = -ENOMEM; - goto undo_cpu_trans; - } - dma_update_cpu_trans(entry, page_addr, flags); - page_addr += PAGE_SIZE; - dma_addr += PAGE_SIZE; - } - -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); - } - } - return rc; -} - -static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, - size_t size, int flags) -{ - unsigned long irqflags; - int ret; - - /* - * With zdev->tlb_refresh == 0, rpcit is not required to establish new - * translations when previously invalid translation-table entries are - * validated. With lazy unmap, rpcit is skipped for previously valid - * entries, but a global rpcit is then required before any address can - * be re-used, i.e. after each iommu bitmap wrap-around. 
- */ - if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { - if (!zdev->tlb_refresh) - return 0; - } else { - if (!s390_iommu_strict) - return 0; - } - - ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); - if (ret == -ENOMEM && !s390_iommu_strict) { - /* enable the hypervisor to free some resources */ - if (zpci_refresh_global(zdev)) - goto out; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); - ret = 0; - } -out: - return ret; -} - -static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - int rc; - - rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); - if (rc) - return rc; - - rc = __dma_purge_tlb(zdev, dma_addr, size, flags); - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); - - return rc; -} - -void dma_free_seg_table(unsigned long entry) -{ - unsigned long *sto = get_rt_sto(entry); - int sx; - - for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) - if (reg_entry_isvalid(sto[sx])) - dma_free_page_table(get_st_pto(sto[sx])); - - dma_free_cpu_table(sto); -} - -void dma_cleanup_tables(unsigned long *table) -{ - int rtx; - - if (!table) - return; - - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) - if (reg_entry_isvalid(table[rtx])) - dma_free_seg_table(table[rtx]); - - dma_free_cpu_table(table); -} - -static unsigned long __dma_alloc_iommu(struct device *dev, - unsigned long start, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, zdev->start_dma >> PAGE_SHIFT, - dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), - 0); -} - -static dma_addr_t dma_alloc_address(struct device *dev, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long offset, flags; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(dev, zdev->next_bit, size); - if (offset == -1) { - if (!s390_iommu_strict) { - /* global flush before DMA addresses are reused */ - if (zpci_refresh_global(zdev)) - goto out_error; - - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - } - /* wrap-around */ - offset = __dma_alloc_iommu(dev, 0, size); - if (offset == -1) - goto out_error; - } - zdev->next_bit = offset + size; - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - - return zdev->start_dma + offset * PAGE_SIZE; - -out_error: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_MAPPING_ERROR; -} - -static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long flags, offset; - - offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - if (!zdev->iommu_bitmap) - goto out; - - if (s390_iommu_strict) - bitmap_clear(zdev->iommu_bitmap, offset, size); - else - bitmap_set(zdev->lazy_bitmap, offset, size); - -out: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); -} - -static inline void zpci_err_dma(unsigned long rc, unsigned long addr) -{ - struct { - unsigned long rc; - unsigned long addr; - } __packed data = {rc, addr}; - - 
zpci_err_hex(&data, sizeof(data)); -} - -static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long pa = page_to_phys(page) + offset; - int flags = ZPCI_PTE_VALID; - unsigned long nr_pages; - dma_addr_t dma_addr; - int ret; - - /* This rounds up number of pages based on size and offset */ - nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == DMA_MAPPING_ERROR) { - ret = -ENOSPC; - goto out_err; - } - - /* Use rounded up size */ - size = nr_pages * PAGE_SIZE; - - if (direction == DMA_NONE || direction == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - ret = dma_update_trans(zdev, pa, dma_addr, size, flags); - if (ret) - goto out_free; - - atomic64_add(nr_pages, &zdev->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - -out_free: - dma_free_address(dev, dma_addr, nr_pages); -out_err: - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return DMA_MAPPING_ERROR; -} - -static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - int npages, ret; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr = dma_addr & PAGE_MASK; - ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_PTE_INVALID); - if (ret) { - zpci_err("unmap error:\n"); - zpci_err_dma(ret, dma_addr); - return; - } - - atomic64_add(npages, &zdev->unmapped_pages); - dma_free_address(dev, dma_addr, npages); -} - -static void *s390_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - struct page *page; - phys_addr_t pa; - dma_addr_t map; - - size = PAGE_ALIGN(size); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - pa = page_to_phys(page); - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); - if (dma_mapping_error(dev, map)) { - __free_pages(page, get_order(size)); - return NULL; - } - - atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); - if (dma_handle) - *dma_handle = map; - return phys_to_virt(pa); -} - -static void s390_dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - size = PAGE_ALIGN(size); - atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); - free_pages((unsigned long)vaddr, get_order(size)); -} - -/* Map a segment into a contiguous dma address area */ -static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - size_t size, dma_addr_t *handle, - enum dma_data_direction dir) -{ - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - dma_addr_t dma_addr_base, dma_addr; - int flags = ZPCI_PTE_VALID; - struct scatterlist *s; - phys_addr_t pa = 0; - int ret; - - dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == DMA_MAPPING_ERROR) - return -ENOMEM; - - dma_addr = dma_addr_base; - if (dir == DMA_NONE || dir == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)); - ret 
= __dma_update_trans(zdev, pa, dma_addr, - s->offset + s->length, flags); - if (ret) - goto unmap; - - dma_addr += s->offset + s->length; - } - ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); - if (ret) - goto unmap; - - *handle = dma_addr_base; - atomic64_add(nr_pages, &zdev->mapped_pages); - - return ret; - -unmap: - dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, - ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, nr_pages); - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return ret; -} - -static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s = sg, *start = sg, *dma = sg; - unsigned int max = dma_get_max_seg_size(dev); - unsigned int size = s->offset + s->length; - unsigned int offset = s->offset; - int count = 0, i, ret; - - for (i = 1; i < nr_elements; i++) { - s = sg_next(s); - - s->dma_length = 0; - - if (s->offset || (size & ~PAGE_MASK) || - size + s->length > max) { - ret = __s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - size = offset = s->offset; - start = s; - dma = sg_next(dma); - count++; - } - size += s->length; - } - ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - return count + 1; -unmap: - for_each_sg(sg, s, count, i) - s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), - dir, attrs); - - return ret; -} - -static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nr_elements, i) { - if (s->dma_length) - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, attrs); - s->dma_address = 0; - s->dma_length = 0; - } -} - -static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) -{ - size_t n = BITS_TO_LONGS(bits); - size_t bytes; - - if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) - return NULL; - - return vzalloc(bytes); -} - -int zpci_dma_init_device(struct zpci_dev *zdev) -{ - u8 status; - int rc; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - - spin_lock_init(&zdev->iommu_bitmap_lock); - - zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); - if (!zdev->dma_table) { - rc = -ENOMEM; - goto out; - } - - /* - * Restrict the iommu bitmap size to the minimum of the following: - * - s390_iommu_aperture which defaults to high_memory - * - 3-level pagetable address limit minus start_dma offset - * - DMA address range allowed by the hardware (clp query pci fn) - * - * Also set zdev->end_dma to the actual end address of the usable - * range, instead of the theoretical maximum as reported by hardware. - * - * This limits the number of concurrently usable DMA mappings since - * for each DMA mapped memory address we need a DMA address including - * extra DMA addresses for multiple mappings of the same memory address. 
- */ - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - zdev->iommu_size = min3(s390_iommu_aperture, - ZPCI_TABLE_SIZE_RT - zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); - zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; - zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->iommu_bitmap) { - rc = -ENOMEM; - goto free_dma_table; - } - if (!s390_iommu_strict) { - zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->lazy_bitmap) { - rc = -ENOMEM; - goto free_bitmap; - } - - } - if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status)) { - rc = -EIO; - goto free_bitmap; - } - - return 0; -free_bitmap: - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; -free_dma_table: - dma_free_cpu_table(zdev->dma_table); - zdev->dma_table = NULL; -out: - return rc; -} - -int zpci_dma_exit_device(struct zpci_dev *zdev) -{ - int cc = 0; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - if (zdev_enabled(zdev)) - cc = zpci_unregister_ioat(zdev, 0); - /* - * cc == 3 indicates the function is gone already. This can happen - * if the function was deconfigured/disabled suddenly and we have not - * received a new handle yet. - */ - if (cc && cc != 3) - return -EIO; - - dma_cleanup_tables(zdev->dma_table); - zdev->dma_table = NULL; - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; - zdev->next_bit = 0; - return 0; -} - -static int __init dma_alloc_cpu_table_caches(void) -{ - dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", - ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, - 0, NULL); - if (!dma_region_table_cache) - return -ENOMEM; - - dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", - ZPCI_PT_SIZE, ZPCI_PT_ALIGN, - 0, NULL); - if (!dma_page_table_cache) { - kmem_cache_destroy(dma_region_table_cache); - return -ENOMEM; - } - return 0; -} - -int __init zpci_dma_init(void) -{ - s390_iommu_aperture = (u64)virt_to_phys(high_memory); - if (!s390_iommu_aperture_factor) - s390_iommu_aperture = ULONG_MAX; - else - s390_iommu_aperture *= s390_iommu_aperture_factor; - - return dma_alloc_cpu_table_caches(); -} - -void zpci_dma_exit(void) -{ - kmem_cache_destroy(dma_page_table_cache); - kmem_cache_destroy(dma_region_table_cache); -} - -const struct dma_map_ops s390_pci_dma_ops = { - .alloc = s390_dma_alloc, - .free = s390_dma_free, - .map_sg = s390_dma_map_sg, - .unmap_sg = s390_dma_unmap_sg, - .map_page = s390_dma_map_pages, - .unmap_page = s390_dma_unmap_pages, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - /* dma_supported is unconditionally true without a callback */ -}; -EXPORT_SYMBOL_GPL(s390_pci_dma_ops); - -static int __init s390_iommu_setup(char *str) -{ - if (!strcmp(str, "strict")) - s390_iommu_strict = 1; - return 1; -} - -__setup("s390_iommu=", s390_iommu_setup); - -static int __init s390_iommu_aperture_setup(char *str) -{ - if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) - s390_iommu_aperture_factor = 1; - return 1; -} - -__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); 
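The removal above deletes arch/s390/pci/pci_dma.c, the zPCI-specific DMA API backend whose work (IOVA allocation from a per-device bitmap, page-table updates via dma_update_trans(), strict vs. lazy IOTLB flushing) moves to the common IOMMU DMA layer. The heart of the deleted allocator was a next-fit search over zdev->iommu_bitmap starting at the zdev->next_bit hint. As a rough illustration only — not the kernel code: the page size, aperture size, and base address below are made-up constants, and locking and the lazy-flush bitmap are omitted — a standalone sketch of that bitmap IOVA allocator might look like this:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */
#define IOVA_PAGES 64			/* toy aperture of 64 pages */

static uint64_t iova_bitmap[(IOVA_PAGES + 63) / 64];
static const uint64_t iova_base = 0x80000000ULL; /* stand-in for zdev->start_dma */
static unsigned int next_bit;		/* next-fit search hint, like zdev->next_bit */

static int bit_is_set(unsigned int i) { return (iova_bitmap[i / 64] >> (i % 64)) & 1; }
static void bit_set(unsigned int i) { iova_bitmap[i / 64] |= 1ULL << (i % 64); }
static void bit_clear(unsigned int i) { iova_bitmap[i / 64] &= ~(1ULL << (i % 64)); }

/* Next-fit: scan from the hint first, then wrap around once. */
static uint64_t iova_alloc(unsigned int nr_pages)
{
	for (int pass = 0; pass < 2; pass++) {
		unsigned int i = pass ? 0 : next_bit;

		for (; i + nr_pages <= IOVA_PAGES; i++) {
			unsigned int j;

			/* stop at the first busy page inside the candidate range */
			for (j = 0; j < nr_pages && !bit_is_set(i + j); j++)
				;
			if (j < nr_pages)
				continue;	/* range busy, keep scanning */
			for (j = 0; j < nr_pages; j++)
				bit_set(i + j);
			next_bit = i + nr_pages;
			return iova_base + ((uint64_t)i << PAGE_SHIFT);
		}
	}
	return 0;	/* aperture exhausted; the real driver returned DMA_MAPPING_ERROR */
}

static void iova_free(uint64_t iova, unsigned int nr_pages)
{
	unsigned int i = (iova - iova_base) >> PAGE_SHIFT;

	while (nr_pages--)
		bit_clear(i++);
}

int main(void)
{
	uint64_t a = iova_alloc(3);	/* e.g. a three-page scatter-gather mapping */
	uint64_t b = iova_alloc(1);

	printf("a=%#llx b=%#llx\n", (unsigned long long)a, (unsigned long long)b);
	iova_free(a, 3);
	return 0;
}

One detail the sketch drops entirely: in non-strict mode the deleted driver deferred IOTLB flushing, parking freed ranges in zdev->lazy_bitmap and recycling them only after a flush once the allocator wrapped — the strict-vs-lazy trade-off this backend exposed.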
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b9324ca2eb94..4d9773ef9e0a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) } } -static bool is_passed_through(struct zpci_dev *zdev) +static bool is_passed_through(struct pci_dev *pdev) { - return zdev->s390_domain; + struct zpci_dev *zdev = to_zpci(pdev); + bool ret; + + mutex_lock(&zdev->kzdev_lock); + ret = !!zdev->kzdev; + mutex_unlock(&zdev->kzdev_lock); + + return ret; } static bool is_driver_supported(struct pci_driver *driver) @@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) } pdev->error_state = pci_channel_io_frozen; - if (is_passed_through(to_zpci(pdev))) { + if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); goto out_unlock; @@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) * we will inject the error event and let the guest recover the device * itself. */ - if (is_passed_through(to_zpci(pdev))) + if (is_passed_through(pdev)) goto out; driver = to_pci_driver(pdev->dev.driver); if (driver && driver->err_handler && driver->err_handler->error_detected) @@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) /* Even though the device is already gone we still * need to free zPCI resources as part of the disable. */ - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cae280e5c047..8a7abac51816 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); int ret = 0; + u8 status; /* Can't use device_remove_self() here as that would lead us to lock * the pci_rescan_remove_lock while holding the device's kernfs lock.
@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { pci_stop_and_remove_bus_device(pdev); - if (zdev->dma_table) { - ret = zpci_dma_exit_device(zdev); - if (ret) - goto out; - } - if (zdev_enabled(zdev)) { ret = zpci_disable_device(zdev); /* @@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - ret = zpci_dma_init_device(zdev); - if (ret) { - zpci_disable_device(zdev); - goto out; + + if (zdev->dma_table) { + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table), &status); + if (ret) + zpci_disable_device(zdev); } - pci_rescan_bus(zdev->zbus->bus); } out: + pci_rescan_bus(zdev->zbus->bus); pci_unlock_rescan_remove(); if (kn) sysfs_unbreak_active_protection(kn); diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h index eeba83e0a7d2..65d4c3316a5b 100644 --- a/arch/sh/include/asm/kprobes.h +++ b/arch/sh/include/asm/kprobes.h @@ -46,8 +46,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_handle_illslot(unsigned long pc); #else diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h index 06c2bc767ef7..aec742cd898f 100644 --- a/arch/sparc/include/asm/kprobes.h +++ b/arch/sparc/include/asm/kprobes.h @@ -47,8 +47,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, struct pt_regs *regs); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index a2e9317aad49..5939694dfb28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -113,8 +113,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); #else diff --git a/block/blk-core.c b/block/blk-core.c index 9d51e9894ece..fdf25b8d6e78 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -501,8 +501,8 @@ static inline void bio_check_ro(struct bio *bio) if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; - pr_warn("Trying to write to read-only block-device %pg\n", - bio->bi_bdev); + pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", + bio->bi_bdev); /* Older lvm-tools actually trigger this */ } } diff --git a/crypto/Kconfig b/crypto/Kconfig index bbf51d55724e..70661f58ee41 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1297,10 +1297,12 @@ config CRYPTO_JITTERENTROPY See https://www.chronox.de/jent.html +if CRYPTO_JITTERENTROPY +if CRYPTO_FIPS && EXPERT + choice prompt "CPU Jitter RNG Memory Size" default CRYPTO_JITTERENTROPY_MEMSIZE_2 - depends on CRYPTO_JITTERENTROPY help The Jitter RNG measures the execution time of memory accesses. Multiple consecutive memory accesses are performed. 
If the memory @@ -1344,7 +1346,6 @@ config CRYPTO_JITTERENTROPY_OSR int "CPU Jitter RNG Oversampling Rate" range 1 15 default 1 - depends on CRYPTO_JITTERENTROPY help The Jitter RNG allows the specification of an oversampling rate (OSR). The Jitter RNG operation requires a fixed amount of timing @@ -1359,7 +1360,6 @@ config CRYPTO_JITTERENTROPY_OSR config CRYPTO_JITTERENTROPY_TESTINTERFACE bool "CPU Jitter RNG Test Interface" - depends on CRYPTO_JITTERENTROPY help The test interface allows a privileged process to capture the raw unconditioned high resolution time stamp noise that @@ -1377,6 +1377,28 @@ config CRYPTO_JITTERENTROPY_TESTINTERFACE If unsure, select N. +endif # if CRYPTO_FIPS && EXPERT + +if !(CRYPTO_FIPS && EXPERT) + +config CRYPTO_JITTERENTROPY_MEMORY_BLOCKS + int + default 64 + +config CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE + int + default 32 + +config CRYPTO_JITTERENTROPY_OSR + int + default 1 + +config CRYPTO_JITTERENTROPY_TESTINTERFACE + bool + +endif # if !(CRYPTO_FIPS && EXPERT) +endif # if CRYPTO_JITTERENTROPY + config CRYPTO_KDF800108_CTR tristate select CRYPTO_HMAC diff --git a/crypto/ahash.c b/crypto/ahash.c index deee55f939dc..80c3e5354711 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -651,6 +651,7 @@ struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash) err = PTR_ERR(shash); goto out_free_nhash; } + nhash->using_shash = true; *nctx = shash; return nhash; } diff --git a/drivers/Kconfig b/drivers/Kconfig index 9826907eb06e..7bdad836fc62 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -135,8 +135,6 @@ source "drivers/uio/Kconfig" source "drivers/vfio/Kconfig" -source "drivers/vlynq/Kconfig" - source "drivers/virt/Kconfig" source "drivers/virtio/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 722d15be0eb7..d828329c268d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -151,7 +151,6 @@ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ obj-$(CONFIG_VHOST_IOTLB) += vhost/ obj-$(CONFIG_VHOST) += vhost/ -obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_GREYBUS) += greybus/ obj-$(CONFIG_COMEDI) += comedi/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/acpi/riscv/rhct.c b/drivers/acpi/riscv/rhct.c index b280b3e9c7d9..caa2c16e1697 100644 --- a/drivers/acpi/riscv/rhct.c +++ b/drivers/acpi/riscv/rhct.c @@ -8,8 +8,9 @@ #define pr_fmt(fmt) "ACPI: RHCT: " fmt #include <linux/acpi.h> +#include <linux/bits.h> -static struct acpi_table_header *acpi_get_rhct(void) +static struct acpi_table_rhct *acpi_get_rhct(void) { static struct acpi_table_header *rhct; acpi_status status; @@ -26,7 +27,7 @@ static struct acpi_table_header *acpi_get_rhct(void) } } - return rhct; + return (struct acpi_table_rhct *)rhct; } /* @@ -48,7 +49,7 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const BUG_ON(acpi_disabled); if (!table) { - rhct = (struct acpi_table_rhct *)acpi_get_rhct(); + rhct = acpi_get_rhct(); if (!rhct) return -ENOENT; } else { @@ -81,3 +82,89 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const return -1; } + +static void acpi_parse_hart_info_cmo_node(struct acpi_table_rhct *rhct, + struct acpi_rhct_hart_info *hart_info, + u32 *cbom_size, u32 *cboz_size, u32 *cbop_size) +{ + u32 size_hartinfo = sizeof(struct acpi_rhct_hart_info); + u32 size_hdr = sizeof(struct acpi_rhct_node_header); + struct acpi_rhct_node_header *ref_node; + struct acpi_rhct_cmo_node *cmo_node; + u32 *hart_info_node_offset; + + hart_info_node_offset = ACPI_ADD_PTR(u32, hart_info, size_hartinfo); + for 
(int i = 0; i < hart_info->num_offsets; i++) { + ref_node = ACPI_ADD_PTR(struct acpi_rhct_node_header, + rhct, hart_info_node_offset[i]); + if (ref_node->type == ACPI_RHCT_NODE_TYPE_CMO) { + cmo_node = ACPI_ADD_PTR(struct acpi_rhct_cmo_node, + ref_node, size_hdr); + if (cbom_size && cmo_node->cbom_size <= 30) { + if (!*cbom_size) + *cbom_size = BIT(cmo_node->cbom_size); + else if (*cbom_size != BIT(cmo_node->cbom_size)) + pr_warn("CBOM size is not the same across harts\n"); + } + + if (cboz_size && cmo_node->cboz_size <= 30) { + if (!*cboz_size) + *cboz_size = BIT(cmo_node->cboz_size); + else if (*cboz_size != BIT(cmo_node->cboz_size)) + pr_warn("CBOZ size is not the same across harts\n"); + } + + if (cbop_size && cmo_node->cbop_size <= 30) { + if (!*cbop_size) + *cbop_size = BIT(cmo_node->cbop_size); + else if (*cbop_size != BIT(cmo_node->cbop_size)) + pr_warn("CBOP size is not the same across harts\n"); + } + } + } +} + +/* + * During early boot, the caller should call acpi_get_table() and pass its pointer to + * these functions (and free up later). At run time, since this table can be used + * multiple times, pass NULL so that the table remains in memory. + */ +void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, + u32 *cboz_size, u32 *cbop_size) +{ + u32 size_hdr = sizeof(struct acpi_rhct_node_header); + struct acpi_rhct_node_header *node, *end; + struct acpi_rhct_hart_info *hart_info; + struct acpi_table_rhct *rhct; + + if (acpi_disabled) + return; + + if (table) { + rhct = (struct acpi_table_rhct *)table; + } else { + rhct = acpi_get_rhct(); + if (!rhct) + return; + } + + if (cbom_size) + *cbom_size = 0; + + if (cboz_size) + *cboz_size = 0; + + if (cbop_size) + *cbop_size = 0; + + end = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->header.length); + for (node = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->node_offset); + node < end; + node = ACPI_ADD_PTR(struct acpi_rhct_node_header, node, node->length)) { + if (node->type == ACPI_RHCT_NODE_TYPE_HART_INFO) { + hart_info = ACPI_ADD_PTR(struct acpi_rhct_hart_info, node, size_hdr); + acpi_parse_hart_info_cmo_node(rhct, hart_info, cbom_size, + cboz_size, cbop_size); + } + } +} diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6fb4e8dc8c3c..09ed67772fae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6180,24 +6180,10 @@ EXPORT_SYMBOL_GPL(ata_pci_remove_one); void ata_pci_shutdown_one(struct pci_dev *pdev) { struct ata_host *host = pci_get_drvdata(pdev); - struct ata_port *ap; - unsigned long flags; int i; - /* Tell EH to disable all devices */ - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - spin_lock_irqsave(ap->lock, flags); - ap->pflags |= ATA_PFLAG_UNLOADING; - ata_port_schedule_eh(ap); - spin_unlock_irqrestore(ap->lock, flags); - } - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - - /* Wait for EH to complete before freezing the port */ - ata_port_wait_eh(ap); + struct ata_port *ap = host->ports[i]; ap->pflags |= ATA_PFLAG_FROZEN; diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 0c2ae430f5aa..18ceefd176df 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -121,7 +121,7 @@ static struct ata_port_operations pata_falcon_ops = { .set_mode = pata_falcon_set_mode, }; -static int __init pata_falcon_init_one(struct platform_device *pdev) +static int pata_falcon_init_one(struct platform_device *pdev) { struct resource *base_mem_res, *ctl_mem_res; struct resource *base_res, 
*ctl_res, *irq_res; @@ -216,23 +216,22 @@ static int __init pata_falcon_init_one(struct platform_device *pdev) IRQF_SHARED, &pata_falcon_sht); } -static int __exit pata_falcon_remove_one(struct platform_device *pdev) +static void pata_falcon_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_falcon_driver = { - .remove = __exit_p(pata_falcon_remove_one), + .probe = pata_falcon_init_one, + .remove_new = pata_falcon_remove_one, .driver = { .name = "atari-falcon-ide", }, }; -module_platform_driver_probe(pata_falcon_driver, pata_falcon_init_one); +module_platform_driver(pata_falcon_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Atari Falcon PATA"); diff --git a/drivers/ata/pata_gayle.c b/drivers/ata/pata_gayle.c index 3bdbe2b65a2b..94df60ac2307 100644 --- a/drivers/ata/pata_gayle.c +++ b/drivers/ata/pata_gayle.c @@ -124,7 +124,7 @@ static struct ata_port_operations pata_gayle_a4000_ops = { .set_mode = pata_gayle_set_mode, }; -static int __init pata_gayle_init_one(struct platform_device *pdev) +static int pata_gayle_init_one(struct platform_device *pdev) { struct resource *res; struct gayle_ide_platform_data *pdata; @@ -193,23 +193,22 @@ static int __init pata_gayle_init_one(struct platform_device *pdev) return 0; } -static int __exit pata_gayle_remove_one(struct platform_device *pdev) +static void pata_gayle_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_gayle_driver = { - .remove = __exit_p(pata_gayle_remove_one), + .probe = pata_gayle_init_one, + .remove_new = pata_gayle_remove_one, .driver = { .name = "amiga-gayle-ide", }, }; -module_platform_driver_probe(pata_gayle_driver, pata_gayle_init_one); +module_platform_driver(pata_gayle_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Amiga Gayle PATA"); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 234a84ecde8b..ea6157747199 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1620,17 +1620,19 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, } if (!map->cache_bypass && map->format.parse_val) { - unsigned int ival; + unsigned int ival, offset; int val_bytes = map->format.val_bytes; - for (i = 0; i < val_len / val_bytes; i++) { - ival = map->format.parse_val(val + (i * val_bytes)); - ret = regcache_write(map, - reg + regmap_get_offset(map, i), - ival); + + /* Cache the last written value for noinc writes */ + i = noinc ? val_len - val_bytes : 0; + for (; i < val_len; i += val_bytes) { + ival = map->format.parse_val(val + i); + offset = noinc ? 
0 : regmap_get_offset(map, i / val_bytes); + ret = regcache_write(map, reg + offset, ival); if (ret) { dev_err(map->dev, "Error in caching of register: %x ret: %d\n", - reg + regmap_get_offset(map, i), ret); + reg + offset, ret); return ret; } } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 800f131222fc..855fdf5c3b4e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -250,7 +250,6 @@ static void nbd_dev_remove(struct nbd_device *nbd) struct gendisk *disk = nbd->disk; del_gendisk(disk); - put_disk(disk); blk_mq_free_tag_set(&nbd->tag_set); /* @@ -261,7 +260,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) idr_remove(&nbd_index_idr, nbd->index); mutex_unlock(&nbd_index_mutex); destroy_workqueue(nbd->recv_workq); - kfree(nbd); + put_disk(disk); } static void nbd_dev_remove_work(struct work_struct *work) @@ -1608,6 +1607,13 @@ static void nbd_release(struct gendisk *disk) nbd_put(nbd); } +static void nbd_free_disk(struct gendisk *disk) +{ + struct nbd_device *nbd = disk->private_data; + + kfree(nbd); +} + static const struct block_device_operations nbd_fops = { .owner = THIS_MODULE, @@ -1615,6 +1621,7 @@ static const struct block_device_operations nbd_fops = .release = nbd_release, .ioctl = nbd_ioctl, .compat_ioctl = nbd_ioctl, + .free_disk = nbd_free_disk, }; #if IS_ENABLED(CONFIG_DEBUG_FS) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 50198657230e..57857c0dfba9 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -22,21 +22,32 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/interrupt.h> #include <linux/of_irq.h> +#include <linux/limits.h> #include <clocksource/timer-riscv.h> #include <asm/smp.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/sbi.h> #include <asm/timex.h> static DEFINE_STATIC_KEY_FALSE(riscv_sstc_available); static bool riscv_timer_cannot_wake_cpu; +static void riscv_clock_event_stop(void) +{ + if (static_branch_likely(&riscv_sstc_available)) { + csr_write(CSR_STIMECMP, ULONG_MAX); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_STIMECMPH, ULONG_MAX); + } else { + sbi_set_timer(U64_MAX); + } +} + static int riscv_clock_next_event(unsigned long delta, struct clock_event_device *ce) { u64 next_tval = get_cycles64() + delta; - csr_set(CSR_IE, IE_TIE); if (static_branch_likely(&riscv_sstc_available)) { #if defined(CONFIG_32BIT) csr_write(CSR_STIMECMP, next_tval & 0xFFFFFFFF); @@ -94,6 +105,8 @@ static int riscv_timer_starting_cpu(unsigned int cpu) ce->irq = riscv_clock_event_irq; if (riscv_timer_cannot_wake_cpu) ce->features |= CLOCK_EVT_FEAT_C3STOP; + if (static_branch_likely(&riscv_sstc_available)) + ce->rating = 450; clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff); enable_percpu_irq(riscv_clock_event_irq, @@ -119,7 +132,7 @@ static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event); - csr_clear(CSR_IE, IE_TIE); + riscv_clock_event_stop(); evdev->event_handler(evdev); return IRQ_HANDLED; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 11b3e34b7696..bd1e1357cef8 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -180,8 +180,11 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am62a7", }, { .compatible = "ti,am62p5", }, + { .compatible = "qcom,ipq5332", }, { .compatible = "qcom,ipq6018", }, { .compatible = 
"qcom,ipq8064", }, + { .compatible = "qcom,ipq8074", }, + { .compatible = "qcom,ipq9574", }, { .compatible = "qcom,apq8064", }, { .compatible = "qcom,msm8974", }, { .compatible = "qcom,msm8960", }, diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 15367ac08b2b..6355a39418c5 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -38,6 +38,11 @@ enum ipq806x_versions { #define IPQ6000_VERSION BIT(2) +enum ipq8074_versions { + IPQ8074_HAWKEYE_VERSION = 0, + IPQ8074_ACORN_VERSION, +}; + struct qcom_cpufreq_drv; struct qcom_cpufreq_match_data { @@ -178,6 +183,16 @@ static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, switch (msm_id) { case QCOM_ID_MSM8996: case QCOM_ID_APQ8096: + case QCOM_ID_IPQ5332: + case QCOM_ID_IPQ5322: + case QCOM_ID_IPQ5312: + case QCOM_ID_IPQ5302: + case QCOM_ID_IPQ5300: + case QCOM_ID_IPQ9514: + case QCOM_ID_IPQ9550: + case QCOM_ID_IPQ9554: + case QCOM_ID_IPQ9570: + case QCOM_ID_IPQ9574: drv->versions = 1 << (unsigned int)(*speedbin); break; case QCOM_ID_MSM8996SG: @@ -338,6 +353,44 @@ static int qcom_cpufreq_ipq6018_name_version(struct device *cpu_dev, return 0; } +static int qcom_cpufreq_ipq8074_name_version(struct device *cpu_dev, + struct nvmem_cell *speedbin_nvmem, + char **pvs_name, + struct qcom_cpufreq_drv *drv) +{ + u32 msm_id; + int ret; + *pvs_name = NULL; + + ret = qcom_smem_get_soc_id(&msm_id); + if (ret) + return ret; + + switch (msm_id) { + case QCOM_ID_IPQ8070A: + case QCOM_ID_IPQ8071A: + case QCOM_ID_IPQ8172: + case QCOM_ID_IPQ8173: + case QCOM_ID_IPQ8174: + drv->versions = BIT(IPQ8074_ACORN_VERSION); + break; + case QCOM_ID_IPQ8072A: + case QCOM_ID_IPQ8074A: + case QCOM_ID_IPQ8076A: + case QCOM_ID_IPQ8078A: + drv->versions = BIT(IPQ8074_HAWKEYE_VERSION); + break; + default: + dev_err(cpu_dev, + "SoC ID %u is not part of IPQ8074 family, limiting to 1.4GHz!\n", + msm_id); + drv->versions = BIT(IPQ8074_ACORN_VERSION); + break; + } + + return 0; +} + static const char *generic_genpd_names[] = { "perf", NULL }; static const struct qcom_cpufreq_match_data match_data_kryo = { @@ -367,6 +420,10 @@ static const struct qcom_cpufreq_match_data match_data_ipq8064 = { .get_version = qcom_cpufreq_ipq8064_name_version, }; +static const struct qcom_cpufreq_match_data match_data_ipq8074 = { + .get_version = qcom_cpufreq_ipq8074_name_version, +}; + static int qcom_cpufreq_probe(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv; @@ -494,9 +551,12 @@ static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { { .compatible = "qcom,msm8909", .data = &match_data_msm8909 }, { .compatible = "qcom,msm8996", .data = &match_data_kryo }, { .compatible = "qcom,qcs404", .data = &match_data_qcs404 }, + { .compatible = "qcom,ipq5332", .data = &match_data_kryo }, { .compatible = "qcom,ipq6018", .data = &match_data_ipq6018 }, { .compatible = "qcom,ipq8064", .data = &match_data_ipq8064 }, + { .compatible = "qcom,ipq8074", .data = &match_data_ipq8074 }, { .compatible = "qcom,apq8064", .data = &match_data_krait }, + { .compatible = "qcom,ipq9574", .data = &match_data_kryo }, { .compatible = "qcom,msm8974", .data = &match_data_krait }, { .compatible = "qcom,msm8960", .data = &match_data_krait }, {}, diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef4c12f0877b..06964a3c130f 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += 
-DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 58f107194fda..04c03402db6d 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -750,12 +750,12 @@ static int aspeed_gpio_request(struct gpio_chip *chip, unsigned int offset) if (!have_gpio(gpiochip_get_data(chip), offset)) return -ENODEV; - return pinctrl_gpio_request(chip->base + offset); + return pinctrl_gpio_request(chip, offset); } static void aspeed_gpio_free(struct gpio_chip *chip, unsigned int offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); } static int usecs_to_cycles(struct aspeed_gpio *gpio, unsigned long usecs, @@ -973,7 +973,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(chip->base + offset, config); + return pinctrl_gpio_set_config(chip, offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index 858e6ebbb584..6c862c572322 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -227,14 +227,9 @@ static int em_gio_to_irq(struct gpio_chip *chip, unsigned offset) return irq_create_mapping(gpio_to_priv(chip)->irq_domain, offset); } -static int em_gio_request(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_request(chip->base + offset); -} - static void em_gio_free(struct gpio_chip *chip, unsigned offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* Set the GPIO as an input to ensure that the next GPIO request won't * drive the GPIO pin as an output. 
@@ -311,7 +306,7 @@ static int em_gio_probe(struct platform_device *pdev) gpio_chip->direction_output = em_gio_direction_output; gpio_chip->set = em_gio_set; gpio_chip->to_irq = em_gio_to_irq; - gpio_chip->request = em_gio_request; + gpio_chip->request = pinctrl_gpio_request; gpio_chip->free = em_gio_free; gpio_chip->label = name; gpio_chip->parent = dev; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 8f80ca8ec1ed..a13f3c18ccd4 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -346,7 +346,7 @@ static int mvebu_gpio_direction_input(struct gpio_chip *chip, unsigned int pin) * Check with the pinctrl driver whether this pin is usable as * an input GPIO */ - ret = pinctrl_gpio_direction_input(chip->base + pin); + ret = pinctrl_gpio_direction_input(chip, pin); if (ret) return ret; @@ -366,7 +366,7 @@ static int mvebu_gpio_direction_output(struct gpio_chip *chip, unsigned int pin, * Check with the pinctrl driver whether this pin is usable as * an output GPIO */ - ret = pinctrl_gpio_direction_output(chip->base + pin); + ret = pinctrl_gpio_direction_output(chip, pin); if (ret) return ret; @@ -757,7 +757,6 @@ static const struct pwm_ops mvebu_pwm_ops = { .free = mvebu_pwm_free, .get_state = mvebu_pwm_get_state, .apply = mvebu_pwm_apply, - .owner = THIS_MODULE, }; static void __maybe_unused mvebu_pwm_suspend(struct mvebu_gpio_chip *mvchip) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index cae9661862fe..91cea97255fa 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -260,7 +260,7 @@ static int pxa_gpio_direction_input(struct gpio_chip *chip, unsigned offset) int ret; if (pxa_gpio_has_pinctrl()) { - ret = pinctrl_gpio_direction_input(chip->base + offset); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; } @@ -289,7 +289,7 @@ static int pxa_gpio_direction_output(struct gpio_chip *chip, writel_relaxed(mask, base + (value ? 
GPSR_OFFSET : GPCR_OFFSET)); if (pxa_gpio_has_pinctrl()) { - ret = pinctrl_gpio_direction_output(chip->base + offset); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; } diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index d8b1baae6357..6159fda38d5d 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -275,7 +275,7 @@ static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) return error; } - error = pinctrl_gpio_request(chip->base + offset); + error = pinctrl_gpio_request(chip, offset); if (error) pm_runtime_put(p->dev); @@ -286,7 +286,7 @@ static void gpio_rcar_free(struct gpio_chip *chip, unsigned offset) { struct gpio_rcar_priv *p = gpiochip_get_data(chip); - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* * Set the GPIO as an input to ensure that the next GPIO request won't diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 23040a8cea34..0bd339813110 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -159,9 +159,9 @@ static int rockchip_gpio_set_direction(struct gpio_chip *chip, if (input) - pinctrl_gpio_direction_input(bank->pin_base + offset); + pinctrl_gpio_direction_input(chip, offset); else - pinctrl_gpio_direction_output(bank->pin_base + offset); + pinctrl_gpio_direction_output(chip, offset); raw_spin_lock_irqsave(&bank->slock, flags); rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr); diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index ea715582bcf3..ea5f9cc14bc4 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -137,16 +137,11 @@ static void tegra_gpio_disable(struct tegra_gpio_info *tgi, unsigned int gpio) tegra_gpio_mask_write(tgi, GPIO_MSK_CNF(tgi, gpio), gpio, 0); } -static int tegra_gpio_request(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_request(chip->base + offset); -} - static void tegra_gpio_free(struct gpio_chip *chip, unsigned int offset) { struct tegra_gpio_info *tgi = gpiochip_get_data(chip); - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); tegra_gpio_disable(tgi, offset); } @@ -179,7 +174,7 @@ static int tegra_gpio_direction_input(struct gpio_chip *chip, tegra_gpio_mask_write(tgi, GPIO_MSK_OE(tgi, offset), offset, 0); tegra_gpio_enable(tgi, offset); - ret = pinctrl_gpio_direction_input(chip->base + offset); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret < 0) dev_err(tgi->dev, "Failed to set pinctrl input direction of GPIO %d: %d", @@ -199,7 +194,7 @@ static int tegra_gpio_direction_output(struct gpio_chip *chip, tegra_gpio_mask_write(tgi, GPIO_MSK_OE(tgi, offset), offset, 1); tegra_gpio_enable(tgi, offset); - ret = pinctrl_gpio_direction_output(chip->base + offset); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret < 0) dev_err(tgi->dev, "Failed to set pinctrl output direction of GPIO %d: %d", @@ -717,7 +712,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) } tgi->gc.label = "tegra-gpio"; - tgi->gc.request = tegra_gpio_request; + tgi->gc.request = pinctrl_gpio_request; tgi->gc.free = tegra_gpio_free; tgi->gc.direction_input = tegra_gpio_direction_input; tgi->gc.get = tegra_gpio_get; diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 444501c56a3b..07e5e6323e86 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -130,7 +130,7 @@ static int vf610_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) 
vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); } - return pinctrl_gpio_direction_input(chip->base + gpio); + return pinctrl_gpio_direction_input(chip, gpio); } static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, @@ -148,7 +148,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); } - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static void vf610_gpio_irq_handler(struct irq_desc *desc) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 31fc71a612c2..02ffda6c1e51 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2287,8 +2287,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, * FIXME: find a non-racy way to retrieve this information. Maybe a * lock common to both frameworks? */ - ok_for_pinctrl = - pinctrl_gpio_can_use_line(gc->base + info->offset); + ok_for_pinctrl = pinctrl_gpio_can_use_line(gc, info->offset); spin_lock_irqsave(&gpio_lock, flags); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index cbafcd95243e..95d2a7b2ea3e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1092,28 +1092,6 @@ void gpiochip_remove(struct gpio_chip *gc) } EXPORT_SYMBOL_GPL(gpiochip_remove); -/* - * FIXME: This will be removed soon. - * - * This function is deprecated, don't use. - */ -struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, - void *data)) -{ - struct gpio_device *gdev; - struct gpio_chip *gc = NULL; - - gdev = gpio_device_find(data, match); - if (gdev) { - gc = gdev->chip; - gpio_device_put(gdev); - } - - return gc; -} -EXPORT_SYMBOL_GPL(gpiochip_find); - /** * gpio_device_find() - find a specific GPIO device * @data: data to pass to match function @@ -2036,7 +2014,7 @@ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) return 0; #endif - return pinctrl_gpio_request(gc->gpiodev->base + offset); + return pinctrl_gpio_request(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); @@ -2052,7 +2030,7 @@ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) return; #endif - pinctrl_gpio_free(gc->gpiodev->base + offset); + pinctrl_gpio_free(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); @@ -2065,7 +2043,7 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { - return pinctrl_gpio_set_config(gc->gpiodev->base + offset, config); + return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 91820838b63b..afec09930efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -363,9 +363,6 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; -#define HW_REV(_Major, _Minor, _Rev) \ ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -1162,11 +1159,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void
amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1207,8 +1211,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1218,6 +1222,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4da82fc64fef..2deebece810e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1494,6 +1494,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) if (adev->asic_type < CHIP_RAVEN) return false; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return false; + /* * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally * risky to do any special firmware-related preparations for entering diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..f6598b9e4faa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,14 +300,13 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. 
*/ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..00fbc0f44c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ 
kgd_gfx_v9_unlock_srbm(adev, inst); @@ -239,14 +239,13 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + upper_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -908,8 +906,8 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t inst) { - *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2)); + *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst), + mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1eccad4ce243..41fbc4fd0fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -425,6 +425,32 @@ validate_fail: 
return ret; } +static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + int ret = amdgpu_bo_reserve(bo, false); + + if (ret) + return ret; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) + goto unreserve_out; + + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (ret) + goto unreserve_out; + + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + +unreserve_out: + amdgpu_bo_unreserve(bo); + + return ret; +} + static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); @@ -1784,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } else { + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_validate_bo; } if (offset) @@ -1793,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: +err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: @@ -1866,10 +1902,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (unlikely(ret)) return ret; - /* The eviction fence should be removed by the last unmap. - * TODO: Log an error condition if the bo still has the eviction fence - * attached - */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, @@ -1998,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out_unreserve; - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. - */ - ret = amdgpu_amdkfd_bo_validate(bo, domain, true); - if (ret) { - pr_debug("Validate failed\n"); - goto out_unreserve; - } - } - list_for_each_entry(entry, &mem->attachments, list) { if (entry->bo_va->base.vm != avm || entry->is_mapped) continue; @@ -2037,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( mem->mapped_to_gpu_memory); } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - dma_resv_add_fence(bo->tbo.base.resv, - &avm->process_info->eviction_fence->base, - DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2074,7 +2089,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; @@ -2115,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mem->mapped_to_gpu_memory); } - /* If BO is unmapped from all VMs, unfence it. It can be evicted if - * required. 
- */ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && - !mem->bo->tbo.pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence); - unreserve_out: unreserve_bo_and_vms(&ctx, false, false); out: @@ -2351,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, amdgpu_sync_create(&(*mem)->sync); (*mem)->is_imported = true; + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_remove_mem; + return 0; +err_remove_mem: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); err_put_obj: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 5bbb23e102ba..618e469e3622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "atom.h" +#include <linux/device.h> #include <linux/pci.h> #include <linux/slab.h> #include <linux/acpi.h> @@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return false; + /* ATRM is for on-platform devices only */ + if (dev_is_removable(&adev->pdev->dev)) + return false; + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 781e5c5ce04d..702f6610d024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -172,6 +172,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e210fe5c22a0..df3ecfa9e13f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1117,6 +1117,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* FIXME: In theory this loop shouldn't be needed any more when + * amdgpu_vm_handle_moved handles all moved BOs that are reserved + * with p->ticket. But removing it caused test regressions, so I'm + * leaving it here for now. 
+ */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { bo_va = e->bo_va; if (bo_va == NULL) @@ -1131,7 +1136,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); if (r) return r; @@ -1410,7 +1415,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d5f78179b2b6..7eeaf0aa7f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -41,6 +41,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> +#include <linux/device.h> #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/efi.h> @@ -72,6 +73,7 @@ #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" +#include "amdgpu_virt.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -471,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg); + ret = amdgpu_kiq_rreg(adev, reg, 0); up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -508,6 +510,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) BUG(); } + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. 
+ */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, false, + &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); + } + + return ret; +} + /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start @@ -555,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v); + amdgpu_kiq_wreg(adev, reg, v, 0); up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -597,6 +642,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, } /** + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Writes the value specified to the offset specified. 
+ */ +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, true, + &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + adev->pcie_wreg(adev, reg * 4, v); + } +} + +/** * amdgpu_device_indirect_rreg - read an indirect register * * @adev: amdgpu_device pointer @@ -1073,6 +1159,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); + /* TODO: check the return val and stop device initialization if boot fails */ + amdgpu_psp_query_boot_status(adev); return ret; } else { return amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -2223,7 +2311,6 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { - struct drm_device *dev = adev_to_drm(adev); struct pci_dev *parent; int i, r; bool total; @@ -2294,7 +2381,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((adev->flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + !dev_is_removable(&adev->pdev->dev)) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { @@ -2497,6 +2584,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->name); return r; } + r = amdgpu_uvd_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", + ring->name); + return r; + } + r = amdgpu_vce_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", + ring->name); + return r; + } } amdgpu_xcp_update_partition_sched_list(adev); @@ -3962,13 +4061,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): + r = psp_gpu_reset(adev); + break; + default: + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. 
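+ * (amdgpu_reset_method is saved in tmp and restored right after
+ * the forced default reset below has run.)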
+ */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + break; + } + if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; @@ -4132,7 +4241,7 @@ fence_driver_init: px = amdgpu_device_supports_px(ddev); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px); @@ -4282,7 +4391,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) px = amdgpu_device_supports_px(adev_to_drm(adev)); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_unregister_client(adev->pdev); @@ -4474,19 +4583,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } amdgpu_fence_driver_hw_init(adev); - r = amdgpu_device_ip_late_init(adev); - if (r) - goto exit; - - queue_delayed_work(system_wq, &adev->delayed_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; } + r = amdgpu_device_ip_late_init(adev); + if (r) + goto exit; + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); @@ -5566,10 +5674,6 @@ skip_hw_reset: drm_sched_start(&ring->sched, true); } - if (adev->enable_mes && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3)) - amdgpu_mes_self_test(tmp_adev); - if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b6a53e8429b2..0431eafa86b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -99,6 +99,7 @@ MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 @@ -239,8 +240,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { - uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - int ret = 0; + uint64_t vram_size; + u32 msg; + int i, ret = 0; + + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. 
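+ * The loop below treats bit 31 of mmMP0_SMN_C2PMSG_33 as the
+ * init-done flag and polls it for at most ~1s (1000 * 1 ms).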
+ */ + if (dev_is_removable(&adev->pdev->dev)) { + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + msleep(1); + } + } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -2449,6 +2468,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); + /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index b5e28fa3f414..e7e87a3b2601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) if (!r) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6cc6e3991410..3095a3a864af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2041,6 +2041,14 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static const struct amdgpu_asic_type_quirk asic_type_quirks[] = { + /* differentiate between P10 and P11 asics with the same DID */ + {0x67FF, 0xE3, CHIP_POLARIS10}, + {0x67FF, 0xE7, CHIP_POLARIS10}, + {0x67FF, 0xF3, CHIP_POLARIS10}, + {0x67FF, 0xF7, CHIP_POLARIS10}, +}; + static const struct drm_driver amdgpu_kms_driver; static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) @@ -2083,6 +2091,22 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) } } +static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) { + if (pdev->device == asic_type_quirks[i].device && + pdev->revision == asic_type_quirks[i].revision) { + flags &= ~AMD_ASIC_MASK; + flags |= asic_type_quirks[i].type; + break; + } + } + + return flags; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2110,15 +2134,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } - /* differentiate between P10 and P11 asics with the same DID */ - if (pdev->device == 0x67FF && - (pdev->revision == 0xE3 || - pdev->revision == 0xE7 || - pdev->revision == 0xF3 || - pdev->revision == 0xF7)) { - flags &= ~AMD_ASIC_MASK; - flags |= CHIP_POLARIS10; - } + + flags = amdgpu_fix_asic_type(pdev, flags); /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c92e0aba69e1..b9674c57c436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring = &kiq->ring; u32 domain = AMDGPU_GEM_DOMAIN_GTT; +#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) /* Only 
enable on gfx10 and 11 for now to avoid changing behavior on older chips */ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) domain |= AMDGPU_GEM_DOMAIN_VRAM; +#endif /* create MQD for KIQ */ if (!adev->enable_mes_kiq && !ring->mqd_obj) { @@ -929,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, func(adev, ras_error_status, i); } -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -997,12 +999,12 @@ failed_kiq_read: return ~0; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7088c5015675..f23bafec71c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2dce338b0f1e..5f71414190e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->noretry = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? 
noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 5a828c175e3a..cf33eb219e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -143,6 +143,46 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +{ + if (!mca_set) + return; + + memset(mca_set, 0, sizeof(*mca_set)); + INIT_LIST_HEAD(&mca_set->list); +} + +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +{ + struct mca_bank_node *node; + + if (!entry) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->entry, entry, sizeof(*entry)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &mca_set->list); + + mca_set->nr_entries++; + + return 0; +} + +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +{ + struct mca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + list_del(&node->node); + kvfree(node); + } +} + void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) { struct amdgpu_mca *mca = &adev->mca; @@ -160,6 +200,65 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +{ + dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret, i = 0; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); + if (ret) + goto out_mca_release; + + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto out_mca_release; + + if (!count) + continue; + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + } + +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + + int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) { const struct 
amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; @@ -173,17 +272,77 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m return -EOPNOTSUPP; } -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - if (!count) + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret; + + if (!total) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_error_count) - return mca_funcs->mca_get_error_count(adev, blk, type, count); + if (!mca_funcs) + return -EOPNOTSUPP; - return -EOPNOTSUPP; + if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) + return -EOPNOTSUPP; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); + if (ret) + goto err_mca_set_release; + + *total = 0; + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + count = 0; + ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto err_mca_set_release; + + *total += count; + } + +err_mca_set_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) + return -EOPNOTSUPP; + + + return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); +} + +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!mca_set) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) + return -EOPNOTSUPP; + + WARN_ON(!list_empty(&mca_set->list)); + + return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); } int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -230,14 +389,21 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) { int i, idx = entry->idx; + int reg_idx_array[] = { + MCA_REG_IDX_STATUS, + MCA_REG_IDX_ADDR, + MCA_REG_IDX_MISC0, + MCA_REG_IDX_IPID, + MCA_REG_IDX_SYND, + }; seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? 
"UE" : "CE"); seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip); seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype); - for (i = 0; i < ARRAY_SIZE(entry->regs); i++) - seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, i, entry->regs[i]); + for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++) + seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]); } static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 28ad463cf5c9..2b488fcf2f95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -25,6 +25,27 @@ #define MCA_MAX_REGS_COUNT (16) +#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63) +#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62) +#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61) +#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60) +#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59) +#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58) +#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57) +#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56) +#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55) +#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53) +#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46) +#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45) +#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44) +#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43) +#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40) +#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32) +#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24) +#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) +#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, @@ -33,6 +54,7 @@ enum amdgpu_mca_ip { AMDGPU_MCA_IP_SMU, AMDGPU_MCA_IP_MP5, AMDGPU_MCA_IP_UMC, + AMDGPU_MCA_IP_PCS_XGMI, AMDGPU_MCA_IP_COUNT, }; @@ -57,6 +79,15 @@ struct amdgpu_mca { const struct amdgpu_mca_smu_funcs *mca_funcs; }; +enum mca_reg_idx { + MCA_REG_IDX_STATUS = 1, + MCA_REG_IDX_ADDR = 2, + MCA_REG_IDX_MISC0 = 3, + MCA_REG_IDX_IPID = 5, + MCA_REG_IDX_SYND = 6, + MCA_REG_IDX_COUNT = 16, +}; + struct mca_bank_info { int socket_id; int aid; @@ -72,18 +103,28 @@ struct mca_bank_entry { uint64_t regs[MCA_MAX_REGS_COUNT]; }; +struct mca_bank_node { + struct mca_bank_entry entry; + struct list_head node; +}; + +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); + int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set); + int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum 
amdgpu_mca_error_type type, uint32_t *count); int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); - int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size); }; void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, @@ -107,11 +148,22 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total); int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59f10b353b3a..9ddbf1494326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -557,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -994,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0dcb6c36b02c..cef920a93924 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1062,9 +1062,6 @@ static const char * const amdgpu_vram_names[] = { */ int amdgpu_bo_init(struct amdgpu_device *adev) { - /* set the default AGP aperture state */ - amdgpu_gmc_set_agp_default(adev, &adev->gmc); - /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 648bd5e12830..32b701cc0376 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2120,6 +2120,21 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) return ret; } +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + int ret = 0; + + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return 0; + + if (psp->funcs && + psp->funcs->query_boot_status) + ret = psp->funcs->query_boot_status(psp); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 7111dd32e66f..5d36ad3f48c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -134,6 +134,7 @@ struct psp_funcs { int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); + int (*query_boot_status)(struct psp_context *psp); }; struct ta_funcs { @@ -537,4 +538,6 @@ int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 303fbb6a48b6..84e5987b14e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1165,13 +1165,53 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s } } -/* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) +static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, + struct ras_query_if *info, + struct ras_err_data *err_data, + unsigned int error_query_mode) { + enum amdgpu_ras_block blk = info ? 
info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, &err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } + } + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + } + + return 0; +} + +/* query/inject/cure begin */ +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + unsigned int error_query_mode; int ret; if (!obj) @@ -1181,27 +1221,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (ret) return ret; - if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { - amdgpu_ras_get_ecc_info(adev, &err_data); - } else { - block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - get_ras_block_str(&info->head)); - ret = -EINVAL; - goto out_fini_err_data; - } - - if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); + if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) + return -EINVAL; - if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || - (info->head.block == AMDGPU_RAS_BLOCK__GFX) || - (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { - if (block_obj->hw_ops->query_ras_error_status) - block_obj->hw_ops->query_ras_error_status(adev); - } - } + ret = amdgpu_ras_query_error_status_helper(adev, info, + &err_data, + error_query_mode); + if (ret) + goto out_fini_err_data; amdgpu_rasmgr_error_data_statistic_update(obj, &err_data); @@ -1222,6 +1249,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ -1229,15 +1258,22 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, return -EOPNOTSUPP; } - /* skip ras error reset in gpu reset */ - if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) && - mca_funcs && mca_funcs->mca_set_debug_mode) - return -EOPNOTSUPP; - if (!amdgpu_ras_is_supported(adev, block) || !amdgpu_ras_get_mca_debug_mode(adev)) return -EOPNOTSUPP; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + 
/* skip ras error reset in gpu reset */ + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || + hive_ras_recovery) && + mca_funcs && mca_funcs->mca_set_debug_mode) + return -EOPNOTSUPP; + if (block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); @@ -1528,7 +1564,8 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } @@ -1547,7 +1584,8 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1594,7 +1632,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; @@ -3388,6 +3427,26 @@ bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) return true; } +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *error_query_mode) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) { + *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; + return false; + } + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + *error_query_mode = + (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + else + *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + + return true; +} + /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 665414c22ca9..19161916ac46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -320,6 +320,12 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +enum amdgpu_ras_error_query_mode { + AMDGPU_RAS_INVALID_ERROR_QUERY = 0, + AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, + AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, +}; + /* ras error status reisger fields */ #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L @@ -769,6 +775,8 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *mode); int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index f74347cc087a..d65e21914d8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -166,8 +166,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } } - if (reset) + if (reset) { + /* use mode-2 reset for poison consumption */ + if (!entry) + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; amdgpu_ras_reset_gpu(adev); + } } 
kfree(err_data->err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 815b7c34ed33..65949cc7abb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -399,20 +399,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->uvd.inst[0].ring) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->uvd.inst[0].ring; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a9f342537c68..9dfad2f48ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -73,7 +73,7 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); -int amdgpu_uvd_entity_init(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1904edf68407..0954447f689d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -231,20 +231,20 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. 
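+ *
+ * Note: only the call for the first VCE ring (&adev->vce.ring[0])
+ * creates the entity; calls for any other ring are a no-op and return 0.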
*/ -int amdgpu_vce_entity_init(struct amdgpu_device *adev) +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; - - ring = &adev->vce.ring[0]; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; + + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ea680fc9a6c3..6e53f872d084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,7 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); -int amdgpu_vce_entity_init(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..3a632c3b1a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; @@ -942,7 +943,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag) { @@ -975,7 +976,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; uint32_t timeout = 50000; @@ -1093,3 +1094,13 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } + +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) +{ + bool xnack_mode = true; + + if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + xnack_mode = false; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..d4207e44141f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct 
amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); @@ -365,4 +366,9 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag); +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3cd5977c0709..d1b8afd105c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1098,8 +1098,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } @@ -1373,6 +1373,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1382,11 +1383,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1409,17 +1411,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; - if (!clear) + if (unlock) dma_resv_unlock(resv); spin_lock(&vm->status_lock); } @@ -2130,7 +2139,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. 
*/ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2149,6 +2159,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2183,34 +2194,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; amdgpu_bo_unreserve(vm->root.bo); - - INIT_KFIFO(vm->faults); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9c7b5d33b56e..2cd86d2bf73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -443,7 +443,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 18f58efc9dc7..08916538a615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) return true; } +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 size = 0; + list_for_each_entry(block, head, link) + size += amdgpu_vram_mgr_block_size(block); + + return size; +} /** * DOC: mem_info_vram_total @@ -516,6 +525,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); vres->base.start = 0; + size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), + vres->base.size); list_for_each_entry(block, &vres->blocks, link) { unsigned long start; @@ -523,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); + if (start > PFN_UP(size)) + start -= PFN_UP(size); else start = 0; vres->base.start = max(vres->base.start, start); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 9d5d742ee9d3..bd20cb3b9819 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -103,6 +103,53 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const int xgmi3x16_pcs_err_status_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000 +}; + +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + +static const u64 xgmi_v6_4_0_mca_base_array[] = { + 0x11a09200, + 0x11b09200, +}; + +static const char *xgmi_v6_4_0_ras_error_code_ext[32] = { + [0x00] = "XGMI PCS DataLossErr", + [0x01] = "XGMI PCS TrainingErr", + [0x02] = "XGMI PCS FlowCtrlAckErr", + [0x03] = "XGMI PCS RxFifoUnderflowErr", + [0x04] = "XGMI PCS RxFifoOverflowErr", + [0x05] = "XGMI PCS CRCErr", + [0x06] = "XGMI PCS BERExceededErr", + [0x07] = "XGMI PCS TxMetaDataErr", + [0x08] = "XGMI PCS ReplayBufParityErr", + [0x09] = "XGMI PCS DataParityErr", + [0x0a] = "XGMI PCS ReplayFifoOverflowErr", + [0x0b] = "XGMI PCS ReplayFifoUnderflowErr", + [0x0c] = "XGMI PCS ElasticFifoOverflowErr", + [0x0d] = "XGMI PCS DeskewErr", + [0x0e] = "XGMI PCS FlowCtrlCRCErr", + [0x0f] = "XGMI PCS DataStartupLimitErr", + [0x10] = "XGMI PCS FCInitTimeoutErr", + [0x11] = "XGMI PCS RecoveryTimeoutErr", + [0x12] = "XGMI PCS ReadySerialTimeoutErr", + [0x13] = "XGMI PCS ReadySerialAttemptErr", + [0x14] = "XGMI PCS RecoveryAttemptErr", + [0x15] = "XGMI PCS RecoveryRelockAttemptErr", + [0x16] = "XGMI PCS ReplayAttemptErr", + [0x17] = "XGMI PCS SyncHdrErr", + [0x18] = "XGMI PCS TxReplayTimeoutErr", + [0x19] = "XGMI PCS RxReplayTimeoutErr", + [0x1a] = "XGMI PCS LinkSubTxTimeoutErr", + [0x1b] = "XGMI PCS LinkSubRxTimeoutErr", + [0x1c] = "XGMI PCS RxCMDPktErr", +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -926,7 +973,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg WREG32_PCIE(pcs_status_reg, 0); } -static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) { uint32_t i; @@ -952,6 +999,49 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) default: break; } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) + pcs_clear_status(adev, + xgmi3x16_pcs_err_status_reg_v6_4[i]); + break; + default: + break; + } +} + +static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) +{ + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]); +} + +static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_reset_error_count(adev, i); +} + +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, 
XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_reset_ras_error_count(adev); + break; + default: + amdgpu_xgmi_legacy_reset_ras_error_count(adev); + break; + } } static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, @@ -969,7 +1059,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (is_xgmi_pcs) { if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 1, 0)) { + IP_VERSION(6, 1, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 0)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1003,11 +1095,11 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - int i; + int i, supported = 1; uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; @@ -1071,7 +1163,25 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; default: - dev_warn(adev->dev, "XGMI RAS error query not supported"); + supported = 0; + break; + } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); + } + break; + default: + if (!supported) + dev_warn(adev->dev, "XGMI RAS error query not supported"); break; } @@ -1081,32 +1191,116 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += ce_cnt; } +static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +{ + const char *error_str; + int ext_error_code; + + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? 
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + switch (ext_error_code) { + case 0: + return AMDGPU_MCA_ERROR_TYPE_UE; + case 6: + return AMDGPU_MCA_ERROR_TYPE_CE; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 mca_base, struct ras_err_data *err_data) +{ + int xgmi_inst = mcm_info->die_id; + u64 status = 0; + + status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); + if (!MCA_REG__STATUS__VAL(status)) + return; + + switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { + case AMDGPU_MCA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + break; + default: + break; + } + + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xgmi_inst, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data); +} + +static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_query_error_count(adev, i, err_data); +} + +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); + break; + default: + amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status); + break; + } +} + /* Trigger XGMI/WAFL error */ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d9ccacd06fba..c8a3bf01743f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3498,6 +3498,8 @@ static void 
gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); @@ -6465,11 +6467,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -6743,7 +6752,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -6766,7 +6775,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -6787,11 +6796,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -7172,6 +7181,13 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + /* Workaround for the Vangogh ASIC: the SMU suspend fails unless power + * gating is set again during gfxoff control, otherwise the subsequent + * request to disallow gfxoff fails. 
+ */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) + gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE); + if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { if (amdgpu_gfx_disable_kgq(adev, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index fd22943685f7..0c6133cc5e57 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -155,6 +155,7 @@ static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ @@ -3714,11 +3715,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -4007,7 +4008,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4030,7 +4031,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -4051,11 +4052,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 41bbabd9ad4d..40d06d32bb74 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1102,6 +1102,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); } + adev->gfx.rlc.rlcg_reg_access_supported = true; } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) @@ -2738,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( 
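	/* Note: the _XCC register helpers used below take an explicit xcc_id and
	 * route the access to that XCC's register space (including the RLC-assisted
	 * path under SR-IOV); the plain RREG32/WREG32 helpers always address
	 * instance 0, so the read-modify-write of TIME_STAMP_INT_ENABLE could land
	 * on the wrong XCC. */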
switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; default: break; @@ -3799,6 +3800,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -3838,6 +3860,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -4300,7 +4339,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 9, .minor = 4, - .rev = 0, + .rev = 3, .funcs = &gfx_v9_4_3_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d8a4fddab9c1..0ec7b061d7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } @@ -672,6 +672,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, 
AMDGPU_GART_PLACEMENT_BEST_FIT); if (!amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 4713a62ad586..6dce9b29f675 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } @@ -637,6 +637,7 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); if (!amdgpu_sriov_vf(adev) || diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 7f66954fd302..42e103d7077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -211,6 +211,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 61ca1a82b651..efc16e580f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -239,6 +239,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index fa59749c2aef..ff4ae73d27ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -413,6 +413,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b66c5f7e1c56..bde25eb4ed8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - u32 j, inv_req, tmp, sem, req, ack; + u32 j, inv_req, tmp, sem, req, ack, inst; const unsigned int eng = 17; struct amdgpu_vmhub *hub; @@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq[0].ring.sched.ready && + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 
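		/* 'inst' selects which KIQ performs the write/wait pair on our
		 * behalf: MMHUB invalidations are funneled through GC instance 0's
		 * KIQ (see the assignment above), while a GC vmhub index doubles as
		 * its own KIQ instance. */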
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, inst); return; } @@ -856,9 +860,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); if (tmp & 0x1) break; udelay(1); @@ -869,9 +873,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); /* * Issue a dummy read to wait for the ACK register to @@ -884,9 +888,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); if (tmp & (1 << vmid)) break; udelay(1); @@ -899,9 +903,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, sem, 0); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); } spin_unlock(&adev->gmc.invalidate_lock); @@ -1176,7 +1180,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - mtype = is_local ? MTYPE_CC : MTYPE_UC; + if (adev->rev_id) + mtype = is_local ? MTYPE_CC : MTYPE_UC; + else + mtype = MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? 
mtype_local : MTYPE_NC; } else { @@ -1297,7 +1304,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); - } else { + } else if (adev->rev_id) { /* MTYPE_UC case */ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); @@ -1614,6 +1621,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 3f3a6445c006..49e934975719 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -145,6 +145,10 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) break; } + /* Do not program registers if VF */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 9a8ec4d7e333..82b6b62c170b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -654,9 +654,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -672,9 +674,11 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index e523627cfe25..df218d5ca775 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,6 +28,7 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#include <linux/device.h> #include <linux/pci.h> #define smnPCIE_CONFIG_CNTL 0x11180044 @@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -480,7 
+481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 4142e2fcd866..3cf4684d0d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -759,6 +759,83 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) return 0; } + +static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev, + uint32_t inst, + uint32_t boot_error) +{ + uint32_t socket_id; + uint32_t aid_id; + uint32_t hbm_id; + uint32_t reg_data; + + socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID); + aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID); + hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID); + + reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109); + dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int psp_v13_0_query_boot_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + int inst_mask = adev->aid_mask; + uint32_t reg_data; + uint32_t i; + int ret = 0; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return 0; + + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + return 0; + + for_each_inst(i, inst_mask) { + reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126); + if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) { + psp_v13_0_boot_error_reporting(adev, i, reg_data); + ret = -EINVAL; + break; + } + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = 
psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -781,6 +858,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .update_spirom = psp_v13_0_update_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, + .query_boot_status = psp_v13_0_query_boot_status, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c46bc6aa4f48..0f24af6f2810 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -427,6 +427,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; int i, unset = 0; @@ -444,6 +445,18 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); + + doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA_GFX_DOORBELL_OFFSET, + OFFSET, 0); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset); + } } } @@ -631,12 +644,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); - /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI, upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); @@ -654,6 +661,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* before programming wptr to a smaller value, minor_ptr_update must be set first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -2048,7 +2061,7 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 4, - .rev = 0, + .rev = 2, .funcs = &sdma_v4_4_2_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index da683afa0222..242b24f73c17 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,7 +69,7 @@ #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0) -#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ 
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -86,8 +86,8 @@ #define WREG32_SOC15_IP(ip, reg, value) \ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0) -#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -140,7 +140,7 @@ /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) @@ -204,4 +204,10 @@ + adev->asic_funcs->encode_ext_smn_addressing(ext), \ value) \ +#define RREG64_MCA(ext, mca_base, idx) \ + RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) + +#define WREG64_MCA(ext, mca_base, idx, val) \ + WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d5083c549330..48c6efcdeac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -381,6 +381,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 743d2f68b090..e9c2ff74f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -88,16 +88,15 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) { return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) { return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 4885b9fff272..b34b1e358f8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,6 +117,9 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); + extern const uint32_t umc_v12_0_channel_idx_tbl[] [UMC_V12_0_UMC_INSTANCE_NUM] diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 58a8f78c003c..a6006f231c65 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -577,8 +577,6 @@ static int uvd_v3_1_sw_init(void *handle) ptr += ucode_len; memcpy(&adev->uvd.keyselect, ptr, 4); - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index d3b1e31f5450..1aa09ad7bbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -127,8 +127,6 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 5a8116437abf..f8b229b75435 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -125,8 +125,6 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 74c09230aeb3..a9a6880f44e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -432,8 +432,6 @@ static int uvd_v6_0_sw_init(void *handle) } } - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1c42cf10cc29..6068b784dc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -480,10 +480,6 @@ static int uvd_v7_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 67eb01fef789..a08e7abca423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -441,8 +441,6 @@ static int vce_v2_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 18f6e62af339..f4760748d349 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -450,8 +450,6 @@ static int vce_v3_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index e0b70cd3b697..06d787385ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle) return r; } - - r = amdgpu_vce_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 0e792a8496d6..cd8e459201f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1404,6 +1404,66 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } +static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, + struct kfd_gpu_cache_info *pcache_info) +{ + struct amdgpu_device *adev = kdev->adev; + int i = 0; + + /* TCP L1 Cache per CU */ + if (adev->gfx.config.gc_tcp_size_per_cu) { + pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; + pcache_info[i].cache_level = 
1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = 1; + i++; + } + /* Scalar L1 Instruction Cache per SQC */ + if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { + pcache_info[i].cache_size = + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; + i++; + } + /* Scalar L1 Data Cache per SQC */ + if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { + pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; + i++; + } + /* L2 Data Cache per GPU (Total Tex Cache) */ + if (adev->gfx.config.gc_tcc_size) { + pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; + pcache_info[i].cache_level = 2; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + /* L3 Data Cache per GPU */ + if (adev->gmc.mall_size) { + pcache_info[i].cache_size = adev->gmc.mall_size / 1024; + pcache_info[i].cache_level = 3; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + return i; +} + int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; @@ -1461,10 +1521,14 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc num_of_cache_types = ARRAY_SIZE(vega20_cache_info); break; case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): *pcache_info = aldebaran_cache_info; num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; + case IP_VERSION(9, 4, 3): + num_of_cache_types = + kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, + *pcache_info); + break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): *pcache_info = raven_cache_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fbf053001af9..7a33e06f5c90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1416,8 +1416,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) + if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) { + if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { + pr_debug("SRIOV platform xnack not supported\n"); + return false; + } continue; + } /* GFXv10 and later GPUs do not support shader preemption * during page faults. This can lead to poor QoS for queue diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e67d06a42809..f2f3c338fd94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1255,9 +1255,11 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? 
AMDGPU_VM_MTYPE_NC : - (amdgpu_mtype_local == 2 || ext_coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); + if (ext_coherent) + mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + else + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c1e10f42db28..057284bf50bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1602,10 +1602,13 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, unsigned int cu_sibling_map_mask; int first_active_cu; int i, j, k, xcc, start, end; + int num_xcc = NUM_XCC(knode->xcc_mask); struct kfd_cache_properties *pcache = NULL; + enum amdgpu_memory_partition mode; + struct amdgpu_device *adev = knode->adev; start = ffs(knode->xcc_mask) - 1; - end = start + NUM_XCC(knode->xcc_mask); + end = start + num_xcc; cu_sibling_map_mask = cu_info->bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); @@ -1624,7 +1627,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; + + if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3)) + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + else + mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (pcache->cache_level == 2) + pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; + else if (mode) + pcache->cache_size = pcache_info[cache_type].cache_size / mode; + else + pcache->cache_size = pcache_info[cache_type].cache_size; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 9cd83b780102..ed784cf27d39 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1216,6 +1216,9 @@ bool dm_helpers_dp_handle_test_pattern_request( } + pipe_ctx->stream->test_pattern.type = test_pattern; + pipe_ctx->stream->test_pattern.color_space = test_pattern_color_space; + dc_link_dp_set_test_pattern( (struct dc_link *) link, test_pattern, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 0f580ea37576..133af994a08c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -37,7 +37,7 @@ #include <drm/drm_framebuffer.h> #include <drm/drm_encoder.h> #include <drm/drm_atomic.h> -#include "dcn10/dcn10_optc.h" +#include "dc/inc/hw/optc.h" #include "dc/inc/core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index f80917f6153b..0fa4fcd00de2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -111,17 +111,21 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } -static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, 
struct dc_state *context, bool disable) +static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; if (disable) { @@ -301,11 +305,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn35_disable_otg_wa(clk_mgr_base, context, true); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn35_disable_otg_wa(clk_mgr_base, context, false); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } @@ -814,7 +818,8 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle) struct dc *dc = clk_mgr_base->ctx->dc; uint32_t val = dcn35_smu_read_ips_scratch(clk_mgr); - if (dc->config.disable_ips == 0) { + if (dc->config.disable_ips == DMUB_IPS_ENABLE || + dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) { val |= DMUB_IPS1_ALLOW_MASK; val |= DMUB_IPS2_ALLOW_MASK; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { @@ -1114,7 +1119,7 @@ void dcn35_clk_mgr_construct( dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, smu_dpm_clks.dpm_clks); - if (ctx->dc->config.disable_ips == 0) { + if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { bool ips_support = false; /*avoid call pmfw at init*/ @@ -1127,7 +1132,7 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ - ctx->dc->config.disable_ips = 1; /*pmfw not support it, disable it all*/ + ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 74c21d98b4de..7b9bf5cb4529 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2582,6 +2582,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->gamut_remap_matrix) update_flags->bits.gamut_remap_change = 1; + if (u->blend_tf) + update_flags->bits.gamma_change = 1; + if (u->gamma) { enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN; @@ -4113,8 +4116,17 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, bool success = false; struct dc_state *minimal_transition_context; struct pipe_split_policy_backup policy; + struct mall_temp_config mall_temp_config; /* commit based on new context */ + /* Since all phantom pipes are removed in full validation, + * we have to save and restore the subvp/mall config when + * we do a minimal transition since the flags marking the + * pipe as subvp/phantom will be cleared (dc copy constructor + * creates a shallow 
copy). + */ + if (dc->res_pool->funcs->save_mall_state) + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); minimal_transition_context = create_minimal_transition_state(dc, context, &policy); if (minimal_transition_context) { @@ -4123,9 +4135,20 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, dc->hwss.is_pipe_topology_transition_seamless( dc, minimal_transition_context, context)) { DC_LOG_DC("%s base = new state\n", __func__); + success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK; } release_minimal_transition_state(dc, minimal_transition_context, &policy); + if (dc->res_pool->funcs->restore_mall_state) + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); + /* If we do a minimal transition with plane removal and the context + * has subvp we also have to retain back the phantom stream / planes + * since the refcount is decremented as part of the min transition + * (we commit a state with no subvp, so the phantom streams / planes + * had to be removed). + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, context); } if (!success) { @@ -4348,7 +4371,6 @@ static bool full_update_required(struct dc *dc, srf_updates[i].in_transfer_func || srf_updates[i].func_shaper || srf_updates[i].lut3d_func || - srf_updates[i].blend_tf || srf_updates[i].surface->force_full_update || (srf_updates[i].flip_addr && srf_updates[i].flip_addr->address.tmz_surface != srf_updates[i].surface->address.tmz_surface) || @@ -4885,7 +4907,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow) if (dc->debug.disable_idle_power_optimizations) return; - if (dc->caps.ips_support && dc->config.disable_ips) + if (dc->caps.ips_support && (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return; if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present) @@ -4906,7 +4928,7 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc) if (dc->debug.disable_idle_power_optimizations) return false; - if (!dc->caps.ips_support || dc->config.disable_ips) + if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return false; if (dc->hwss.get_idle_state) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6ed40b6c6178..4bdf105d1d71 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -533,7 +533,7 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; return tg->funcs->get_frame_count(tg); @@ -592,7 +592,7 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; tg->funcs->get_scanoutpos(tg, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6e54ca055fcb..9316b737a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.256" +#define DC_VER "3.2.259" #define MAX_SURFACES 3 #define MAX_PLANES 6 diff --git 
a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ba142bef626b..e4c007203318 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -120,6 +120,80 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + /* only dereference dc_dmub_srv after the NULL check above */ + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + // Queue command + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + + if (status == DMUB_STATUS_QUEUE_FULL) { + /* Execute and wait for queue to become empty again. */ + dmub_srv_cmd_execute(dmub); + dmub_srv_wait_for_idle(dmub, 100000); + + /* Requeue the command. */ + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + } + + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + } + + status = dmub_srv_cmd_execute(dmub); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + return true; +} + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list) +{ + struct dmub_srv *dmub; + enum dmub_status status; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dmub = dc_dmub_srv->dmub; + + // Wait for DMUB to process command + if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { + status = dmub_srv_wait_for_idle(dmub, 100000); + + if (status != DMUB_STATUS_OK) { + DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + // Copy data back from ring buffer into command + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + } + + return true; +} + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type) { return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 31150b214394..d4a60f53faab 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -56,6 +56,14 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list); + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list); + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 35ae245ef722..eeeeeef4d717 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ 
-142,7 +142,8 @@ enum dp_test_link_rate { DP_TEST_LINK_RATE_HBR3 = 0x1E, DP_TEST_LINK_RATE_UHBR10 = 0x01, DP_TEST_LINK_RATE_UHBR20 = 0x02, - DP_TEST_LINK_RATE_UHBR13_5 = 0x03, + DP_TEST_LINK_RATE_UHBR13_5_LEGACY = 0x03, /* For backward compatibility */ + DP_TEST_LINK_RATE_UHBR13_5 = 0x04, }; struct dc_link_settings { diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 40dc51853d62..cea666ea66c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1037,7 +1037,9 @@ struct replay_config { bool replay_smu_opt_supported; // SMU optimization is supported unsigned int replay_enable_option; // Replay enablement option uint32_t debug_flags; // Replay debug flags - bool replay_timing_sync_supported; // Replay desync is supported + bool replay_timing_sync_supported; // Replay desync is supported + bool force_disable_desync_error_check; // Force disable Replay desync error check + bool received_desync_error_hpd; // Replay received desync error HPD union replay_error_status replay_error_status; // Replay error status }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index c50aa30614be..051e4c2b4cf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -128,21 +128,6 @@ SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ NBIO_SR(BIOS_SCRATCH_2) -#define ABM_DCN32_REG_LIST(id)\ - SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ - SRI(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_HG_MISC_CTRL, ABM, id), \ - SRI(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ - SRI(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_USER_LEVEL, ABM, id), \ - SRI(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ - SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ - SRI(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ - SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ - NBIO_SR(BIOS_SCRATCH_2) - #define ABM_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index aaf6c981fd9e..ab81594a7fad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -26,7 +26,7 @@ #ifndef __DC_TIMING_GENERATOR_DCN10_H__ #define __DC_TIMING_GENERATOR_DCN10_H__ -#include "timing_generator.h" +#include "optc.h" #define DCN10TG_FROM_TG(tg)\ container_of(tg, struct optc, base) @@ -594,190 +594,6 @@ struct dcn_optc_mask { TG_REG_FIELD_LIST_DCN3_5(uint32_t) }; -struct optc { - struct timing_generator base; - - const struct dcn_optc_registers *tg_regs; - const struct dcn_optc_shift *tg_shift; - const struct dcn_optc_mask *tg_mask; - - int opp_count; - - uint32_t max_h_total; - uint32_t max_v_total; - - uint32_t min_h_blank; - - uint32_t min_h_sync_width; - uint32_t min_v_sync_width; - uint32_t min_v_blank; - uint32_t min_v_blank_interlace; - - int vstartup_start; - int vupdate_offset; - int vupdate_width; - int vready_offset; - struct dc_crtc_timing orginal_patched_timing; - enum signal_type signal; -}; - void dcn10_timing_generator_init(struct optc *optc); -struct dcn_otg_state { - uint32_t v_blank_start; - uint32_t v_blank_end; - uint32_t v_sync_a_pol; - uint32_t v_total; - uint32_t v_total_max; - uint32_t v_total_min; - uint32_t v_total_min_sel; - uint32_t v_total_max_sel; - uint32_t 
v_sync_a_start; - uint32_t v_sync_a_end; - uint32_t h_blank_start; - uint32_t h_blank_end; - uint32_t h_sync_a_start; - uint32_t h_sync_a_end; - uint32_t h_sync_a_pol; - uint32_t h_total; - uint32_t underflow_occurred_status; - uint32_t otg_enabled; - uint32_t blank_enabled; - uint32_t vertical_interrupt1_en; - uint32_t vertical_interrupt1_line; - uint32_t vertical_interrupt2_en; - uint32_t vertical_interrupt2_line; -}; - -void optc1_read_otg_state(struct optc *optc1, - struct dcn_otg_state *s); - -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing); - -bool optc1_validate_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *timing); - -void optc1_program_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width, - const enum signal_type signal, - bool use_vbios); - -void optc1_setup_vertical_interrupt0( - struct timing_generator *optc, - uint32_t start_line, - uint32_t end_line); -void optc1_setup_vertical_interrupt1( - struct timing_generator *optc, - uint32_t start_line); -void optc1_setup_vertical_interrupt2( - struct timing_generator *optc, - uint32_t start_line); - -void optc1_program_global_sync( - struct timing_generator *optc, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width); - -bool optc1_disable_crtc(struct timing_generator *optc); - -bool optc1_is_counter_moving(struct timing_generator *optc); - -void optc1_get_position(struct timing_generator *optc, - struct crtc_position *position); - -uint32_t optc1_get_vblank_counter(struct timing_generator *optc); - -void optc1_get_crtc_scanoutpos( - struct timing_generator *optc, - uint32_t *v_blank_start, - uint32_t *v_blank_end, - uint32_t *h_position, - uint32_t *v_position); - -void optc1_set_early_control( - struct timing_generator *optc, - uint32_t early_cntl); - -void optc1_wait_for_state(struct timing_generator *optc, - enum crtc_state state); - -void optc1_set_blank(struct timing_generator *optc, - bool enable_blanking); - -bool optc1_is_blanked(struct timing_generator *optc); - -void optc1_program_blank_color( - struct timing_generator *optc, - const struct tg_color *black_color); - -bool optc1_did_triggered_reset_occur( - struct timing_generator *optc); - -void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); - -void optc1_disable_reset_trigger(struct timing_generator *optc); - -void optc1_lock(struct timing_generator *optc); - -void optc1_unlock(struct timing_generator *optc); - -void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); - -void optc1_set_drr( - struct timing_generator *optc, - const struct drr_params *params); - -void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); - -void optc1_set_static_screen_control( - struct timing_generator *optc, - uint32_t event_triggers, - uint32_t num_frames); - -void optc1_program_stereo(struct timing_generator *optc, - const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); - -bool optc1_is_stereo_left_eye(struct timing_generator *optc); - -void optc1_clear_optc_underflow(struct timing_generator *optc); - -void optc1_tg_init(struct timing_generator *optc); - -bool optc1_is_tg_enabled(struct timing_generator *optc); - -bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); - -void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool 
enable); - -void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); - -bool optc1_get_otg_active_size(struct timing_generator *optc, - uint32_t *otg_active_width, - uint32_t *otg_active_height); - -void optc1_enable_crtc_reset( - struct timing_generator *optc, - int source_tg_inst, - struct crtc_trigger_info *crtc_tp); - -bool optc1_configure_crc(struct timing_generator *optc, - const struct crc_params *params); - -bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); - -bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); - -void optc1_set_vtg_params(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2); - #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 5eebe7f03ddc..c9ae2d8f0096 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -137,7 +137,15 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2; } - // TODO DSC: This is actually image width limitation, not a slice width. This should be added to the criteria to use ODM. + /* For a pixel clock bigger than twice the single-pipe limit we need four engines (ODM 4:1), which then quadruples our + * throughput and number of slices + */ + if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) { + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4; + } + dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ }
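A quick note on the unit math in the DSC hunk above: DC carries pixel clocks in units of 100 Hz, so a limit expressed in MHz has to be scaled by 10000 before comparing it against pixel_clock_100Hz. The sketch below restates that check in a self-contained form; the 1188 MHz stand-in value is hypothetical, the real DCN20_MAX_PIXEL_CLOCK_Mhz is defined in the driver headers and may differ.

#include <stdbool.h>

#define MAX_PIXEL_CLOCK_MHZ 1188 /* hypothetical stand-in for DCN20_MAX_PIXEL_CLOCK_Mhz */

/* 1 MHz == 10000 units of 100 Hz, so scale the MHz limit before comparing */
static bool needs_odm_4to1(int pixel_clock_100hz)
{
	/* beyond twice the single-pipe limit, four DSC engines are combined */
	return pixel_clock_100hz > MAX_PIXEL_CLOCK_MHZ * 10000 * 2;
}

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 50dc83404644..11f7746f3a65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -613,16 +613,19 @@ static void dpp3_program_blnd_pwl( REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red); } else { + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 4); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red); + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 2); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].green_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_green); + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 1); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].blue_reg);

The CM_BLNDGAM_LUT_INDEX writes added above (and the matching MPCC_MCM_1DLUT_LUT_INDEX writes in the dcn32_mpc.c hunk below) follow the usual auto-increment RAM port pattern: every write to the data register advances an internal index, so the index must be rewound to 0 before each per-channel pass or the green and blue passes land at the wrong offsets. A minimal sketch of that pattern, with illustrative register names rather than the real DCN ones:

#include <stdint.h>

struct lut_regs { volatile uint32_t index, control_mask, data; };

static void program_channel(struct lut_regs *regs, const uint32_t *vals,
			    int num, uint32_t color_mask)
{
	regs->index = 0;                 /* rewind the auto-increment write pointer */
	regs->control_mask = color_mask; /* e.g. 4 = red, 2 = green, 1 = blue */
	for (int i = 0; i < num; i++)
		regs->data = vals[i];    /* each data write advances the index */
}

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 2861d974fcf6..75547ce86c09 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -316,7 +316,7 @@ bool 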
hubp3_program_surface_flip_and_addr( return true; } -static void hubp3_program_tiling( +void hubp3_program_tiling( struct dcn20_hubp *hubp2, const union dc_tiling_info *info, const enum surface_pixel_format pixel_format) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h index 8a32772d4e91..b010531a7fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h @@ -278,6 +278,11 @@ void hubp3_setup( struct _vcs_dpi_display_rq_regs_st *rq_regs, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); +void hubp3_program_tiling( + struct dcn20_hubp *hubp2, + const union dc_tiling_info *info, + const enum surface_pixel_format pixel_format); + void hubp3_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 1d052f08aff5..994b21ed272f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -237,16 +237,19 @@ void mpc32_program_post1dlut_pwl( REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red); } else { + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 4); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red); + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 2); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].green_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_green); + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 1); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].blue_reg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index addedcfd1238..479f3683c0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -325,6 +325,43 @@ static void dccg35_set_dpstreamclk( } } +static void dccg35_set_physymclk_root_clock_gating( + struct dccg *dccg, + int phy_inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (phy_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 
1 : 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_set_physymclk( struct dccg *dccg, int phy_inst, @@ -340,16 +377,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 1, PHYASYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 0, PHYASYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 0); } break; case 1: @@ -357,16 +388,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 1, PHYBSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 0, PHYBSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 2: @@ -374,16 +399,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 1, PHYCSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 0, PHYCSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 3: @@ -391,16 +410,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 1, PHYDSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 0, PHYDSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 4: @@ -408,16 +421,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 1, PHYESYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 0, PHYESYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 0); } break; default: @@ -490,8 +497,8 @@ void dccg35_init(struct dccg *dccg) if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) for (otg_inst = 0; otg_inst < 5; otg_inst++) - dccg35_set_physymclk(dccg, otg_inst, - PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg35_set_physymclk_root_clock_gating(dccg, otg_inst, + false); /* dccg35_enable_global_fgcg_rep( dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits @@ -754,7 +761,9 @@ static const struct dccg_funcs dccg35_funcs = { .disable_symclk32_se = dccg31_disable_symclk32_se, .enable_symclk32_le = dccg31_enable_symclk32_le, .disable_symclk32_le = dccg31_disable_symclk32_le, + .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, .set_physymclk = dccg35_set_physymclk, + .set_physymclk_root_clock_gating = 
dccg35_set_physymclk_root_clock_gating, .set_dtbclk_dto = dccg35_set_dtbclk_dto, .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c index 1ed58660779e..771fcd0d3b99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c @@ -53,11 +53,146 @@ static void hubp35_init(struct hubp *hubp) /*do nothing for now for dcn3.5 or later*/ } + +void hubp35_program_pixel_format( + struct hubp *hubp, + enum surface_pixel_format format) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + uint32_t green_bar = 1; + uint32_t red_bar = 3; + uint32_t blue_bar = 2; + + /* swap for ABGR format */ + if (format == SURFACE_PIXEL_FORMAT_GRPH_ABGR8888 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F) { + red_bar = 2; + blue_bar = 3; + } + + REG_UPDATE_3(HUBPRET_CONTROL, + CROSSBAR_SRC_Y_G, green_bar, + CROSSBAR_SRC_CB_B, blue_bar, + CROSSBAR_SRC_CR_R, red_bar); + + /* Mapping is same as ipp programming (cnvc) */ + + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 1); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 3); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 8); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 10); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /* we use crossbar already */ + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:/*we use crossbar already*/ + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 24); + break; + + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 65); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 64); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 67); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 66); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 12); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 112); + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 113); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 114); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 118); + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + 
SURFACE_PIXEL_FORMAT, 119); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE: + REG_UPDATE_2(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 116, + ALPHA_PLANE_EN, 0); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + REG_UPDATE_2(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 116, + ALPHA_PLANE_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + + /* don't see the need to program the xbar in DCN 1.0 */ +} + +void hubp35_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + hubp3_dcc_control_sienna_cichlid(hubp, dcc); + hubp3_program_tiling(hubp2, tiling_info, format); + hubp2_program_size(hubp, format, plane_size, dcc); + hubp2_program_rotation(hubp, rotation, horizontal_mirror); + hubp35_program_pixel_format(hubp, format); +} + struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, - .hubp_program_surface_config = hubp3_program_surface_config, + .hubp_program_surface_config = hubp35_program_surface_config, .hubp_is_flip_pending = hubp2_is_flip_pending, .hubp_setup = hubp3_setup, .hubp_setup_interdependent = hubp2_setup_interdependent, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h index 3d830f93141e..586b43aa5834 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h @@ -58,4 +58,18 @@ bool hubp35_construct( void hubp35_set_fgcg(struct hubp *hubp, bool enable); +void hubp35_program_pixel_format( + struct hubp *hubp, + enum surface_pixel_format format); + +void hubp35_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level); + #endif /* __DC_HUBP_DCN35_H__ */
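The dcn35_hubp_funcs table above shows the hook-table composition style DC uses: DCN3.5 keeps the shared hubp2/hubp3 helpers for most operations and swaps in only .hubp_program_surface_config, which itself is a thin sequence of shared helpers plus the DCN3.5 pixel-format programmer. A minimal sketch of the idea with hypothetical, trimmed-down types (the real struct hubp_funcs has many more hooks):

struct hubp; /* opaque in this sketch */

struct hubp_ops {
	void (*program_surface_config)(struct hubp *hubp);
	/* ... many more hooks in the real table ... */
};

static void gen3_program_surface_config(struct hubp *hubp)  { /* shared path */ }
static void gen35_program_surface_config(struct hubp *hubp) { /* new path */ }

/* each generation provides the same table shape, overriding only what changed */
static const struct hubp_ops gen3_ops = {
	.program_surface_config = gen3_program_surface_config,
};

static const struct hubp_ops gen35_ops = {
	.program_surface_config = gen35_program_surface_config,
};

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 0f60c40e1fc5..46f71ff08fd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -332,6 +332,13 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) pg_cntl->pg_res_enable[PG_DCIO] = power_on; } +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on) +{ + struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); + + REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 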
1 : 0); +} + static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl) { struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); @@ -501,7 +508,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22 }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h index 3de240884d22..069dae08e222 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h @@ -183,6 +183,7 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on); void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl); +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on); struct pg_cntl *pg_cntl35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 3c7c810bab1f..c7e011d26d41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -610,7 +610,23 @@ static struct dce_hwseq_registers hwseq_reg; HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh) + HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) static const struct dce_hwseq_shift hwseq_shift = { HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) @@ -708,7 +724,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = true, + .cm = false, .mpc = true, .optc = true, .vpg = true, @@ -719,14 +735,14 @@ static const struct dc_debug_options debug_defaults_drv = { .bits = { .dpp = true, .dsc = true,/*dscclk and dsc pg*/ - .hdmistream = false, - .hdmichar = 
false, - .dpstream = false, - .symclk32_se = false, - .symclk32_le = false, - .symclk_fe = false, - .physymclk = false, - .dpiasymclk = false, + .hdmistream = true, + .hdmichar = true, + .dpstream = true, + .symclk32_se = true, + .symclk32_le = true, + .symclk_fe = true, + .physymclk = true, + .dpiasymclk = true, } }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, @@ -741,7 +757,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_boot_optimizations = false, .disable_unbounded_requesting = false, .disable_mem_low_power = false, - .enable_hpo_pg_support = false, //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index ad741a723c0e..3686f1e7de3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -5128,7 +5128,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l ViewportExceedsSurface = true; if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 - && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { + && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { ViewportExceedsSurface = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..acff3449b8d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,8 +60,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index d2046e770c50..1a2b24cc6b61 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -55,10 +55,11 @@ struct dc_pipe_mapping_scratch { struct dc_plane_pipe_pool pipe_pool; }; -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -66,7 +67,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + 
(!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -123,8 +125,9 @@ static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) return &state->res_ctx.pipe_ctx[i]; } @@ -141,8 +144,9 @@ static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) pipes[num_found++] = i; } @@ -609,6 +613,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d const struct dc_plane_state *plane, int odm_factor, int mpc_factor, + int plane_index, struct dc_plane_pipe_pool *pipe_pool, const struct dc_state *existing_state) { @@ -620,7 +625,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d unsigned int next_pipe_to_assign; int odm_slice, mpc_slice; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return master_pipe; } @@ -667,12 +672,16 @@ static void free_pipe(struct pipe_ctx *pipe) } static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, - const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id) + const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) { int i; + bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { if (state->res_ctx.pipe_ctx[i].plane_state == plane && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && + (!is_plane_duplicate || (is_plane_duplicate && + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index)) && !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { free_pipe(&state->res_ctx.pipe_ctx[i]); } @@ -717,19 +726,20 @@ static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *stat } static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane, - struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) + int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state 
*existing_state) { int odm_slice_index; unsigned int plane_id; struct pipe_ctx *master_pipe = NULL; int i; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return; } - master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, scratch->mpc_info.mpc_factor, &scratch->pipe_pool, existing_state); + master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, + scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state); sort_pipes_for_splitting(&scratch->pipe_pool); for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { @@ -755,7 +765,7 @@ static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state } } - free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id); + free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); } static unsigned int get_mpc_factor(struct dml2_context *ctx, @@ -768,7 +778,7 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx, unsigned int plane_id; unsigned int cfg_idx; - get_plane_id(state, status->plane_states[plane_idx], stream_id, &plane_id); + get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id); cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); if (ctx->architecture == dml2_architecture_20) return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; @@ -911,26 +921,14 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s unsigned int stream_id; const unsigned int *ODMMode, *DPPPerSurface; - unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; struct dc_pipe_mapping_scratch scratch; if (ctx->config.map_dc_pipes_with_callbacks) return map_dc_pipes_with_callbacks( ctx, state, disp_cfg, mapping, existing_state); - if (ctx->architecture == dml2_architecture_21) { - /* - * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. - * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to - * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. - * - */ - ODMMode = (const unsigned int *)odm_mode_array; - DPPPerSurface = (const unsigned int *)dpp_per_surface_array; - } else { - ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; - DPPPerSurface = disp_cfg->hw.DPPPerSurface; - } + ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; + DPPPerSurface = disp_cfg->hw.DPPPerSurface; for (stream_index = 0; stream_index < state->stream_count; stream_index++) { memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch)); @@ -958,8 +956,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) { // Planes are ordered top to bottom. 
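The plane_index parameter threaded through the get_plane_id variants in these hunks exists to disambiguate duplicated planes: when the same dc_plane_state pointer occurs twice in a stream's plane list, the pointer comparison alone always matches the first slot, so if duplicates exist the slot index j must also equal the caller's plane_index. A self-contained sketch of that comparison, with types trimmed to the fields the logic needs:

#include <stdbool.h>

struct plane { int dummy; };

struct stream_status {
	int plane_count;
	const struct plane *plane_states[6];
};

static bool lookup_plane_id(bool duplicates_exist, const struct stream_status *ss,
			    unsigned int stream_idx, const struct plane *plane,
			    unsigned int plane_index, unsigned int *plane_id)
{
	for (int j = 0; j < ss->plane_count; j++) {
		/* a pointer match is only trusted when no duplicates exist,
		 * or when it sits in the expected slot */
		if (ss->plane_states[j] == plane &&
		    (!duplicates_exist || j == (int)plane_index)) {
			*plane_id = (stream_idx << 16) | j; /* stream in the high half, slot in the low */
			return true;
		}
	}
	return false;
}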
- if (get_plane_id(state, state->stream_status[stream_index].plane_states[plane_index], - stream_id, &plane_id)) { + if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index], + stream_id, plane_index, &plane_id)) { plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); // Setup mpc_info for this plane @@ -983,7 +981,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s // Clear the pool assignment scratch (which is per plane) memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); - map_pipes_for_plane(ctx, state, state->streams[stream_index], state->stream_status[stream_index].plane_states[plane_index], &scratch, existing_state); + map_pipes_for_plane(ctx, state, state->streams[stream_index], + state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); } else { // Plane ID cannot be generated, therefore no DML mapping can be performed. ASSERT(false); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index ed5b767d46e0..1cf8a884c0fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -75,6 +75,8 @@ struct dml2_dml_to_dc_pipe_mapping { bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; }; struct dml2_wrapper_scratch { @@ -96,6 +98,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; + bool plane_duplicate_exists; }; struct dml2_helper_det_policy_scratch { @@ -104,7 +107,6 @@ struct dml2_helper_det_policy_scratch { enum dml2_architecture { dml2_architecture_20, - dml2_architecture_21 }; struct dml2_context { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 89836f175a13..75171bee6f71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -231,6 +231,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; + out->dispclk_dppclk_vco_speed_mhz = 3600; break; case dml_project_dcn351: @@ -930,10 +931,11 @@ static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml return location; } -static bool get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -941,7 +943,8 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s for (i = 0; i < context->stream_count; i++) { if (context->streams[i]->stream_id == stream_id) { for (j = 0; j < 
context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { + if (context->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -953,13 +956,13 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s } static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, - const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id) + const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) { unsigned int plane_id; int i = 0; int location = -1; - if (!get_plane_id(context, plane, stream_id, &plane_id)) { + if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) { ASSERT(false); return -1; } @@ -990,7 +993,41 @@ static void apply_legacy_svp_drr_settings(struct dml2_context *dml2, const struc } } -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) +static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) +{ + unsigned int i; + unsigned int pipe_index = 0; + unsigned int plane_index = 0; + struct dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe || !pipe->stream || !pipe->plane_state) + continue; + + while (pipe) { + pipe_index = pipe->pipe_idx; + + if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index; + plane_index++; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true; + } + + pipe = pipe->bottom_pipe; + } + + plane_index = 0; + } +} + +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { int i = 0, j = 0; int disp_cfg_stream_location, disp_cfg_plane_location; @@ -1007,6 +1044,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; dml_dispcfg->plane.HostVMEnable = false; + dml2_populate_pipe_to_plane_index_mapping(dml2, context); + for (i = 0; i < context->stream_count; i++) { disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); @@ -1043,7 +1082,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d } else { for (j = 0; j < context->stream_status[i].plane_count; j++) { disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2, - context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id); + context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; @@ -1067,7 +1106,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d 
dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; - if (get_plane_id(context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, + if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j, &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index dac6d27b14cd..d764773938f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -34,7 +34,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out); void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out); void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 69fd96f4f3b0..2498b8341199 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -209,10 +209,11 @@ static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int return -1; } -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -220,7 +221,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -304,8 +306,9 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont * there is a need to know which DML pipe index maps to which DC pipe. The code below * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then * it finds a dml_pipe_index from the stream id. 
*/ - if (get_plane_id(context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, - context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, &plane_id)) { + if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) { dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); } else { dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); @@ -445,8 +448,9 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc for (i = 0; i < MAX_PIPES; i++) { if (!display_state->res_ctx.pipe_ctx[i].stream) continue; - if (get_plane_id(display_state, display_state->res_ctx.pipe_ctx[i].plane_state, - display_state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id)) + if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state, + display_state->res_ctx.pipe_ctx[i].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id)) dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); else dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 0a06bf3b135a..8f231418870f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -639,7 +639,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s return result; } -static bool dml2_validate_only(const struct dc_state *context) +static bool dml2_validate_only(struct dc_state *context) { struct dml2_context *dml2 = context->bw_ctx.dml2; unsigned int result = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 3966845c7694..e8b5f17beb96 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -512,6 +512,11 @@ static bool intersect_dsc_caps( dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; dsc_common_caps->slice_caps.bits.NUM_SLICES_8 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + dsc_common_caps->slice_caps.bits.NUM_SLICES_12 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_12 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_12; + dsc_common_caps->slice_caps.bits.NUM_SLICES_16 = + dsc_sink_caps->slice_caps2.bits.NUM_SLICES_16 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_16; + if (!dsc_common_caps->slice_caps.raw) return false; @@ -703,6 +708,12 @@ static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *av if (slice_caps.bits.NUM_SLICES_8) available_slices[idx++] = 8; + if (slice_caps.bits.NUM_SLICES_12) + available_slices[idx++] = 12; + + if (slice_caps.bits.NUM_SLICES_16) + available_slices[idx++] = 16; + return idx; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h index 2fefdf40612d..44b4df6469d1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h @@ 
-1183,7 +1183,23 @@ struct dce_hwseq_registers { type LONO_FGCG_REP_DIS;\ type LONO_DISPCLK_GATE_DISABLE;\ type LONO_SOCCLK_GATE_DISABLE;\ - type LONO_DMCUBCLK_GATE_DISABLE; + type LONO_DMCUBCLK_GATE_DISABLE;\ + type SYMCLKA_FE_GATE_DISABLE;\ + type SYMCLKB_FE_GATE_DISABLE;\ + type SYMCLKC_FE_GATE_DISABLE;\ + type SYMCLKD_FE_GATE_DISABLE;\ + type SYMCLKE_FE_GATE_DISABLE;\ + type HDMICHARCLK0_GATE_DISABLE;\ + type SYMCLKA_GATE_DISABLE;\ + type SYMCLKB_GATE_DISABLE;\ + type SYMCLKC_GATE_DISABLE;\ + type SYMCLKD_GATE_DISABLE;\ + type SYMCLKE_GATE_DISABLE;\ + type PHYASYMCLK_ROOT_GATE_DISABLE;\ + type PHYBSYMCLK_ROOT_GATE_DISABLE;\ + type PHYCSYMCLK_ROOT_GATE_DISABLE;\ + type PHYDSYMCLK_ROOT_GATE_DISABLE;\ + type PHYESYMCLK_ROOT_GATE_DISABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 1b9f21fd4f17..6a65af8c36b9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -615,12 +615,6 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, false); - } - - /* Today only FPO uses cursor P-State force. Only clear cursor P-State force - * if it's not FPO. - */ - if (!pipe->stream || !pipe->stream->fpo_in_use) { if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, false); } @@ -632,17 +626,10 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || + pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - } - - if (pipe->stream && pipe->stream->fpo_in_use) { - if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) - hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - /* For now only force cursor p-state disallow for FPO - * Needs to be added for subvp once FW side gets updated - */ if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, true); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 34737d60b965..5a8258287438 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -138,16 +138,25 @@ void dcn35_init_hw(struct dc *dc) if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0x3F000000); - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); - //dcn35_set_dmu_fgcg(hws, dc->debug.enable_fine_grain_clock_gating.bits.dmu); if (!dcb->funcs->is_accelerated_mode(dcb)) { /*this calls into dmubfw to do the init*/ hws->funcs.bios_golden_init(dc); } + + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + /* Disable gating for PHYASYMCLK. 
Root gating for PHYB-E is disabled here as well; dccg re-enables gating per PHY when needed */ + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1, + PHYESYMCLK_ROOT_GATE_DISABLE, 1); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); + // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) res_pool->dccg->funcs->dccg_init(res_pool->dccg); @@ -274,7 +283,19 @@ void dcn35_init_hw(struct dc *dc) if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, 0, + SYMCLKB_FE_GATE_DISABLE, 0, + SYMCLKC_FE_GATE_DISABLE, 0, + SYMCLKD_FE_GATE_DISABLE, 0, + SYMCLKE_FE_GATE_DISABLE, 0); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, 0); + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, 0, + SYMCLKB_GATE_DISABLE, 0, + SYMCLKC_GATE_DISABLE, 0, + SYMCLKD_GATE_DISABLE, 0, + SYMCLKE_GATE_DISABLE, 0); + REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } @@ -311,6 +332,9 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl) { if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); + + if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22) + dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 13f12f2a3f81..ce2f0c0e82bd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -141,6 +141,11 @@ struct dccg_funcs { enum physymclk_clock_source clk_src, bool force_enable); + void (*set_physymclk_root_clock_gating)( + struct dccg *dccg, + int phy_inst, + bool enable); + void (*set_dtbclk_dto)( struct dccg *dccg, const struct dtbclk_dto_params *params);
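set_physymclk_root_clock_gating is a new optional entry in dccg_funcs; as the init code above already does for other hooks, callers guard the invocation because implementations that predate the hook leave the pointer NULL. A minimal, freestanding sketch of that calling convention (types trimmed down; the real call sites live in the hwseq/init code):

struct dccg;

struct dccg_funcs {
	void (*set_physymclk_root_clock_gating)(struct dccg *dccg,
						int phy_inst, bool enable);
};

struct dccg { const struct dccg_funcs *funcs; };

static void set_phy_root_gating_if_supported(struct dccg *dccg,
					     int phy_inst, bool enable)
{
	/* optional hook: older dccg implementations leave it NULL */
	if (dccg->funcs->set_physymclk_root_clock_gating)
		dccg->funcs->set_physymclk_root_clock_gating(dccg, phy_inst, enable);
}

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index d7b8d586b523..4b27f29d0d80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -76,6 +76,8 @@ union dsc_enc_slice_caps { uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */ uint8_t NUM_SLICES_4 : 1; uint8_t NUM_SLICES_8 : 1; + uint8_t NUM_SLICES_12 : 1; + uint8_t NUM_SLICES_16 : 1; } bits; uint8_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h new file mode 100644 index 000000000000..9a8bf6ec70ea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 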
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +/** + * DOC: overview + * + * Output Pipe Timing Combiner (OPTC) includes two major functional blocks: + * Output Data Mapper (ODM) and Output Timing Generator (OTG). + * + * - ODM: the Output Data Mapper block. It can combine input data from + * multiple OPP data pipes into a single data stream, split data from one + * OPP data pipe into multiple data streams, or just bypass OPP data to DIO. + * - OTG: the Output Timing Generator. It generates display timing signals to + * drive the display output. + */ + +#ifndef __DC_OPTC_H__ +#define __DC_OPTC_H__ + +#include "timing_generator.h" + +struct optc { + struct timing_generator base; + + const struct dcn_optc_registers *tg_regs; + const struct dcn_optc_shift *tg_shift; + const struct dcn_optc_mask *tg_mask; + + int opp_count; + + uint32_t max_h_total; + uint32_t max_v_total; + + uint32_t min_h_blank; + + uint32_t min_h_sync_width; + uint32_t min_v_sync_width; + uint32_t min_v_blank; + uint32_t min_v_blank_interlace; + + int vstartup_start; + int vupdate_offset; + int vupdate_width; + int vready_offset; + struct dc_crtc_timing orginal_patched_timing; + enum signal_type signal; +}; +
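struct optc above embeds the generic struct timing_generator as its first member, and the DCN10TG_FROM_TG macro in dcn10_optc.h (see the hunk earlier in this series) recovers the containing optc from a timing_generator pointer via container_of. A freestanding approximation of that embedding pattern, using a local offsetof-based macro and trimmed-down stand-in types instead of the kernel's container_of:

#include <stddef.h>

struct tg { int inst; };     /* stands in for struct timing_generator */

struct optc_sketch {
	struct tg base;      /* generic interface embedded first */
	int opp_count;       /* hw-specific state follows */
};

#define sketch_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct optc_sketch *optc_from_tg(struct tg *tg)
{
	/* works even if base were not the first member */
	return sketch_container_of(tg, struct optc_sketch, base);
}

+struct dcn_otg_state { + uint32_t v_blank_start; + uint32_t v_blank_end; + uint32_t v_sync_a_pol; + uint32_t v_total; + uint32_t v_total_max; + uint32_t v_total_min; + uint32_t v_total_min_sel; + uint32_t v_total_max_sel; + uint32_t v_sync_a_start; + uint32_t v_sync_a_end; + uint32_t h_blank_start; + uint32_t h_blank_end; + uint32_t h_sync_a_start; + uint32_t h_sync_a_end; + uint32_t h_sync_a_pol; + uint32_t h_total; + uint32_t underflow_occurred_status; + uint32_t otg_enabled; + uint32_t blank_enabled; + uint32_t vertical_interrupt1_en; + uint32_t vertical_interrupt1_line; + uint32_t vertical_interrupt2_en; + uint32_t vertical_interrupt2_line; +}; + +void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s); + +bool optc1_get_hw_timing(struct timing_generator *tg, struct dc_crtc_timing *hw_crtc_timing); + +bool optc1_validate_timing(struct timing_generator *optc, + const struct dc_crtc_timing *timing); + +void optc1_program_timing(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios); + +void optc1_setup_vertical_interrupt0(struct timing_generator *optc, + uint32_t start_line, + uint32_t end_line); + +void optc1_setup_vertical_interrupt1(struct timing_generator *optc, + uint32_t start_line); + +void optc1_setup_vertical_interrupt2(struct timing_generator *optc, + uint32_t start_line); + +void optc1_program_global_sync(struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); + +bool optc1_disable_crtc(struct timing_generator *optc); + +bool optc1_is_counter_moving(struct timing_generator *optc); + +void optc1_get_position(struct timing_generator *optc, + struct crtc_position *position); + +uint32_t 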
optc1_get_vblank_counter(struct timing_generator *optc); + +void optc1_get_crtc_scanoutpos(struct timing_generator *optc, + uint32_t *v_blank_start, + uint32_t *v_blank_end, + uint32_t *h_position, + uint32_t *v_position); + +void optc1_set_early_control(struct timing_generator *optc, + uint32_t early_cntl); + +void optc1_wait_for_state(struct timing_generator *optc, + enum crtc_state state); + +void optc1_set_blank(struct timing_generator *optc, + bool enable_blanking); + +bool optc1_is_blanked(struct timing_generator *optc); + +void optc1_program_blank_color(struct timing_generator *optc, + const struct tg_color *black_color); + +bool optc1_did_triggered_reset_occur(struct timing_generator *optc); + +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); + +void optc1_disable_reset_trigger(struct timing_generator *optc); + +void optc1_lock(struct timing_generator *optc); + +void optc1_unlock(struct timing_generator *optc); + +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); + +void optc1_set_drr(struct timing_generator *optc, + const struct drr_params *params); + +void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); + +void optc1_set_static_screen_control(struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames); + +void optc1_program_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, + struct crtc_stereo_flags *flags); + +bool optc1_is_stereo_left_eye(struct timing_generator *optc); + +void optc1_clear_optc_underflow(struct timing_generator *optc); + +void optc1_tg_init(struct timing_generator *optc); + +bool optc1_is_tg_enabled(struct timing_generator *optc); + +bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); + +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); + +void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); + +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height); + +void optc1_enable_crtc_reset(struct timing_generator *optc, + int source_tg_inst, + struct crtc_trigger_info *crtc_tp); + +bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); + +bool optc1_get_crc(struct timing_generator *optc, + uint32_t *r_cr, + uint32_t *g_y, + uint32_t *b_cb); + +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); + +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + bool program_fp2); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 00ea3864dd4d..b9812afb886b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -47,6 +47,8 @@ struct pg_cntl_funcs { void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + + void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 21a39afd274b..2d152b68a501 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -53,6 +53,7 @@ static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate) return LINK_RATE_UHBR10; case DP_TEST_LINK_RATE_UHBR20: return LINK_RATE_UHBR20; + case DP_TEST_LINK_RATE_UHBR13_5_LEGACY: case DP_TEST_LINK_RATE_UHBR13_5: return LINK_RATE_UHBR13_5; default: @@ -119,6 +120,11 @@ static void dp_test_send_link_training(struct dc_link *link) 1); link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate); + if (link_settings.link_rate == LINK_RATE_UNKNOWN) { + DC_LOG_ERROR("%s: Invalid test link rate.", __func__); + ASSERT(0); + } + /* Set preferred link settings */ link->verified_link_cap.lane_count = link_settings.lane_count; link->verified_link_cap.link_rate = link_settings.link_rate; @@ -457,7 +463,7 @@ static void set_crtc_test_pattern(struct dc_link *link, controller_color_space = pipe_ctx->stream_res.test_pattern_params.color_space; if (controller_color_space == CONTROLLER_DP_COLOR_SPACE_UDEFINED) { - DC_LOG_WARNING("%s: Color space must be defined for test pattern", __func__); + DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__); ASSERT(0); } @@ -592,6 +598,7 @@ bool dp_set_test_pattern( const unsigned char *p_custom_pattern, unsigned int cust_pattern_size) { + const struct link_hwss *link_hwss; struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; @@ -828,11 +835,9 @@ bool dp_set_test_pattern( pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); /* update MSA to requested color space */ - pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc, - &pipe_ctx->stream->timing, - color_space, - pipe_ctx->stream->use_vsc_sdp_for_colorimetry, - link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + pipe_ctx->stream->output_color_space = color_space; + link_hwss->setup_stream_attribute(pipe_ctx); if (pipe_ctx->stream->use_vsc_sdp_for_colorimetry) { if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 34bf8a9ef738..0c00e94e90b1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -184,14 +184,17 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) return false; } -static bool handle_hpd_irq_replay_sink(struct dc_link *link) +static void handle_hpd_irq_replay_sink(struct dc_link *link) { union dpcd_replay_configuration replay_configuration; /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; + if (link->replay_settings.config.force_disable_desync_error_check) + return; + if (!link->replay_settings.replay_feature_enabled) - return false; + return; dm_helpers_dp_read_dpcd( link->ctx, @@ -207,6 +210,9 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link) &replay_error_status.raw, sizeof(replay_error_status.raw)); + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.config.received_desync_error_hpd = 1; + link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR = replay_error_status.bits.LINK_CRC_ERROR; link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR = @@ -243,7 +249,6 @@ static bool 
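get_link_rate_from_test_link_rate() above now folds the legacy UHBR13.5 encoding into the same case as the current one and leaves LINK_RATE_UNKNOWN for the caller to log and assert on. A standalone sketch of that lookup shape (the enum values and request bytes here are illustrative, not the actual DPCD encodings):

#include <stdio.h>

enum link_rate { RATE_UNKNOWN = 0, RATE_UHBR10, RATE_UHBR13_5, RATE_UHBR20 };

static enum link_rate rate_from_test_byte(unsigned char test_rate)
{
	switch (test_rate) {
	case 0x01: return RATE_UHBR10;
	case 0x02: return RATE_UHBR20;
	case 0x04: /* legacy encoding falls through to the same rate */
	case 0x03: return RATE_UHBR13_5;
	default:   return RATE_UNKNOWN; /* caller logs an error and asserts */
	}
}

int main(void)
{
	printf("%d\n", rate_from_test_byte(0x07)); /* prints 0: RATE_UNKNOWN */
	return 0;
}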
handle_hpd_irq_replay_sink(struct dc_link *link) edp_set_replay_allow_active(link, &allow_active, true, false, NULL); } } - return true; } void dp_handle_link_loss(struct dc_link *link) @@ -424,9 +429,7 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, /* PSR-related error was detected and handled */ return true; - if (handle_hpd_irq_replay_sink(link)) - /* Replay-related error was detected and handled */ - return true; + handle_hpd_irq_replay_sink(link); /* If PSR-related error handled, Main link may be off, * so do not handle as a normal sink status change interrupt. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index bc907ae2052d..ed4379c04715 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -583,6 +583,7 @@ union dmub_fw_boot_status { uint32_t fams_enabled : 1; /**< 1 if VBIOS data is deferred programmed */ uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */ + uint32_t ono_regions_enabled: 1; /**< 1 if ONO regions are enabled */ } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -599,6 +600,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_FAMS_ENABLED = (1 << 5), /**< 1 if FAMS is enabled*/ DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */ + DMUB_FW_BOOT_STATUS_BIT_ONO_REGIONS_ENABLED = (1 << 8), /**< 1 if ONO regions are enabled */ }; /* Register bit definition for SCRATCH5 */ @@ -617,9 +619,12 @@ enum dmub_lvtma_status_bit { }; enum dmub_ips_disable_type { - DMUB_IPS_DISABLE_IPS1 = 1, - DMUB_IPS_DISABLE_IPS2 = 2, - DMUB_IPS_DISABLE_IPS2_Z10 = 3, + DMUB_IPS_ENABLE = 0, + DMUB_IPS_DISABLE_ALL = 1, + DMUB_IPS_DISABLE_IPS1 = 2, + DMUB_IPS_DISABLE_IPS2 = 3, + DMUB_IPS_DISABLE_IPS2_Z10 = 4, + DMUB_IPS_DISABLE_DYNAMIC = 5, }; #define DMUB_IPS1_ALLOW_MASK 0x00000001 @@ -653,8 +658,8 @@ union dmub_fw_boot_options { uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ - uint32_t ips_disable: 2; /* options to disable ips support*/ - uint32_t reserved : 10; /**< reserved */ + uint32_t ips_disable: 3; /* options to disable ips support*/ + uint32_t reserved : 9; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -2098,7 +2103,7 @@ enum psr_version { /** * PSR not supported. */ - PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, + PSR_VERSION_UNSUPPORTED = 0xFF, // psr_version field is only 8 bits wide }; /** @@ -3620,7 +3625,6 @@ struct dmub_cmd_abm_pause_data { uint8_t pad[1]; }; - /** * Definition of a DMUB_CMD__ABM_PAUSE command. */ @@ -4046,6 +4050,7 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__MALL command. */ struct dmub_rb_cmd_mall mall; + /** * Definition of a DMUB_CMD__CAB command. */ @@ -4067,6 +4072,7 @@ union dmub_rb_cmd { * Definition of DMUB_CMD__PANEL_CNTL commands. */ struct dmub_rb_cmd_panel_cntl panel_cntl; + /** * Definition of a DMUB_CMD__ABM_SET_PIPE command. 
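A note on the ips_disable widening above: DMUB_IPS_DISABLE_DYNAMIC = 5 no longer fits in a 2-bit field, and the reserved field has to shrink in step so the union keeps summing to exactly 32 bits and the "all" view of the firmware ABI stays intact. A minimal sketch (the field list is abbreviated; earlier_flags is a stand-in for the preceding flag bits, not the real layout):

#include <stdint.h>

union boot_options_sketch {
	struct {
		uint32_t earlier_flags : 19; /* stand-in for the preceding bits */
		uint32_t ips_pg_disable : 1;
		uint32_t ips_disable : 3;    /* widened from 2 bits */
		uint32_t reserved : 9;       /* shrunk from 10 to compensate */
	} bits;
	uint32_t all;
};

_Static_assert(sizeof(union boot_options_sketch) == sizeof(uint32_t),
	       "boot option bits must still pack into one 32-bit word");

int main(void)
{
	/* The new maximum enum value (5) now fits in the 3-bit field. */
	union boot_options_sketch o = { .bits.ips_disable = 5 };
	return o.bits.ips_disable == 5 ? 0 : 1;
}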
*/ @@ -4470,10 +4476,6 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; - /* Don't remove this. - * The contents need to actually be read from the ring buffer - * for this function to be effective. - */ for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) (void)READ_ONCE(*data++); @@ -4522,5 +4524,4 @@ static inline void dmub_rb_get_return_data(struct dmub_rb *rb, //============================================================================== //</DMUB_RB>==================================================================== //============================================================================== - #endif /* _DMUB_CMD_H_ */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h index 6e29a185de51..765d9ca2316f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h @@ -242,6 +242,34 @@ //MP0_SMN_C2PMSG_103 #define MP0_SMN_C2PMSG_103__CONTENT__SHIFT 0x0 #define MP0_SMN_C2PMSG_103__CONTENT_MASK 0xFFFFFFFFL +//MP0_SMN_C2PMSG_109 +#define MP0_SMN_C2PMSG_109__CONTENT__SHIFT 0x0 +#define MP0_SMN_C2PMSG_109__CONTENT_MASK 0xFFFFFFFFL +//MP0_SMN_C2PMSG_126 +#define MP0_SMN_C2PMSG_126__GPU_ERR_MEM_TRAINING__SHIFT 0x0 +#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD__SHIFT 0x1 +#define MP0_SMN_C2PMSG_126__GPU_ERR_WAFL_LINK_TRAINING__SHIFT 0x2 +#define MP0_SMN_C2PMSG_126__GPU_ERR_XGMI_LINK_TRAINING__SHIFT 0x3 +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_CP_LINK_TRAINING__SHIFT 0x4 +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_DP_LINK_TRAINING__SHIFT 0x5 +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_MEM_TEST__SHIFT 0x6 +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_BIST_TEST__SHIFT 0x7 +#define MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT 0x8 +#define MP0_SMN_C2PMSG_126__AID_ID__SHIFT 0xb +#define MP0_SMN_C2PMSG_126__HBM_ID__SHIFT 0xd +#define MP0_SMN_C2PMSG_126__BOOT_STATUS__SHIFT 0x1f +#define MP0_SMN_C2PMSG_126__GPU_ERR_MEM_TRAINING_MASK 0x00000001L +#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK 0x00000002L +#define MP0_SMN_C2PMSG_126__GPU_ERR_WAFL_LINK_TRAINING_MASK 0x00000004L +#define MP0_SMN_C2PMSG_126__GPU_ERR_XGMI_LINK_TRAINING_MASK 0x00000008L +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_CP_LINK_TRAINING_MASK 0x00000010L +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_DP_LINK_TRAINING_MASK 0x00000020L +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_MEM_TEST_MASK 0x00000040L +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_BIST_TEST_MASK 0x00000080L +#define MP0_SMN_C2PMSG_126__SOCKET_ID_MASK 0x00000700L +#define MP0_SMN_C2PMSG_126__AID_ID_MASK 0x00001800L +#define MP0_SMN_C2PMSG_126__HBM_ID_MASK 0x00002000L +#define MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK 0x80000000L //MP0_SMN_IH_CREDIT #define MP0_SMN_IH_CREDIT__CREDIT_VALUE__SHIFT 0x0 #define MP0_SMN_IH_CREDIT__CLIENT_ID__SHIFT 0x10 diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 3201808c2dd8..cd3c40a86029 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1080,33 +1080,35 @@ struct gpu_metrics_v3_0 { uint16_t average_ipu_activity[8]; /* time filtered per-core C0 residency % [0-100]*/ uint16_t average_core_c0_activity[16]; - /* time filtered DRAM read bandwidth [GB/sec] */ + /* time filtered DRAM read bandwidth [MB/sec] */ uint16_t average_dram_reads; - /* time filtered DRAM write bandwidth 
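The new MP0_SMN_C2PMSG_126 definitions above follow the usual SHIFT/MASK convention for packed status words. A small standalone sketch of decoding one such word (the register value is a made-up example; the macros copy the definitions above):

#include <stdint.h>
#include <stdio.h>

#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK 0x00000002L
#define MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT     0x8
#define MP0_SMN_C2PMSG_126__SOCKET_ID_MASK       0x00000700L
#define MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK     0x80000000L

int main(void)
{
	uint32_t val = 0x80000202; /* assumed raw register value */

	/* Mask out the field, then shift it down to its natural position. */
	uint32_t socket = (val & MP0_SMN_C2PMSG_126__SOCKET_ID_MASK) >>
			  MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT;

	printf("socket %u, fw_load_err=%d, boot_done=%d\n",
	       (unsigned)socket,
	       !!(val & MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK),
	       !!(val & MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK));
	return 0;
}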
[GB/sec] */ + /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; /* Power/Energy */ - /* average dGPU + APU power on A + A platform */ + /* time filtered power used for PPT/STAPM [APU+dGPU] [mW] */ uint32_t average_socket_power; - /* average IPU power [W] */ + /* time filtered IPU power [mW] */ uint16_t average_ipu_power; - /* average APU power [W] */ + /* time filtered APU power [mW] */ uint32_t average_apu_power; - /* average dGPU power [W] */ + /* time filtered GFX power [mW] */ + uint32_t average_gfx_power; + /* time filtered dGPU power [mW] */ uint32_t average_dgpu_power; - /* sum of core power across all cores in the socket [W] */ - uint32_t average_core_power; - /* calculated core power [W] */ - uint16_t core_power[16]; - /* maximum IRM defined STAPM power limit [W] */ + /* time filtered sum of core power across all cores in the socket [mW] */ + uint32_t average_all_core_power; + /* calculated core power [mW] */ + uint16_t average_core_power[16]; + /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; - /* time filtered STAPM power limit [W] */ + /* time filtered STAPM power limit [mW] */ uint16_t current_stapm_power_limit; - /* Average clocks */ + /* time filtered clocks [MHz] */ uint16_t average_gfxclk_frequency; uint16_t average_socclk_frequency; uint16_t average_vpeclk_frequency; @@ -1115,7 +1117,7 @@ struct gpu_metrics_v3_0 { uint16_t average_vclk_frequency; /* Current clocks */ - /* target core frequency */ + /* target core frequency [MHz] */ uint16_t current_coreclk[16]; /* CCLK frequency limit enforced on classic cores [MHz] */ uint16_t current_core_maxfreq; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index acf3527fff2d..08cb79401410 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -491,7 +491,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->get_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -505,7 +505,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->set_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -1182,7 +1182,7 @@ int amdgpu_dpm_get_sclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_sclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_sclk_od(adev->powerplay.pp_handle); @@ -1196,7 +1196,7 @@ int amdgpu_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_sclk_od) @@ -1219,7 +1219,7 @@ int amdgpu_dpm_get_mclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_mclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_mclk_od(adev->powerplay.pp_handle); @@ -1233,7 +1233,7 @@ int amdgpu_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs 
*pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_mclk_od) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 517b9fb4624c..ca2ece24e1e0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -989,12 +989,13 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, * Reading back the files will show you the available power levels within * the power state and the clock information for those levels. If deep sleep is * applied to a clock, the level will be denoted by a special level 'S:' - * E.g., - * S: 19Mhz * - * 0: 615Mhz - * 1: 800Mhz - * 2: 888Mhz - * 3: 1000Mhz + * E.g., :: + * + * S: 19Mhz * + * 0: 615Mhz + * 1: 800Mhz + * 2: 888Mhz + * 3: 1000Mhz * * * To manually adjust these states, first select manual using @@ -2197,6 +2198,22 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { + u32 limit; + + if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == + -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { + if (gc_ver == IP_VERSION(9, 4, 2) || + gc_ver == IP_VERSION(9, 4, 3)) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { @@ -3288,10 +3305,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); uint32_t tmp; - /* under multi-vf mode, the hwmon attributes are all not supported */ - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - /* under pp one vf mode manage of hwmon attributes is not supported */ if (amdgpu_sriov_is_pp_one_vf(adev)) effective_mode &= ~S_IWUSR; @@ -4162,6 +4175,7 @@ err_out: int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { + enum amdgpu_sriov_vf_mode mode; uint32_t mask = 0; int ret; @@ -4173,17 +4187,21 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if (adev->pm.dpm_enabled == 0) return 0; - adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, - DRIVER_NAME, adev, - hwmon_groups); - if (IS_ERR(adev->pm.int_hwmon_dev)) { - ret = PTR_ERR(adev->pm.int_hwmon_dev); - dev_err(adev->dev, - "Unable to register hwmon device: %d\n", ret); - return ret; + mode = amdgpu_virt_get_sriov_vf_mode(adev); + + /* under multi-vf mode, the hwmon attributes are all not supported */ + if (mode != SRIOV_VF_MODE_MULTI_VF) { + adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, + DRIVER_NAME, adev, + hwmon_groups); + if (IS_ERR(adev->pm.int_hwmon_dev)) { + ret = PTR_ERR(adev->pm.int_hwmon_dev); + dev_err(adev->dev, "Unable to register hwmon device: %d\n", ret); + return ret; + } } - switch (amdgpu_virt_get_sriov_vf_mode(adev)) { + switch (mode) { case SRIOV_VF_MODE_ONE_VF: mask = ATTR_FLAG_ONEVF; break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 9e4f8a4104a3..914c15387157 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ 
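The -EOPNOTSUPP conversions above let the sysfs setup path probe a handler once and hide the attribute when nothing implements it, instead of silently reporting 0. A minimal standalone sketch of that probing pattern (the getter is a stub, not the driver call):

#include <errno.h>
#include <stdio.h>

static int get_sclk_od(void) { return -EOPNOTSUPP; } /* stand-in getter */

int main(void)
{
	/* A real od value is a small non-negative percentage, so the
	 * sentinel is unambiguous. */
	int supported = (get_sclk_od() != -EOPNOTSUPP);

	printf("pp_dpm_sclk_od %s\n", supported ? "visible" : "hidden");
	return 0;
}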
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1022,6 +1022,9 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, *limit /= 100; } break; + case PP_PWR_LIMIT_MIN: + *limit = 0; + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h index 9fcad69a9f34..2cf2a7b12623 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h @@ -367,7 +367,7 @@ typedef struct _ATOM_Tonga_VCE_State_Record { typedef struct _ATOM_Tonga_VCE_State_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Tonga_VCE_State_Record entries[1]; + ATOM_Tonga_VCE_State_Record entries[]; } ATOM_Tonga_VCE_State_Table; typedef struct _ATOM_Tonga_PowerTune_Table { @@ -481,7 +481,7 @@ typedef struct _ATOM_Tonga_Hard_Limit_Record { typedef struct _ATOM_Tonga_Hard_Limit_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Tonga_Hard_Limit_Record entries[1]; + ATOM_Tonga_Hard_Limit_Record entries[]; } ATOM_Tonga_Hard_Limit_Table; typedef struct _ATOM_Tonga_GPIO_Table { diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h index 8b0590b834cc..de2926df5ed7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h @@ -129,7 +129,7 @@ typedef struct _ATOM_Vega10_State { typedef struct _ATOM_Vega10_State_Array { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_State states[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_State states[]; /* Dynamically allocate entries. */ } ATOM_Vega10_State_Array; typedef struct _ATOM_Vega10_CLK_Dependency_Record { @@ -169,37 +169,37 @@ typedef struct _ATOM_Vega10_GFXCLK_Dependency_Table { typedef struct _ATOM_Vega10_MCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_MCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_MCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_MCLK_Dependency_Table; typedef struct _ATOM_Vega10_SOCCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_SOCCLK_Dependency_Table; typedef struct _ATOM_Vega10_DCEFCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_DCEFCLK_Dependency_Table; typedef struct _ATOM_Vega10_PIXCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PIXCLK_Dependency_Table; typedef struct _ATOM_Vega10_DISPCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries.*/ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. 
*/ } ATOM_Vega10_DISPCLK_Dependency_Table; typedef struct _ATOM_Vega10_PHYCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PHYCLK_Dependency_Table; typedef struct _ATOM_Vega10_MM_Dependency_Record { @@ -213,7 +213,7 @@ typedef struct _ATOM_Vega10_MM_Dependency_Record { typedef struct _ATOM_Vega10_MM_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_MM_Dependency_Record entries[1]; /* Dynamically allocate entries */ + ATOM_Vega10_MM_Dependency_Record entries[]; /* Dynamically allocate entries */ } ATOM_Vega10_MM_Dependency_Table; typedef struct _ATOM_Vega10_PCIE_Record { @@ -225,7 +225,7 @@ typedef struct _ATOM_Vega10_PCIE_Record { typedef struct _ATOM_Vega10_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_PCIE_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PCIE_Table; typedef struct _ATOM_Vega10_Voltage_Lookup_Record { @@ -235,7 +235,7 @@ typedef struct _ATOM_Vega10_Voltage_Lookup_Record { typedef struct _ATOM_Vega10_Voltage_Lookup_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_Voltage_Lookup_Record entries[1]; /* Dynamically allocate entries */ + ATOM_Vega10_Voltage_Lookup_Record entries[]; /* Dynamically allocate entries */ } ATOM_Vega10_Voltage_Lookup_Table; typedef struct _ATOM_Vega10_Fan_Table { @@ -327,7 +327,7 @@ typedef struct _ATOM_Vega10_VCE_State_Record { typedef struct _ATOM_Vega10_VCE_State_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Vega10_VCE_State_Record entries[1]; + ATOM_Vega10_VCE_State_Record entries[]; } ATOM_Vega10_VCE_State_Table; typedef struct _ATOM_Vega10_PowerTune_Table { @@ -427,7 +427,7 @@ typedef struct _ATOM_Vega10_Hard_Limit_Record { typedef struct _ATOM_Vega10_Hard_Limit_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Vega10_Hard_Limit_Record entries[1]; + ATOM_Vega10_Hard_Limit_Record entries[]; } ATOM_Vega10_Hard_Limit_Table; typedef struct _Vega10_PPTable_Generic_SubTable_Header { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9f86c1fecbb1..1ead323f1c78 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -733,7 +733,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_EXIT; + smu->smu_baco.state = SMU_BACO_STATE_NONE; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1711,6 +1711,7 @@ static int smu_disable_dpms(struct smu_context *smu) } if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && + !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); @@ -1742,10 +1743,31 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } +static int smu_reset_mp1_state(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if ((!adev->in_runpm) && (!adev->in_suspend) && + (!amdgpu_in_reset(adev))) + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): + ret = 
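These entries[1] to entries[] conversions switch the tables to C99 flexible array members, so sizeof the struct covers only the fixed header and allocations can be sized exactly instead of over- or under-counting by one trailing element. A minimal standalone sketch of the pattern (toy types, not the ATOM tables; in-kernel code would normally use the struct_size() helper for the same arithmetic with overflow checking):

#include <stdio.h>
#include <stdlib.h>

struct record { unsigned char clk, vol; };

struct table {
	unsigned char rev_id;
	unsigned char num_entries;
	struct record entries[]; /* flexible array member */
};

int main(void)
{
	unsigned char n = 8;
	/* sizeof(*t) counts only the header; entries are added explicitly. */
	struct table *t = malloc(sizeof(*t) + n * sizeof(t->entries[0]));

	if (!t)
		return 1;
	t->num_entries = n;
	printf("header %zu bytes + %u entries\n", sizeof(*t), t->num_entries);
	free(t);
	return 0;
}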
smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); + break; + default: + break; + } + + return ret; +} + static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1763,7 +1785,15 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - return smu_smc_hw_cleanup(smu); + ret = smu_smc_hw_cleanup(smu); + if (ret) + return ret; + + ret = smu_reset_mp1_state(smu); + if (ret) + return ret; + + return 0; } static void smu_late_fini(void *handle) @@ -2718,7 +2748,7 @@ unlock: static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit) @@ -2729,7 +2759,7 @@ static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) static int smu_set_apu_thermal_limit(void *handle, uint32_t limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 839553a86aa2..8def291b18bc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -419,6 +419,7 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, + SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index b483c8e096e7..22f88842a7fd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -150,97 +150,39 @@ typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [W] [Q8.8] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [C] [Q8.8] - uint16_t GfxTemperature; //TSEN measured GFX temperature [C] [Q8.8] - uint16_t SocTemperature; //TSEN measured SOC temperature [C] [Q8.8] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [W] [Q8.8] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [W] [Q8.8] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [C] [Q8.8] - uint16_t AverageGfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t AverageFclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t AverageGfxActivity; //Time filtered GFX busy % [0-100] [Q8.8] - uint16_t AverageSocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t AverageVclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t AverageVcnActivity; //Time filtered VCN busy % [0-100] [Q8.8] - uint16_t AverageVpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t AverageIpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t AverageIpuBusy[8]; //Time filtered IPU per-column busy % [0-100] [Q8.8] - uint16_t AverageDRAMReads; //Time filtered DRAM read bandwidth [GB/sec] [Q8.8] - 
uint16_t AverageDRAMWrites; //Time filtered DRAM write bandwidth [GB/sec] [Q8.8] - uint16_t AverageCoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] [Q8.8] - uint16_t IpuPower; //Time filtered IPU power [W] [Q8.8] - uint32_t ApuPower; //Time filtered APU power [W] [Q24.8] - uint32_t dGpuPower; //Time filtered dGPU power [W] [Q24.8] - uint32_t AverageSocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [W] [Q24.8] - uint32_t AverageCorePower; //Time filtered sum of core power across all cores in the socket [W] [Q24.8] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint32_t spare[16]; } SmuMetrics_t; -typedef struct { - uint16_t GfxclkFrequency; //[MHz] - uint16_t SocclkFrequency; //[MHz] - uint16_t VclkFrequency; //[MHz] - uint16_t DclkFrequency; //[MHz] - uint16_t MemclkFrequency; //[MHz] - uint16_t spare; - uint16_t UvdActivity; //[centi] - uint16_t GfxActivity; //[centi] - - uint16_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_SOC - uint16_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_SOC - uint16_t Power[2]; //[mW] indices: VDDCR_VDD, VDDCR_SOC - - uint16_t CoreFrequency[8]; //[MHz] - uint16_t CorePower[8]; //[mW] - uint16_t CoreTemperature[8]; //[centi-Celsius] - uint16_t L3Frequency[2]; //[MHz] - uint16_t 
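The unit change in this metrics table is worth spelling out: the legacy fields carry Q8.8 fixed-point watts (high byte integer watts, low byte 1/256ths), while the new layout reports plain milliwatts. A small sketch with made-up values showing both encodings describing the same 15.5 W reading:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t q8_8_watts = 0x0F80; /* legacy: 15 + 128/256 = 15.5 W */
	uint32_t milliwatts = 15500;  /* new layout: 15.5 W directly */

	double legacy_w = q8_8_watts / 256.0;
	double new_w = milliwatts / 1000.0;

	printf("legacy %.3f W, new %.3f W\n", legacy_w, new_w);
	return 0;
}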
L3Temperature[2]; //[centi-Celsius] - - uint16_t spare2[24]; - - uint16_t GfxTemperature; //[centi-Celsius] - uint16_t SocTemperature; //[centi-Celsius] - uint16_t ThrottlerStatus; - - uint16_t CurrentSocketPower; //[mW] - uint16_t StapmOpnLimit; //[W] - uint16_t StapmCurrentLimit; //[W] - uint32_t ApuPower; //[mW] - uint32_t dGpuPower; //[mW] - - uint16_t VddTdcValue; //[mA] - uint16_t SocTdcValue; //[mA] - uint16_t VddEdcValue; //[mA] - uint16_t SocEdcValue; //[mA] - - uint16_t InfrastructureCpuMaxFreq; //[MHz] - uint16_t InfrastructureGfxMaxFreq; //[MHz] - - uint16_t SkinTemp; - uint16_t DeviceState; - uint16_t CurTemp; //[centi-Celsius] - uint16_t FilterAlphaValue; //[m] - - uint16_t AverageGfxclkFrequency; - uint16_t AverageFclkFrequency; - uint16_t AverageGfxActivity; - uint16_t AverageSocclkFrequency; - uint16_t AverageVclkFrequency; - uint16_t AverageVcnActivity; - uint16_t AverageDRAMReads; //Filtered DF Bandwidth::DRAM Reads - uint16_t AverageDRAMWrites; //Filtered DF Bandwidth::DRAM Writes - uint16_t AverageSocketPower; //Filtered value of CurrentSocketPower - uint16_t AverageCorePower[2]; //Filtered of [sum of CorePower[8] per ccx]) - uint16_t AverageCoreC0Residency[16]; //Filtered of [average C0 residency % per core] - uint16_t spare1; - uint32_t MetricsCounter; //Counts the # of metrics table parameter reads per update to the metrics table, i.e. if the metrics table update happens every 1 second, this value could be up to 1000 if the smu collected metrics data every cycle, or as low as 0 if the smu was asleep the whole time. Reset to 0 after writing. -} SmuMetrics_legacy_t; - //ISP tile definitions typedef enum { TILE_XTILE = 0, //ONO0 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index cc02f979e9e9..95cb919718ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -299,5 +299,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, uint8_t pcie_gen_cap, uint8_t pcie_width_cap); +int smu_v13_0_disable_pmfw_state(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 3efc6aed28f1..762b31455a0b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -234,24 +234,15 @@ static int vangogh_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF, sizeof(DpmActivityMonitorCoeffExt_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, max(sizeof(SmuMetrics_t), sizeof(SmuMetrics_legacy_t)), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - if (smu->smc_fw_if_version < 0x3) { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_legacy_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL); - } else { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); - } + smu_table->metrics_table = kzalloc(max(sizeof(SmuMetrics_t), sizeof(SmuMetrics_legacy_t)), GFP_KERNEL); if (!smu_table->metrics_table) goto err0_out; smu_table->metrics_time = 0; - if (smu->smc_fw_version >= 0x043F3E00) - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_3); - else - smu_table->gpu_metrics_table_size = 
sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2)); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a49e5adf7cc3..cf1b84060bc3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2477,3 +2477,16 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, return 0; } + +int smu_v13_0_disable_pmfw_state(struct smu_context *smu) +{ + int ret; + struct amdgpu_device *adev = smu->adev; + + WREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff), 0); + + ret = RREG32_PCIE(MP1_Public | + (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); + + return ret == 0 ? 0 : -EINVAL; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 34bd99b0e137..82c4e1f1c6f0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -354,12 +354,12 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) - smu_baco->maco_support = true; + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + smu_baco->maco_support = true; + } if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) @@ -2530,38 +2530,10 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && - (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || - ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) { - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_COMPUTE_BIT, - (void *)(&activity_monitor_external), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - true); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); - return ret; - } - - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_CUSTOM); - } else { - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, smu->power_profile_mode); - } if (workload_type < 0) return -EINVAL; @@ -2602,14 +2574,20 @@ static int smu_v13_0_0_baco_enter(struct smu_context *smu) static int smu_v13_0_0_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; + int ret; if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the 
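smu_v13_0_disable_pmfw_state() above is a write-then-verify sequence: clear the MP1 firmware-flags register, read it back, and report failure if the clear did not stick. A standalone sketch of the pattern (register access is stubbed with a plain variable here; the driver goes through WREG32_PCIE/RREG32_PCIE):

#include <stdint.h>
#include <errno.h>
#include <stdio.h>

static uint32_t fake_mp1_firmware_flags = 0xdeadbeef; /* stand-in register */

static uint32_t reg_read(void) { return fake_mp1_firmware_flags; }
static void reg_write(uint32_t v) { fake_mp1_firmware_flags = v; }

static int disable_pmfw_state_sketch(void)
{
	reg_write(0); /* request the state change */

	/* Read back and verify it actually took effect. */
	return reg_read() == 0 ? 0 : -EINVAL;
}

int main(void)
{
	printf("result: %d\n", disable_pmfw_state_sketch());
	return 0;
}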
Dstate change */ usleep_range(10000, 11000); - return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); } else { - return smu_v13_0_baco_exit(smu); + ret = smu_v13_0_baco_exit(smu); } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) @@ -2794,7 +2772,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f42b48b31927..891605d4975f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -48,6 +48,7 @@ #include "smu_cmn.h" #include "mp/mp_13_0_6_offset.h" #include "mp/mp_13_0_6_sh_mask.h" +#include "umc_v12_0.h" #undef MP1_Public #undef smnMP1_FIRMWARE_FLAGS @@ -94,22 +95,11 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 -#define SMU_13_0_6_DSCLK_THRESHOLD 100 +#define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -enum mca_reg_idx { - MCA_REG_IDX_CONTROL = 0, - MCA_REG_IDX_STATUS = 1, - MCA_REG_IDX_ADDR = 2, - MCA_REG_IDX_MISC0 = 3, - MCA_REG_IDX_CONFIG = 4, - MCA_REG_IDX_IPID = 5, - MCA_REG_IDX_SYND = 6, - MCA_REG_IDX_COUNT = 16, -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -122,7 +112,9 @@ struct mca_ras_info { int *err_code_array; int err_code_count; int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count); + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); + bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry); }; #define P2S_TABLE_ID_A 0x50325341 @@ -270,7 +262,7 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; - char ucode_prefix[30]; + char ucode_prefix[15]; char fw_name[30]; /* No need to load P2S tables in IOV mode */ @@ -2305,7 +2297,7 @@ static int smu_v13_0_6_post_init(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, true); + return smu_v13_0_6_mca_set_debug_mode(smu, false); return 0; } @@ -2387,6 +2379,7 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT MCA_BANK_IPID(UMC, 0x96, 0x0), MCA_BANK_IPID(SMU, 0x01, 0x1), MCA_BANK_IPID(MP5, 0x01, 0x2), + MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0), }; static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) @@ -2448,53 +2441,60 @@ static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_t return 0; } -static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, int *ip) +static int 
mca_decode_ipid_to_hwip(uint64_t val) { const struct mca_bank_ipid *ipid; - uint64_t val; uint16_t hwid, mcatype; - int i, ret; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_IPID, &val); - if (ret) - return ret; + int i; hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType); - if (hwid) { - for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { - ipid = &smu_v13_0_6_mca_ipid_table[i]; + for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { + ipid = &smu_v13_0_6_mca_ipid_table[i]; - if (!ipid->hwid) - continue; + if (!ipid->hwid) + continue; - if (ipid->hwid == hwid && ipid->mcatype == mcatype) { - *ip = i; - return 0; - } - } + if (ipid->hwid == hwid && ipid->mcatype == mcatype) + return i; } - *ip = AMDGPU_MCA_IP_UNKNOW; + return AMDGPU_MCA_IP_UNKNOW; +} + +static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + uint64_t status0; + + status0 = entry->regs[MCA_REG_IDX_STATUS]; + + if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { + *count = 0; + return 0; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + *count = 1; return 0; } -static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) +static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, + uint32_t *count) { - uint64_t status0; - int ret; + u32 ext_error_code; - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); - if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) + if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) *count = 1; - else - *count = 0; return 0; } @@ -2515,70 +2515,41 @@ static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mc return false; } -static int mca_mp5_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) +static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_MP5) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { - if (count) - *count = 1; + *count = 1; return 0; - } - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) + } else { + misc0 = 
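mca_decode_ipid_to_hwip() above reduces an IPID register to a HardwareID/McaType pair and linear-searches the IPID table for a match. A standalone sketch of that flow (the bit positions are assumed stand-ins for illustration, not the real MCMP1_IPIDT0 layout):

#include <stdint.h>
#include <stdio.h>

struct bank_ipid { uint16_t hwid, mcatype; const char *name; };

static const struct bank_ipid ipid_table[] = {
	{ 0x96, 0x0, "UMC" },
	{ 0x01, 0x1, "SMU" },
	{ 0x01, 0x2, "MP5" },
	{ 0x50, 0x0, "PCS_XGMI" },
};

static const char *decode_ipid(uint64_t ipid)
{
	/* assumed layout: hwid in bits [43:32], mcatype in bits [63:48] */
	uint16_t hwid = (ipid >> 32) & 0xfff;
	uint16_t mcatype = ipid >> 48;

	for (unsigned i = 0; i < sizeof(ipid_table) / sizeof(ipid_table[0]); i++)
		if (ipid_table[i].hwid == hwid && ipid_table[i].mcatype == mcatype)
			return ipid_table[i].name;
	return "UNKNOWN";
}

int main(void)
{
	uint64_t ipid = (uint64_t)0x96 << 32; /* hwid 0x96, mcatype 0 */

	printf("bank ip: %s\n", decode_ipid(ipid)); /* prints UMC */
	return 0;
}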
entry->regs[MCA_REG_IDX_MISC0]; *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + } return 0; } static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_SMU) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { @@ -2587,16 +2558,43 @@ static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) - *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + misc0 = entry->regs[MCA_REG_IDX_MISC0]; + *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); return 0; } +static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + switch (instlo) { + case 0x36430400: /* SMNAID XCD 0 */ + case 0x38430400: /* SMNAID XCD 1 */ + case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ + return true; + default: + return false; + } + + return false; +}; + +static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t errcode, instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + if (instlo != 0x03b30400) + return false; + + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + return mca_smu_check_error_code(adev, mca_ras, errcode); +} + static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 }; static int mmhub_err_codes[] = { CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */ @@ -2608,23 +2606,30 @@ static const struct mca_ras_info mca_ras_table[] = { { .blkid = AMDGPU_RAS_BLOCK__UMC, .ip = AMDGPU_MCA_IP_UMC, - .get_err_count = mca_normal_mca_get_err_count, + .get_err_count = mca_umc_mca_get_err_count, }, { .blkid = AMDGPU_RAS_BLOCK__GFX, - .ip = AMDGPU_MCA_IP_MP5, - .get_err_count = mca_mp5_mca_get_err_count, + .ip = AMDGPU_MCA_IP_SMU, + .get_err_count = mca_gfx_mca_get_err_count, + .bank_is_valid = mca_gfx_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__SDMA, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = sdma_err_codes, .err_code_count = ARRAY_SIZE(sdma_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__MMHUB, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = mmhub_err_codes, .err_code_count = ARRAY_SIZE(mmhub_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, + }, { + .blkid = 
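The error-count helpers above share one shape: a bank is ignored unless STATUS has Val set, a set UC+PCC pair counts as a single uncorrectable error, and otherwise the count comes from the ErrCnt field of MISC0. A simplified standalone sketch of that classification (bit positions are assumed, and the type handling is condensed relative to the driver flow):

#include <stdint.h>
#include <stdio.h>

#define STATUS_VAL (1ull << 63) /* assumed bit positions */
#define STATUS_UC  (1ull << 61)
#define STATUS_PCC (1ull << 57)

static uint32_t count_errors(uint64_t status, uint64_t misc0, int want_ue)
{
	if (!(status & STATUS_VAL))
		return 0; /* bank holds no valid error */

	if ((status & STATUS_UC) && (status & STATUS_PCC))
		return want_ue ? 1 : 0; /* one uncorrectable error */

	/* corrected errors: ErrCnt assumed in MISC0 bits [43:32] */
	return want_ue ? 0 : (uint32_t)((misc0 >> 32) & 0xfff);
}

int main(void)
{
	uint64_t status = STATUS_VAL;       /* valid, correctable bank */
	uint64_t misc0 = (uint64_t)3 << 32; /* ErrCnt = 3 */

	printf("ue=%u ce=%u\n", count_errors(status, misc0, 1),
	       count_errors(status, misc0, 0)); /* prints ue=0 ce=3 */
	return 0;
}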
AMDGPU_RAS_BLOCK__XGMI_WAFL, + .ip = AMDGPU_MCA_IP_PCS_XGMI, + .get_err_count = mca_pcs_xgmi_mca_get_err_count, }, }; @@ -2659,130 +2664,84 @@ static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_e } static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int idx) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { - int ret, ip = AMDGPU_MCA_IP_UNKNOW; - - ret = mca_decode_mca_ipid(adev, type, idx, &ip); - if (ret) - return false; - - if (ip == AMDGPU_MCA_IP_UNKNOW) + if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip) return false; - return ip == mca_ras->ip; -} + if (mca_ras->bank_is_valid) + return mca_ras->bank_is_valid(mca_ras, adev, type, entry); -static int mca_get_valid_mca_idx(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, - uint32_t mca_cnt, int *idx_array, int idx_array_size) -{ - int i, idx_cnt = 0; - - for (i = 0; i < mca_cnt; i++) { - if (!mca_bank_is_valid(adev, mca_ras, type, i)) - continue; - - if (idx_array) { - if (idx_cnt < idx_array_size) - idx_array[idx_cnt] = i; - else - return -EINVAL; - } - - idx_cnt++; - } - - return idx_cnt; + return true; } -static int __mca_smu_get_error_count(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, enum amdgpu_mca_error_type type, uint32_t *count) +static int __mca_smu_get_ras_mca_set(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) { - uint32_t result, mca_cnt, total = 0; - int idx_array[16]; - int i, ret, idx_cnt = 0; + struct mca_bank_entry entry; + uint32_t mca_cnt; + int i, ret; ret = mca_get_valid_mca_count(adev, type, &mca_cnt); if (ret) return ret; /* if valid mca bank count is 0, the driver can return 0 directly */ - if (!mca_cnt) { - *count = 0; + if (!mca_cnt) return 0; - } - if (!mca_ras->get_err_count) - return -EINVAL; + for (i = 0; i < mca_cnt; i++) { + memset(&entry, 0, sizeof(entry)); + ret = mca_get_mca_entry(adev, type, i, &entry); + if (ret) + return ret; - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, ARRAY_SIZE(idx_array)); - if (idx_cnt < 0) - return -EINVAL; + if (mca_ras && !mca_bank_is_valid(adev, mca_ras, type, &entry)) + continue; - for (i = 0; i < idx_cnt; i++) { - result = 0; - ret = mca_ras->get_err_count(mca_ras, adev, type, idx_array[i], &result); + ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry); if (ret) return ret; - - total += result; } - *count = total; - return 0; } -static int mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +static int mca_smu_get_ras_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) { - const struct mca_ras_info *mca_ras; + const struct mca_ras_info *mca_ras = NULL; - if (!count) + if (!mca_set) return -EINVAL; - mca_ras = mca_get_mca_ras_info(adev, blk); - if (!mca_ras) - return -EOPNOTSUPP; - - return __mca_smu_get_error_count(adev, mca_ras, type, count); -} - -static int __mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) -{ - uint32_t mca_cnt = 0; - int ret, idx_cnt = 0; - - ret = mca_get_valid_mca_count(adev, type, &mca_cnt); - if (ret) - return ret; - - /* if valid mca bank count is 0, the 
driver can return 0 directly */ - if (!mca_cnt) { - *idx_array_size = 0; - return 0; + if (blk != AMDGPU_RAS_BLOCK_COUNT) { + mca_ras = mca_get_mca_ras_info(adev, blk); + if (!mca_ras) + return -EOPNOTSUPP; } - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, *idx_array_size); - if (idx_cnt < 0) - return -EINVAL; - - *idx_array_size = idx_cnt; - - return 0; + return __mca_smu_get_ras_mca_set(adev, mca_ras, type, mca_set); } -static int mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) +static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count) { const struct mca_ras_info *mca_ras; + if (!entry || !count) + return -EINVAL; + mca_ras = mca_get_mca_ras_info(adev, blk); if (!mca_ras) return -EOPNOTSUPP; - return __mca_smu_get_ras_mca_idx_array(adev, mca_ras, type, idx_array, idx_array_size); + if (!mca_bank_is_valid(adev, mca_ras, type, entry)) { + *count = 0; + return 0; + } + + return mca_ras->get_err_count(mca_ras, adev, type, entry, count); } static int mca_smu_get_mca_entry(struct amdgpu_device *adev, @@ -2801,10 +2760,10 @@ static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = { .max_ue_count = 12, .max_ce_count = 12, .mca_set_debug_mode = mca_smu_set_debug_mode, - .mca_get_error_count = mca_smu_get_error_count, + .mca_get_ras_mca_set = mca_smu_get_ras_mca_set, + .mca_parse_mca_error_count = mca_smu_parse_mca_error_count, .mca_get_mca_entry = mca_smu_get_mca_entry, .mca_get_valid_mca_count = mca_smu_get_valid_mca_count, - .mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array, }; static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index ac0e1cc812bd..81eafed76045 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -346,12 +346,13 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) - smu_baco->maco_support = true; + if ((powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) + smu_baco->maco_support = true; + } if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) @@ -2498,7 +2499,13 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ @@ -2524,14 +2531,20 @@ static int smu_v13_0_7_baco_enter(struct smu_context *smu) static int smu_v13_0_7_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = 
smu->adev; + int ret; if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ usleep_range(10000, 11000); - return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); } else { - return smu_v13_0_baco_exit(smu); + ret = smu_v13_0_baco_exit(smu); } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 4ac22f44d160..d8f8ad0e7137 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -57,7 +57,7 @@ int smu_v14_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; char fw_name[30]; - char ucode_prefix[30]; + char ucode_prefix[15]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -229,6 +229,8 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; case IP_VERSION(14, 0, 0): + if ((smu->smc_fw_version < 0x5d3a00)) + dev_warn(smu->adev->dev, "The PMFW version(%x) is behind in this BIOS!\n", smu->smc_fw_version); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index c36fc10b63c8..03b38c3a9968 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -156,15 +156,10 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - if (smu->smc_fw_version > 0x5d3500) { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); - } else { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_legacy_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL); - } + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) goto err0_out; smu_table->metrics_time = 0; @@ -177,10 +172,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) if (!smu_table->watermarks_table) goto err2_out; - if (smu->smc_fw_version > 0x5d3500) - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v3_0); - else - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v3_0); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err3_out; @@ -242,13 +234,13 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, switch (member) { case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequency; + *value = metrics->GfxclkFrequency; break; case METRICS_AVERAGE_SOCCLK: - *value = metrics->AverageSocclkFrequency; + *value = metrics->SocclkFrequency; break; case METRICS_AVERAGE_VCLK: - *value = 
metrics->AverageVclkFrequency; + *value = metrics->VclkFrequency; break; case METRICS_AVERAGE_DCLK: *value = 0; @@ -257,25 +249,25 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; break; case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; + *value = metrics->FclkFrequency; break; case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->AverageGfxActivity >> 8; + *value = metrics->GfxActivity / 100; break; case METRICS_AVERAGE_VCNACTIVITY: - *value = metrics->AverageVcnActivity >> 8; + *value = metrics->VcnActivity / 100; break; case METRICS_AVERAGE_SOCKETPOWER: case METRICS_CURR_SOCKETPOWER: - *value = (metrics->AverageSocketPower & 0xff00) + - ((metrics->AverageSocketPower & 0xff) * 100 >> 8); + *value = (metrics->SocketPower / 1000 << 8) + + (metrics->SocketPower % 1000 / 10); break; case METRICS_TEMPERATURE_EDGE: - *value = (metrics->GfxTemperature >> 8) * + *value = metrics->GfxTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = (metrics->SocTemperature >> 8) * + *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_THROTTLER_STATUS: @@ -317,107 +309,6 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, return ret; } -static int smu_v14_0_0_legacy_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value) -{ - struct smu_table_context *smu_table = &smu->smu_table; - - SmuMetrics_legacy_t *metrics = (SmuMetrics_legacy_t *)smu_table->metrics_table; - int ret = 0; - - ret = smu_cmn_get_metrics_table(smu, NULL, false); - if (ret) - return ret; - - switch (member) { - case METRICS_AVERAGE_GFXCLK: - *value = metrics->GfxclkFrequency; - break; - case METRICS_AVERAGE_SOCCLK: - *value = metrics->SocclkFrequency; - break; - case METRICS_AVERAGE_VCLK: - *value = metrics->VclkFrequency; - break; - case METRICS_AVERAGE_DCLK: - *value = metrics->DclkFrequency; - break; - case METRICS_AVERAGE_UCLK: - *value = metrics->MemclkFrequency; - break; - case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->GfxActivity / 100; - break; - case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; - break; - case METRICS_AVERAGE_VCNACTIVITY: - *value = metrics->UvdActivity; - break; - case METRICS_AVERAGE_SOCKETPOWER: - *value = (metrics->AverageSocketPower << 8) / 1000; - break; - case METRICS_CURR_SOCKETPOWER: - *value = (metrics->CurrentSocketPower << 8) / 1000; - break; - case METRICS_TEMPERATURE_EDGE: - *value = metrics->GfxTemperature / 100 * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_HOTSPOT: - *value = metrics->SocTemperature / 100 * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_THROTTLER_STATUS: - *value = metrics->ThrottlerStatus; - break; - case METRICS_VOLTAGE_VDDGFX: - *value = metrics->Voltage[0]; - break; - case METRICS_VOLTAGE_VDDSOC: - *value = metrics->Voltage[1]; - break; - case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. - */ - if (metrics->StapmOpnLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOpnLimit; - else - *value = 0; - break; - case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. 
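The new socket-power conversion packs a milliwatt reading into the same <integer watts><fraction> layout the old 8.8 decode produced, except the low byte is now decimal-coded centiwatts rather than a binary fraction. A worked example of the arithmetic in the hunk above:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the expression in the diff: watts in the high byte,
 * hundredths of a watt (0..99) in the low byte. */
static uint32_t pack_socket_power_mw(uint32_t mw)
{
	return (mw / 1000 << 8) + (mw % 1000 / 10);
}

int main(void)
{
	/* 12345 mW -> (12 << 8) + 34 = 0x0c22 */
	printf("0x%04x\n", pack_socket_power_mw(12345));
	return 0;
}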
Smartshift power - * boost/shift is only when the percentage is more than 100. - */ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOpnLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOpnLimit); - else - *value = 0; - break; - default: - *value = UINT_MAX; - break; - } - - return ret; -} - -static int smu_v14_0_0_common_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value) -{ - if (smu->smc_fw_version > 0x5d3500) - return smu_v14_0_0_get_smu_metrics_data(smu, member, value); - else - return smu_v14_0_0_legacy_get_smu_metrics_data(smu, member, value); -} - static int smu_v14_0_0_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) @@ -429,69 +320,69 @@ static int smu_v14_0_0_read_sensor(struct smu_context *smu, switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXACTIVITY, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_CURR_SOCKETPOWER, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_EDGE_TEMP: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_TEMPERATURE_EDGE, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_TEMPERATURE_HOTSPOT, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_UCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_VDDGFX: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDGFX, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_VDDNB: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDSOC, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_SS_APU_SHARE: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_SS_APU_SHARE, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_SS_DGPU_SHARE: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_SS_DGPU_SHARE, (uint32_t *)data); *size = 4; @@ -588,7 +479,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, if (ret) return ret; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 3, 0); gpu_metrics->temperature_gfx = metrics.GfxTemperature; gpu_metrics->temperature_soc = metrics.SocTemperature; @@ -597,32 +488,33 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->temperature_skin = 
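The SmartShift share cases report power as a percentage of a limit, with the dGPU case using the STAPM headroom (current limit minus OPN limit) as the denominator; values above 100 signal an active boost. A guarded sketch of that computation:

#include <stdint.h>

/* dGPU share as a percentage of the STAPM headroom. Both guards matter:
 * a zero or inverted limit range would otherwise divide by zero or
 * yield a meaningless huge percentage. */
static uint32_t ss_dgpu_share(uint32_t dgpu_power,
			      uint32_t stapm_cur, uint32_t stapm_opn)
{
	if (dgpu_power > 0 && stapm_cur > stapm_opn)
		return dgpu_power * 100 / (stapm_cur - stapm_opn);
	return 0;
}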
metrics.SkinTemp; - gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; - gpu_metrics->average_vcn_activity = metrics.AverageVcnActivity; + gpu_metrics->average_gfx_activity = metrics.GfxActivity; + gpu_metrics->average_vcn_activity = metrics.VcnActivity; memcpy(&gpu_metrics->average_ipu_activity[0], - &metrics.AverageIpuBusy[0], + &metrics.IpuBusy[0], sizeof(uint16_t) * 8); memcpy(&gpu_metrics->average_core_c0_activity[0], - &metrics.AverageCoreC0Residency[0], + &metrics.CoreC0Residency[0], sizeof(uint16_t) * 16); - gpu_metrics->average_dram_reads = metrics.AverageDRAMReads; - gpu_metrics->average_dram_writes = metrics.AverageDRAMWrites; + gpu_metrics->average_dram_reads = metrics.DRAMReads; + gpu_metrics->average_dram_writes = metrics.DRAMWrites; - gpu_metrics->average_socket_power = metrics.AverageSocketPower; + gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; gpu_metrics->average_apu_power = metrics.ApuPower; + gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; - gpu_metrics->average_core_power = metrics.AverageCorePower; - memcpy(&gpu_metrics->core_power[0], + gpu_metrics->average_all_core_power = metrics.AllCorePower; + memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); - gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency; - gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency; - gpu_metrics->average_vpeclk_frequency = metrics.AverageVpeclkFrequency; - gpu_metrics->average_fclk_frequency = metrics.AverageFclkFrequency; - gpu_metrics->average_vclk_frequency = metrics.AverageVclkFrequency; - gpu_metrics->average_ipuclk_frequency = metrics.AverageIpuclkFrequency; + gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; + gpu_metrics->average_vpeclk_frequency = metrics.VpeclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; + gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -638,68 +530,6 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v3_0); } -static ssize_t smu_v14_0_0_get_legacy_gpu_metrics(struct smu_context *smu, - void **table) -{ - struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v2_1 *gpu_metrics = - (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table; - SmuMetrics_legacy_t metrics; - int ret = 0; - - ret = smu_cmn_get_metrics_table(smu, &metrics, true); - if (ret) - return ret; - - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1); - - gpu_metrics->temperature_gfx = metrics.GfxTemperature; - gpu_metrics->temperature_soc = metrics.SocTemperature; - memcpy(&gpu_metrics->temperature_core[0], - &metrics.CoreTemperature[0], - sizeof(uint16_t) * 8); - gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0]; - gpu_metrics->temperature_l3[1] = metrics.L3Temperature[1]; - - gpu_metrics->average_gfx_activity = metrics.GfxActivity; - gpu_metrics->average_mm_activity = metrics.UvdActivity; - - gpu_metrics->average_socket_power = metrics.CurrentSocketPower; - gpu_metrics->average_gfx_power = metrics.Power[0]; - gpu_metrics->average_soc_power = metrics.Power[1]; - memcpy(&gpu_metrics->average_core_power[0], - &metrics.CorePower[0], - sizeof(uint16_t) * 8); - - 
gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; - gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; - gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; - gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency; - gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; - gpu_metrics->average_dclk_frequency = metrics.DclkFrequency; - - memcpy(&gpu_metrics->current_coreclk[0], - &metrics.CoreFrequency[0], - sizeof(uint16_t) * 8); - - gpu_metrics->throttle_status = metrics.ThrottlerStatus; - gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); - - *table = (void *)gpu_metrics; - - return sizeof(struct gpu_metrics_v2_1); -} - -static ssize_t smu_v14_0_0_common_get_gpu_metrics(struct smu_context *smu, - void **table) -{ - - if (smu->smc_fw_version > 0x5d3500) - return smu_v14_0_0_get_gpu_metrics(smu, table); - else - return smu_v14_0_0_get_legacy_gpu_metrics(smu, table); -} - static int smu_v14_0_0_mode2_reset(struct smu_context *smu) { int ret; @@ -928,7 +758,7 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu, return -EINVAL; } - return smu_v14_0_0_common_get_smu_metrics_data(smu, member_type, value); + return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value); } static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu, @@ -1230,7 +1060,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .read_sensor = smu_v14_0_0_read_sensor, .is_dpm_running = smu_v14_0_0_is_dpm_running, .set_watermarks_table = smu_v14_0_0_set_watermarks_table, - .get_gpu_metrics = smu_v14_0_0_common_get_gpu_metrics, + .get_gpu_metrics = smu_v14_0_0_get_gpu_metrics, .get_enabled_mask = smu_cmn_get_enabled_mask, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_driver_table_location = smu_v14_0_set_driver_table_location, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 6e57c94379a9..001a5cf09657 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1004,6 +1004,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(2, 4): structure_size = sizeof(struct gpu_metrics_v2_4); break; + case METRICS_VERSION(3, 0): + structure_size = sizeof(struct gpu_metrics_v3_0); + break; default: return; } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 84148a79414b..c45c07840f64 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1580,7 +1580,6 @@ static const struct pwm_ops ti_sn_pwm_ops = { .free = ti_sn_pwm_free, .apply = ti_sn_pwm_apply, .get_state = ti_sn_pwm_get_state, - .owner = THIS_MODULE, }; static int ti_sn_pwm_probe(struct auxiliary_device *adev, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f7003d1ec5ef..01da6789d044 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1069,7 +1069,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, fence = drm_syncobj_fence_get(syncobjs[i]); if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { dma_fence_put(fence); - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { continue; } else { timeout = -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6d7ba4d0f130..c4839c67cb0f 
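The drm_syncobj fix above widens the condition for tolerating a not-yet-materialized timeline point: either WAIT_FOR_SUBMIT or WAIT_AVAILABLE means the wait should keep polling rather than fail with -EINVAL, and both can be tested with a single OR'd mask. Sketch, with illustrative flag values:

#include <stdbool.h>
#include <stdint.h>

#define WAIT_FOR_SUBMIT	(1u << 0)	/* illustrative bit values */
#define WAIT_AVAILABLE	(1u << 1)

/* A missing fence for the requested point is acceptable if the caller
 * opted into waiting for submission or for availability. */
static bool may_wait_unsubmitted(uint32_t flags)
{
	return flags & (WAIT_FOR_SUBMIT | WAIT_AVAILABLE);
}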
100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2750,6 +2750,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) for_each_pipe(dev_priv, pipe) min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + /* + * Avoid glk_force_audio_cdclk() causing excessive screen + * blinking when multiple pipes are active by making sure + * CDCLK frequency is always high enough for audio. With a + * single active pipe we can always change CDCLK frequency + * by changing the cd2x divider (see glk_cdclk_table[]) and + * thus a full modeset won't be needed then. + */ + if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes && + !is_power_of_2(cdclk_state->active_pipes)) + min_cdclk = max(2 * 96000, min_cdclk); + if (min_cdclk > dev_priv->display.cdclk.max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1891c0cc187d..2c1034578984 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -430,7 +430,7 @@ static int mtl_max_source_rate(struct intel_dp *intel_dp) enum phy phy = intel_port_to_phy(i915, dig_port->base.port); if (intel_is_c10phy(i915, phy)) - return intel_dp_is_edp(intel_dp) ? 675000 : 810000; + return 810000; return 2000000; } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 37b0f8529b4f..f64d348a969e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -58,7 +58,7 @@ struct intel_tc_port { struct delayed_work link_reset_work; int link_refcount; bool legacy_port:1; - char port_name[8]; + const char *port_name; enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; @@ -1875,8 +1875,12 @@ int intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) else tc->phy_ops = &icl_tc_phy_ops; - snprintf(tc->port_name, sizeof(tc->port_name), - "%c/TC#%d", port_name(port), tc_port + 1); + tc->port_name = kasprintf(GFP_KERNEL, "%c/TC#%d", port_name(port), + tc_port + 1); + if (!tc->port_name) { + kfree(tc); + return -ENOMEM; + } mutex_init(&tc->lock); /* TODO: Combine the two works */ @@ -1897,6 +1901,7 @@ void intel_tc_port_cleanup(struct intel_digital_port *dig_port) { intel_tc_port_suspend(dig_port); + kfree(dig_port->tc->port_name); kfree(dig_port->tc); dig_port->tc = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9a9ff84c90d7..e38f06a6e56e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, if (idx >= pc->num_user_engines) return -EINVAL; + idx = array_index_nospec(idx, pc->num_user_engines); pe = &pc->user_engines[idx]; /* Only render engine supports RPCS configuration. */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 1c93e84278a0..15fc8e4703f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -195,6 +195,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } +static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) +{ + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. 
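The GLK clamp above applies only when more than one pipe is active, and because active_pipes is a bitmask that is exactly the "nonzero and not a power of two" test (is_power_of_2() lives in <linux/log2.h>). A userspace sketch of the same bit trick:

#include <stdbool.h>
#include <stdint.h>

/* More than one bit set <=> nonzero and not a power of two. Clearing
 * the lowest set bit with x & (x - 1) leaves a nonzero value only when
 * a second bit exists. */
static bool multiple_pipes_active(uint32_t active_pipes)
{
	return active_pipes && (active_pipes & (active_pipes - 1));
}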
For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) + return true; + + return false; +} + static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -202,8 +217,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) /* * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. + * + * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + if (needs_wc_ggtt_mapping(ggtt->vm.i915)) + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, + GFX_FLSH_CNTL_EN); } static void guc_ggtt_ct_invalidate(struct intel_gt *gt) @@ -1140,17 +1159,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else + if (needs_wc_ggtt_mapping(i915)) ggtt->gsm = ioremap_wc(phys_addr, size); + else + ggtt->gsm = ioremap(phys_addr, size); + if (!ggtt->gsm) { drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8b67abd720be..7090e4be29cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -581,19 +581,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) static void rc6_res_reg_init(struct intel_rc6 *rc6) { - memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { + [0 ... 
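The "readback check" the comment refers to verifies PTE writes by reading them back through the same mapping, which catches the dropped wide writes that make WC unusable on these platforms. A hedged kernel-style sketch of such a check (illustrative, not the exact i915 code):

/* Write a PTE and read it back through the ioremap()ed GSM; on an
 * affected platform with a WC mapping the readback would not match.
 * Assumes a 64-bit build for readq()/writeq() from <linux/io.h>. */
static bool ggtt_pte_write_ok(u64 __iomem *pte, u64 value)
{
	writeq(value, pte);
	return readq(pte) == value;
}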
INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, + }; switch (rc6_to_gt(rc6)->type) { case GT_MEDIA: - rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; break; default: - rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; - rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; - rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; - rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; break; } + + memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); } void intel_rc6_init(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 614bde321589..8bca02025e09 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -38,10 +38,13 @@ static int i915_param_int_open(struct inode *inode, struct file *file) static int notify_guc(struct drm_i915_private *i915) { - int ret = 0; + struct intel_gt *gt; + int i, ret = 0; - if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) - ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc); + for_each_gt(gt, i915, i) { + if (intel_uc_uses_guc_submission(>->uc)) + ret = intel_guc_global_policies_update(>->uc.guc); + } return ret; } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2f3ecd7d4804..7b1c8de2f9cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4227,11 +4227,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, u32 known_open_flags; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | @@ -4607,11 +4604,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_reg *regs; int err, id; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (!perf->metrics_kobj) { drm_dbg(&perf->i915->drm, @@ -4773,11 +4767,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_config *oa_config; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (i915_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&perf->i915->drm, diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 6492a70e3c39..404b0483bb7c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1229,6 +1229,9 @@ int qxl_destroy_monitors_object(struct qxl_device *qdev) if (!qdev->monitors_config_bo) return 0; + kfree(qdev->dumb_heads); + qdev->dumb_heads = NULL; + qdev->monitors_config = NULL; qdev->ram_header->monitors_config = 0; diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 8a6621f1e82c..2db40789235c 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -3893,7 +3893,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT typedef struct _ATOM_GPIO_PIN_LUT { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1]; + ATOM_GPIO_PIN_ASSIGNMENT 
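The old rc6_res_reg_init() used memset() with INVALID_MMIO_REG.reg as the fill value, but memset() replicates a single byte, so the result is only correct when every byte of the sentinel happens to be identical. The designated-initializer array in the new code is type-correct for any sentinel. A compact illustration (GNU C range designators, as the kernel uses):

#include <string.h>
#include <stdint.h>

typedef struct { uint32_t reg; } reg_t;	/* stand-in for i915_reg_t */
#define NUM_RES 4

static void init_regs(reg_t out[NUM_RES], reg_t sentinel)
{
	/* memset(out, sentinel.reg, sizeof(...)) would write only the low
	 * byte of .reg into every byte; the array form assigns whole
	 * structs. */
	reg_t tmp[NUM_RES] = { [0 ... NUM_RES - 1] = sentinel };
	memcpy(out, tmp, sizeof(tmp));
}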
asGPIO_Pin[]; }ATOM_GPIO_PIN_LUT; /****************************************************************************/ @@ -4061,7 +4061,7 @@ typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT //usSrcDstTableOffset UCHAR ucNumberOfSrc; USHORT usSrcObjectID[1]; UCHAR ucNumberOfDst; - USHORT usDstObjectID[1]; + USHORT usDstObjectID[]; }ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT; @@ -4233,7 +4233,7 @@ typedef struct _ATOM_CONNECTOR_DEVICE_TAG_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucNumberOfDevice; UCHAR ucReserved; - ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[1]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation + ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation }ATOM_CONNECTOR_DEVICE_TAG_RECORD; @@ -4293,7 +4293,7 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucFlags; // Future expnadibility UCHAR ucNumberOfPins; // Number of GPIO pins used to control the object - ATOM_GPIO_PIN_CONTROL_PAIR asGpio[1]; // the real gpio pin pair determined by number of pins ucNumberOfPins + ATOM_GPIO_PIN_CONTROL_PAIR asGpio[]; // the real gpio pin pair determined by number of pins ucNumberOfPins }ATOM_OBJECT_GPIO_CNTL_RECORD; //Definitions for GPIO pin state @@ -4444,7 +4444,7 @@ typedef struct _ATOM_BRACKET_LAYOUT_RECORD UCHAR ucWidth; UCHAR ucConnNum; UCHAR ucReserved; - ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[1]; + ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[]; }ATOM_BRACKET_LAYOUT_RECORD; /****************************************************************************/ @@ -4600,7 +4600,7 @@ typedef struct _ATOM_I2C_VOLTAGE_OBJECT_V3 UCHAR ucVoltageControlAddress; UCHAR ucVoltageControlOffset; ULONG ulReserved; - VOLTAGE_LUT_ENTRY asVolI2cLut[1]; // end with 0xff + VOLTAGE_LUT_ENTRY asVolI2cLut[]; // end with 0xff }ATOM_I2C_VOLTAGE_OBJECT_V3; // ATOM_I2C_VOLTAGE_OBJECT_V3.ucVoltageControlFlag @@ -4625,7 +4625,7 @@ typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 UCHAR ucLeakageEntryNum; // indicate the entry number of LeakageId/Voltage Lut table UCHAR ucReserved[2]; ULONG ulMaxVoltageLevel; - LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1]; + LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[]; }ATOM_LEAKAGE_VOLTAGE_OBJECT_V3; @@ -4753,7 +4753,7 @@ typedef struct _ATOM_POWER_SOURCE_INFO { ATOM_COMMON_TABLE_HEADER asHeader; UCHAR asPwrbehave[16]; - ATOM_POWER_SOURCE_OBJECT asPwrObj[1]; + ATOM_POWER_SOURCE_OBJECT asPwrObj[]; }ATOM_POWER_SOURCE_INFO; @@ -5440,7 +5440,7 @@ typedef struct _ATOM_FUSION_SYSTEM_INFO_V2 typedef struct _ATOM_I2C_DATA_RECORD { UCHAR ucNunberOfBytes; //Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop" - UCHAR ucI2CData[1]; //I2C data in bytes, should be less than 16 bytes usually + UCHAR ucI2CData[]; //I2C data in bytes, should be less than 16 bytes usually }ATOM_I2C_DATA_RECORD; @@ -5451,14 +5451,14 @@ typedef struct _ATOM_I2C_DEVICE_SETUP_INFO UCHAR ucSSChipID; //SS chip being used UCHAR ucSSChipSlaveAddr; //Slave Address to set up this SS chip UCHAR ucNumOfI2CDataRecords; //number of data block - ATOM_I2C_DATA_RECORD asI2CData[1]; + ATOM_I2C_DATA_RECORD asI2CData[]; }ATOM_I2C_DEVICE_SETUP_INFO; //========================================================================================== typedef struct _ATOM_ASIC_MVDD_INFO { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[1]; + ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[]; }ATOM_ASIC_MVDD_INFO; 
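These atombios.h changes convert fake one-element arrays into C99 flexible array members, so the compiler (and fortified memory helpers) know the trailing array's length is determined at runtime rather than being 1. A minimal sketch of the pattern and its allocation:

#include <stdlib.h>
#include <stdint.h>

struct pin_lut {
	uint16_t header;	/* stand-in for ATOM_COMMON_TABLE_HEADER */
	uint32_t pins[];	/* flexible array member, formerly pins[1] */
};

/* The allocation now states its intent: header plus n entries. In the
 * kernel, struct_size(lut, pins, n) from <linux/overflow.h> does this
 * computation with overflow checking. */
static struct pin_lut *alloc_pin_lut(size_t n)
{
	return malloc(sizeof(struct pin_lut) + n * sizeof(uint32_t));
}

Note that with the old [1] declaration, sizeof the struct included one bogus element, which is why such conversions occasionally require auditing the size calculations at allocation sites.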
//========================================================================================== @@ -5520,7 +5520,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[1]; //this is point only. + ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[]; //this is pointer only. }ATOM_ASIC_INTERNAL_SS_INFO_V2; typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 @@ -5542,7 +5542,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[1]; //this is pointer only. + ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[]; //this is pointer only. }ATOM_ASIC_INTERNAL_SS_INFO_V3; @@ -6282,7 +6282,7 @@ typedef union _ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS typedef struct _ATOM_MEMORY_SETTING_DATA_BLOCK{ ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS ulMemoryID; - ULONG aulMemData[1]; + ULONG aulMemData[]; }ATOM_MEMORY_SETTING_DATA_BLOCK; @@ -7092,7 +7092,7 @@ typedef struct _ATOM_DISP_OUT_INFO_V3 UCHAR ucCoreRefClkSource; // value of CORE_REF_CLK_SOURCE UCHAR ucDispCaps; UCHAR ucReserved[2]; - ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[1]; // for alligment only + ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[]; // for alignment only }ATOM_DISP_OUT_INFO_V3; //ucDispCaps @@ -7324,12 +7324,12 @@ typedef struct _CLOCK_CONDITION_SETTING_ENTRY{ USHORT usMaxClockFreq; UCHAR ucEncodeMode; UCHAR ucPhySel; - ULONG ulAnalogSetting[1]; + ULONG ulAnalogSetting[]; }CLOCK_CONDITION_SETTING_ENTRY; typedef struct _CLOCK_CONDITION_SETTING_INFO{ USHORT usEntrySize; - CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[1]; + CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[]; }CLOCK_CONDITION_SETTING_INFO; typedef struct _PHY_CONDITION_REG_VAL{ @@ -7346,27 +7346,27 @@ typedef struct _PHY_CONDITION_REG_VAL_V2{ typedef struct _PHY_CONDITION_REG_INFO{ USHORT usRegIndex; USHORT usSize; - PHY_CONDITION_REG_VAL asRegVal[1]; + PHY_CONDITION_REG_VAL asRegVal[]; }PHY_CONDITION_REG_INFO; typedef struct _PHY_CONDITION_REG_INFO_V2{ USHORT usRegIndex; USHORT usSize; - PHY_CONDITION_REG_VAL_V2 asRegVal[1]; + PHY_CONDITION_REG_VAL_V2 asRegVal[]; }PHY_CONDITION_REG_INFO_V2; typedef struct _PHY_ANALOG_SETTING_INFO{ UCHAR ucEncodeMode; UCHAR ucPhySel; USHORT usSize; - PHY_CONDITION_REG_INFO asAnalogSetting[1]; + PHY_CONDITION_REG_INFO asAnalogSetting[]; }PHY_ANALOG_SETTING_INFO; typedef struct _PHY_ANALOG_SETTING_INFO_V2{ UCHAR ucEncodeMode; UCHAR ucPhySel; USHORT usSize; - PHY_CONDITION_REG_INFO_V2 asAnalogSetting[1]; + PHY_CONDITION_REG_INFO_V2 asAnalogSetting[]; }PHY_ANALOG_SETTING_INFO_V2; typedef struct _GFX_HAVESTING_PARAMETERS { diff --git a/drivers/gpu/drm/renesas/shmobile/Kconfig b/drivers/gpu/drm/renesas/shmobile/Kconfig index ad14112999ad..027220b8fe1c 100644 --- a/drivers/gpu/drm/renesas/shmobile/Kconfig +++ b/drivers/gpu/drm/renesas/shmobile/Kconfig @@ -1,11 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 config DRM_SHMOBILE tristate "DRM Support for SH Mobile" - depends on DRM + depends on DRM && PM depends on ARCH_RENESAS || ARCH_SHMOBILE || COMPILE_TEST select BACKLIGHT_CLASS_DEVICE select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an SH Mobile chipset. If M is selected the module will be called shmob-drm.
diff --git a/drivers/gpu/drm/renesas/shmobile/Makefile b/drivers/gpu/drm/renesas/shmobile/Makefile index 861edafed856..2679555d61a7 100644 --- a/drivers/gpu/drm/renesas/shmobile/Makefile +++ b/drivers/gpu/drm/renesas/shmobile/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -shmob-drm-y := shmob_drm_backlight.o \ - shmob_drm_crtc.o \ +shmob-drm-y := shmob_drm_crtc.o \ shmob_drm_drv.o \ shmob_drm_kms.o \ shmob_drm_plane.o diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c deleted file mode 100644 index 794573badfe8..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * shmob_drm_backlight.c -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#include <linux/backlight.h> - -#include "shmob_drm_backlight.h" -#include "shmob_drm_crtc.h" -#include "shmob_drm_drv.h" - -static int shmob_drm_backlight_update(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - int brightness = backlight_get_brightness(bdev); - - return bdata->set_brightness(brightness); -} - -static int shmob_drm_backlight_get_brightness(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - - return bdata->get_brightness(); -} - -static const struct backlight_ops shmob_drm_backlight_ops = { - .options = BL_CORE_SUSPENDRESUME, - .update_status = shmob_drm_backlight_update, - .get_brightness = shmob_drm_backlight_get_brightness, -}; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode) -{ - if (scon->backlight == NULL) - return; - - scon->backlight->props.power = mode == DRM_MODE_DPMS_ON - ? 
FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; - backlight_update_status(scon->backlight); -} - -int shmob_drm_backlight_init(struct shmob_drm_connector *scon) -{ - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - struct drm_connector *connector = &scon->connector; - struct drm_device *dev = connector->dev; - struct backlight_device *backlight; - - if (!bdata->max_brightness) - return 0; - - backlight = backlight_device_register(bdata->name, dev->dev, scon, - &shmob_drm_backlight_ops, NULL); - if (IS_ERR(backlight)) { - dev_err(dev->dev, "unable to register backlight device: %ld\n", - PTR_ERR(backlight)); - return PTR_ERR(backlight); - } - - backlight->props.max_brightness = bdata->max_brightness; - backlight->props.brightness = bdata->max_brightness; - backlight->props.power = FB_BLANK_POWERDOWN; - backlight_update_status(backlight); - - scon->backlight = backlight; - return 0; -} - -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon) -{ - backlight_device_unregister(scon->backlight); -} diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h deleted file mode 100644 index d9abb7a60be5..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * shmob_drm_backlight.h -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#ifndef __SHMOB_DRM_BACKLIGHT_H__ -#define __SHMOB_DRM_BACKLIGHT_H__ - -struct shmob_drm_connector; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode); -int shmob_drm_backlight_init(struct shmob_drm_connector *scon); -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon); - -#endif /* __SHMOB_DRM_BACKLIGHT_H__ */ diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c index 11dd2bc803e7..2e2f37b9d0a4 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c @@ -7,9 +7,18 @@ * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ -#include <linux/backlight.h> #include <linux/clk.h> - +#include <linux/media-bus-format.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_atomic_state_helper.h> +#include <drm/drm_atomic_uapi.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_dma_helper.h> @@ -18,85 +27,123 @@ #include <drm/drm_gem_dma_helper.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_plane_helper.h> +#include <drm/drm_panel.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> #include <drm/drm_vblank.h> -#include "shmob_drm_backlight.h" +#include <video/videomode.h> + #include "shmob_drm_crtc.h" #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" #include "shmob_drm_plane.h" #include "shmob_drm_regs.h" -/* - * TODO: panel support - */ - /* ----------------------------------------------------------------------------- - * Clock management + * Page Flip */ -static int shmob_drm_clk_on(struct shmob_drm_device *sdev) +void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc 
*scrtc) { - int ret; + struct drm_pending_vblank_event *event; + struct drm_device *dev = scrtc->base.dev; + unsigned long flags; - if (sdev->clock) { - ret = clk_prepare_enable(sdev->clock); - if (ret < 0) - return ret; + spin_lock_irqsave(&dev->event_lock, flags); + event = scrtc->event; + scrtc->event = NULL; + if (event) { + drm_crtc_send_vblank_event(&scrtc->base, event); + wake_up(&scrtc->flip_wait); + drm_crtc_vblank_put(&scrtc->base); } + spin_unlock_irqrestore(&dev->event_lock, flags); +} - return 0; +static bool shmob_drm_crtc_page_flip_pending(struct shmob_drm_crtc *scrtc) +{ + struct drm_device *dev = scrtc->base.dev; + unsigned long flags; + bool pending; + + spin_lock_irqsave(&dev->event_lock, flags); + pending = scrtc->event != NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); + + return pending; } -static void shmob_drm_clk_off(struct shmob_drm_device *sdev) +static void shmob_drm_crtc_wait_page_flip(struct shmob_drm_crtc *scrtc) { - if (sdev->clock) - clk_disable_unprepare(sdev->clock); + struct drm_crtc *crtc = &scrtc->base; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + + if (wait_event_timeout(scrtc->flip_wait, + !shmob_drm_crtc_page_flip_pending(scrtc), + msecs_to_jiffies(50))) + return; + + dev_warn(sdev->dev, "page flip timeout\n"); + + shmob_drm_crtc_finish_page_flip(scrtc); } /* ----------------------------------------------------------------------------- * CRTC */ +static const struct { + u32 fmt; + u32 ldmt1r; +} shmob_drm_bus_fmts[] = { + { MEDIA_BUS_FMT_RGB888_3X8, LDMT1R_MIFTYP_RGB8 }, + { MEDIA_BUS_FMT_RGB666_2X9_BE, LDMT1R_MIFTYP_RGB9 }, + { MEDIA_BUS_FMT_RGB888_2X12_BE, LDMT1R_MIFTYP_RGB12A }, + { MEDIA_BUS_FMT_RGB444_1X12, LDMT1R_MIFTYP_RGB12B }, + { MEDIA_BUS_FMT_RGB565_1X16, LDMT1R_MIFTYP_RGB16 }, + { MEDIA_BUS_FMT_RGB666_1X18, LDMT1R_MIFTYP_RGB18 }, + { MEDIA_BUS_FMT_RGB888_1X24, LDMT1R_MIFTYP_RGB24 }, + { MEDIA_BUS_FMT_UYVY8_1X16, LDMT1R_MIFTYP_YCBCR }, +}; + static void shmob_drm_crtc_setup_geometry(struct shmob_drm_crtc *scrtc) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_interface_data *idata = &sdev->pdata->iface; + struct drm_crtc *crtc = &scrtc->base; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + const struct drm_display_info *info = &sdev->connector->display_info; const struct drm_display_mode *mode = &crtc->mode; + unsigned int i; u32 value; - value = sdev->ldmt1r - | ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : LDMT1R_VPOL) - | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : LDMT1R_HPOL) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DWPOL) ? LDMT1R_DWPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DIPOL) ? LDMT1R_DIPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DAPOL) ? LDMT1R_DAPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_HSCNT) ? LDMT1R_HSCNT : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DWCNT) ? LDMT1R_DWCNT : 0); - lcdc_write(sdev, LDMT1R, value); - - if (idata->interface >= SHMOB_DRM_IFACE_SYS8A && - idata->interface <= SHMOB_DRM_IFACE_SYS24) { - /* Setup SYS bus. */ - value = (idata->sys.cs_setup << LDMT2R_CSUP_SHIFT) - | (idata->sys.vsync_active_high ? LDMT2R_RSV : 0) - | (idata->sys.vsync_dir_input ? 
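The flip machinery added here is the classic waitqueue pairing: the vblank interrupt path (finish_page_flip) clears scrtc->event and wakes flip_wait, while the teardown path sleeps until no flip is pending, with a 50 ms timeout after which the flip is force-completed. A minimal sketch of the consumer side of that pattern:

#include <linux/wait.h>
#include <linux/jiffies.h>

/* Wait briefly for the IRQ side to complete the flip; on timeout, the
 * caller force-completes it so userspace never waits forever on the
 * event (as shmob does via shmob_drm_crtc_finish_page_flip()). */
static void wait_for_flip(wait_queue_head_t *wq, bool *pending)
{
	if (wait_event_timeout(*wq, !*pending, msecs_to_jiffies(50)))
		return;		/* completed in time */
	/* timeout: fall through and let the caller force completion */
}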
LDMT2R_VSEL : 0) - | (idata->sys.write_setup << LDMT2R_WCSC_SHIFT) - | (idata->sys.write_cycle << LDMT2R_WCEC_SHIFT) - | (idata->sys.write_strobe << LDMT2R_WCLW_SHIFT); - lcdc_write(sdev, LDMT2R, value); - - value = (idata->sys.read_latch << LDMT3R_RDLC_SHIFT) - | (idata->sys.read_setup << LDMT3R_RCSC_SHIFT) - | (idata->sys.read_cycle << LDMT3R_RCEC_SHIFT) - | (idata->sys.read_strobe << LDMT3R_RCLW_SHIFT); - lcdc_write(sdev, LDMT3R, value); + if (!info->num_bus_formats || !info->bus_formats) { + dev_warn(sdev->dev, "No bus format reported, using RGB888\n"); + value = LDMT1R_MIFTYP_RGB24; + } else { + for (i = 0; i < ARRAY_SIZE(shmob_drm_bus_fmts); i++) { + if (shmob_drm_bus_fmts[i].fmt == info->bus_formats[0]) + break; + } + if (i < ARRAY_SIZE(shmob_drm_bus_fmts)) { + value = shmob_drm_bus_fmts[i].ldmt1r; + } else { + dev_warn(sdev->dev, + "unsupported bus format 0x%x, using RGB888\n", + info->bus_formats[0]); + value = LDMT1R_MIFTYP_RGB24; + } } + if (info->bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE) + value |= LDMT1R_DWPOL; + if (info->bus_flags & DRM_BUS_FLAG_DE_LOW) + value |= LDMT1R_DIPOL; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + value |= LDMT1R_VPOL; + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + value |= LDMT1R_HPOL; + lcdc_write(sdev, LDMT1R, value); + value = ((mode->hdisplay / 8) << 16) /* HDCN */ | (mode->htotal / 8); /* HTCN */ lcdc_write(sdev, LDHCNR, value); @@ -121,7 +168,7 @@ static void shmob_drm_crtc_setup_geometry(struct shmob_drm_crtc *scrtc) static void shmob_drm_crtc_start_stop(struct shmob_drm_crtc *scrtc, bool start) { - struct shmob_drm_device *sdev = scrtc->crtc.dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(scrtc->base.dev); u32 value; value = lcdc_read(sdev, LDCNT2R); @@ -145,34 +192,23 @@ static void shmob_drm_crtc_start_stop(struct shmob_drm_crtc *scrtc, bool start) } } -/* - * shmob_drm_crtc_start - Configure and start the LCDC - * @scrtc: the SH Mobile CRTC - * - * Configure and start the LCDC device. External devices (clocks, MERAM, panels, - * ...) are not touched by this function. - */ -static void shmob_drm_crtc_start(struct shmob_drm_crtc *scrtc) +static inline struct shmob_drm_crtc *to_shmob_crtc(struct drm_crtc *crtc) +{ + return container_of(crtc, struct shmob_drm_crtc, base); +} + +static void shmob_drm_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_interface_data *idata = &sdev->pdata->iface; - const struct shmob_drm_format_info *format; - struct drm_device *dev = sdev->ddev; - struct drm_plane *plane; + struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + unsigned int clk_div = sdev->config.clk_div; + struct device *dev = sdev->dev; u32 value; int ret; - if (scrtc->started) - return; - - format = shmob_drm_format_info(crtc->primary->fb->format->format); - if (WARN_ON(format == NULL)) - return; - - /* Enable clocks before accessing the hardware. */ - ret = shmob_drm_clk_on(sdev); - if (ret < 0) + ret = pm_runtime_resume_and_get(dev); + if (ret) return; /* Reset and enable the LCDC. */ @@ -188,79 +224,50 @@ static void shmob_drm_crtc_start(struct shmob_drm_crtc *scrtc) lcdc_write(sdev, LDPMR, 0); value = sdev->lddckr; - if (idata->clk_div) { + if (clk_div) { /* FIXME: sh7724 can only use 42, 48, 54 and 60 for the divider * denominator. 
*/ lcdc_write(sdev, LDDCKPAT1R, 0); - lcdc_write(sdev, LDDCKPAT2R, (1 << (idata->clk_div / 2)) - 1); + lcdc_write(sdev, LDDCKPAT2R, (1 << (clk_div / 2)) - 1); - if (idata->clk_div == 1) + if (clk_div == 1) value |= LDDCKR_MOSEL; else - value |= idata->clk_div; + value |= clk_div; } lcdc_write(sdev, LDDCKR, value); lcdc_write(sdev, LDDCKSTPR, 0); lcdc_wait_bit(sdev, LDDCKSTPR, ~0, 0); - /* TODO: Setup SYS panel */ - /* Setup geometry, format, frame buffer memory and operation mode. */ shmob_drm_crtc_setup_geometry(scrtc); - /* TODO: Handle YUV colorspaces. Hardcode REC709 for now. */ - lcdc_write(sdev, LDDFR, format->lddfr | LDDFR_CF1); - lcdc_write(sdev, LDMLSR, scrtc->line_size); - lcdc_write(sdev, LDSA1R, scrtc->dma[0]); - if (format->yuv) - lcdc_write(sdev, LDSA2R, scrtc->dma[1]); lcdc_write(sdev, LDSM1R, 0); - /* Word and long word swap. */ - switch (format->fourcc) { - case DRM_FORMAT_RGB565: - case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: - case DRM_FORMAT_NV42: - value = LDDDSR_LS | LDDDSR_WS; - break; - case DRM_FORMAT_RGB888: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV24: - value = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS; - break; - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - default: - value = LDDDSR_LS; - break; - } - lcdc_write(sdev, LDDDSR, value); - - /* Setup planes. */ - drm_for_each_legacy_plane(plane, dev) { - if (plane->crtc == crtc) - shmob_drm_plane_setup(plane); - } - /* Enable the display output. */ lcdc_write(sdev, LDCNT1R, LDCNT1R_DE); shmob_drm_crtc_start_stop(scrtc, true); - scrtc->started = true; + /* Turn vertical blank interrupt reporting back on. */ + drm_crtc_vblank_on(crtc); } -static void shmob_drm_crtc_stop(struct shmob_drm_crtc *scrtc) +static void shmob_drm_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); - if (!scrtc->started) - return; + /* + * Disable vertical blank interrupt reporting. We first need to wait + * for page flip completion before stopping the CRTC as userspace + * expects page flips to eventually complete. + */ + shmob_drm_crtc_wait_page_flip(scrtc); + drm_crtc_vblank_off(crtc); /* Stop the LCDC. */ shmob_drm_crtc_start_stop(scrtc, false); @@ -268,145 +275,31 @@ static void shmob_drm_crtc_stop(struct shmob_drm_crtc *scrtc) /* Disable the display output. */ lcdc_write(sdev, LDCNT1R, 0); - /* Stop clocks. */ - shmob_drm_clk_off(sdev); - - scrtc->started = false; -} - -void shmob_drm_crtc_suspend(struct shmob_drm_crtc *scrtc) -{ - shmob_drm_crtc_stop(scrtc); -} - -void shmob_drm_crtc_resume(struct shmob_drm_crtc *scrtc) -{ - if (scrtc->dpms != DRM_MODE_DPMS_ON) - return; - - shmob_drm_crtc_start(scrtc); + pm_runtime_put(sdev->dev); } -static void shmob_drm_crtc_compute_base(struct shmob_drm_crtc *scrtc, - int x, int y) +static void shmob_drm_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_gem_dma_object *gem; - unsigned int bpp; - - bpp = scrtc->format->yuv ? 
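The dot-clock divider is programmed as a bit pattern: (1 << (clk_div / 2)) - 1 sets the low clk_div/2 bits of LDDCKPAT2R. The wider pattern semantics are LCDC hardware detail; the arithmetic itself is just a mask builder:

#include <stdint.h>
#include <stdio.h>

static uint32_t lddckpat2r_pattern(unsigned int clk_div)
{
	return (1u << (clk_div / 2)) - 1;	/* clk_div/2 low bits set */
}

int main(void)
{
	printf("0x%x\n", lddckpat2r_pattern(8));	/* -> 0xf */
	return 0;
}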
8 : scrtc->format->bpp; - gem = drm_fb_dma_get_gem_obj(fb, 0); - scrtc->dma[0] = gem->dma_addr + fb->offsets[0] - + y * fb->pitches[0] + x * bpp / 8; - - if (scrtc->format->yuv) { - bpp = scrtc->format->bpp - 8; - gem = drm_fb_dma_get_gem_obj(fb, 1); - scrtc->dma[1] = gem->dma_addr + fb->offsets[1] - + y / (bpp == 4 ? 2 : 1) * fb->pitches[1] - + x * (bpp == 16 ? 2 : 1); - } -} - -static void shmob_drm_crtc_update_base(struct shmob_drm_crtc *scrtc) -{ - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - - shmob_drm_crtc_compute_base(scrtc, crtc->x, crtc->y); - - lcdc_write_mirror(sdev, LDSA1R, scrtc->dma[0]); - if (scrtc->format->yuv) - lcdc_write_mirror(sdev, LDSA2R, scrtc->dma[1]); - - lcdc_write(sdev, LDRCNTR, lcdc_read(sdev, LDRCNTR) ^ LDRCNTR_MRS); -} - -#define to_shmob_crtc(c) container_of(c, struct shmob_drm_crtc, crtc) - -static void shmob_drm_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - - if (scrtc->dpms == mode) - return; - - if (mode == DRM_MODE_DPMS_ON) - shmob_drm_crtc_start(scrtc); - else - shmob_drm_crtc_stop(scrtc); - - scrtc->dpms = mode; -} - -static void shmob_drm_crtc_mode_prepare(struct drm_crtc *crtc) -{ - shmob_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} + struct drm_pending_vblank_event *event; + struct drm_device *dev = crtc->dev; + unsigned long flags; -static int shmob_drm_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, - struct drm_framebuffer *old_fb) -{ - struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_format_info *format; - - format = shmob_drm_format_info(crtc->primary->fb->format->format); - if (format == NULL) { - dev_dbg(sdev->dev, "mode_set: unsupported format %p4cc\n", - &crtc->primary->fb->format->format); - return -EINVAL; + if (crtc->state->event) { + spin_lock_irqsave(&dev->event_lock, flags); + event = crtc->state->event; + crtc->state->event = NULL; + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irqrestore(&dev->event_lock, flags); } - - scrtc->format = format; - scrtc->line_size = crtc->primary->fb->pitches[0]; - - shmob_drm_crtc_compute_base(scrtc, x, y); - - return 0; -} - -static void shmob_drm_crtc_mode_commit(struct drm_crtc *crtc) -{ - shmob_drm_crtc_dpms(crtc, DRM_MODE_DPMS_ON); -} - -static int shmob_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - shmob_drm_crtc_update_base(to_shmob_crtc(crtc)); - - return 0; } static const struct drm_crtc_helper_funcs crtc_helper_funcs = { - .dpms = shmob_drm_crtc_dpms, - .prepare = shmob_drm_crtc_mode_prepare, - .commit = shmob_drm_crtc_mode_commit, - .mode_set = shmob_drm_crtc_mode_set, - .mode_set_base = shmob_drm_crtc_mode_set_base, + .atomic_flush = shmob_drm_crtc_atomic_flush, + .atomic_enable = shmob_drm_crtc_atomic_enable, + .atomic_disable = shmob_drm_crtc_atomic_disable, }; -void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc) -{ - struct drm_pending_vblank_event *event; - struct drm_device *dev = scrtc->crtc.dev; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); - event = scrtc->event; - scrtc->event = NULL; - if (event) { - drm_crtc_send_vblank_event(&scrtc->crtc, event); - drm_crtc_vblank_put(&scrtc->crtc); - } - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, struct 
drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -414,7 +307,7 @@ static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - struct drm_device *dev = scrtc->crtc.dev; + struct drm_device *dev = scrtc->base.dev; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); @@ -424,12 +317,11 @@ static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, } spin_unlock_irqrestore(&dev->event_lock, flags); - crtc->primary->fb = fb; - shmob_drm_crtc_update_base(scrtc); + drm_atomic_set_fb_for_plane(crtc->primary->state, fb); if (event) { event->pipe = 0; - drm_crtc_vblank_get(&scrtc->crtc); + drm_crtc_vblank_get(&scrtc->base); spin_lock_irqsave(&dev->event_lock, flags); scrtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); @@ -457,7 +349,7 @@ static void shmob_drm_crtc_enable_vblank(struct shmob_drm_device *sdev, static int shmob_drm_enable_vblank(struct drm_crtc *crtc) { - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); shmob_drm_crtc_enable_vblank(sdev, true); @@ -466,88 +358,65 @@ static int shmob_drm_enable_vblank(struct drm_crtc *crtc) static void shmob_drm_disable_vblank(struct drm_crtc *crtc) { - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); shmob_drm_crtc_enable_vblank(sdev, false); } static const struct drm_crtc_funcs crtc_funcs = { + .reset = drm_atomic_helper_crtc_reset, .destroy = drm_crtc_cleanup, - .set_config = drm_crtc_helper_set_config, + .set_config = drm_atomic_helper_set_config, .page_flip = shmob_drm_crtc_page_flip, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = shmob_drm_enable_vblank, .disable_vblank = shmob_drm_disable_vblank, }; -static const uint32_t modeset_formats[] = { - DRM_FORMAT_RGB565, - DRM_FORMAT_RGB888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_XRGB8888, -}; - -static const struct drm_plane_funcs primary_plane_funcs = { - DRM_PLANE_NON_ATOMIC_FUNCS, -}; - int shmob_drm_crtc_create(struct shmob_drm_device *sdev) { - struct drm_crtc *crtc = &sdev->crtc.crtc; - struct drm_plane *primary; + struct drm_crtc *crtc = &sdev->crtc.base; + struct drm_plane *primary, *plane; + unsigned int i; int ret; - sdev->crtc.dpms = DRM_MODE_DPMS_OFF; + init_waitqueue_head(&sdev->crtc.flip_wait); - primary = __drm_universal_plane_alloc(sdev->ddev, sizeof(*primary), 0, - 0, &primary_plane_funcs, - modeset_formats, - ARRAY_SIZE(modeset_formats), - NULL, DRM_PLANE_TYPE_PRIMARY, - NULL); + primary = shmob_drm_plane_create(sdev, DRM_PLANE_TYPE_PRIMARY, 0); if (IS_ERR(primary)) return PTR_ERR(primary); - ret = drm_crtc_init_with_planes(sdev->ddev, crtc, primary, NULL, + for (i = 1; i < 5; ++i) { + plane = shmob_drm_plane_create(sdev, DRM_PLANE_TYPE_OVERLAY, i); + if (IS_ERR(plane)) + return PTR_ERR(plane); + } + + ret = drm_crtc_init_with_planes(&sdev->ddev, crtc, primary, NULL, &crtc_funcs, NULL); - if (ret < 0) { - drm_plane_cleanup(primary); - kfree(primary); + if (ret < 0) return ret; - } drm_crtc_helper_add(crtc, &crtc_helper_funcs); + /* Start with vertical blank interrupt reporting disabled. 
*/ + drm_crtc_vblank_off(crtc); + return 0; } /* ----------------------------------------------------------------------------- - * Encoder + * Legacy Encoder */ -#define to_shmob_encoder(e) \ - container_of(e, struct shmob_drm_encoder, encoder) - -static void shmob_drm_encoder_dpms(struct drm_encoder *encoder, int mode) -{ - struct shmob_drm_encoder *senc = to_shmob_encoder(encoder); - struct shmob_drm_device *sdev = encoder->dev->dev_private; - struct shmob_drm_connector *scon = &sdev->connector; - - if (senc->dpms == mode) - return; - - shmob_drm_backlight_dpms(scon, mode); - - senc->dpms = mode; -} - static bool shmob_drm_encoder_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = encoder->dev; - struct shmob_drm_device *sdev = dev->dev_private; - struct drm_connector *connector = &sdev->connector.connector; + struct shmob_drm_device *sdev = to_shmob_device(dev); + struct drm_connector *connector = sdev->connector; const struct drm_display_mode *panel_mode; if (list_empty(&connector->modes)) { @@ -563,60 +432,61 @@ static bool shmob_drm_encoder_mode_fixup(struct drm_encoder *encoder, return true; } -static void shmob_drm_encoder_mode_prepare(struct drm_encoder *encoder) -{ - /* No-op, everything is handled in the CRTC code. */ -} - -static void shmob_drm_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - /* No-op, everything is handled in the CRTC code. */ -} - -static void shmob_drm_encoder_mode_commit(struct drm_encoder *encoder) -{ - /* No-op, everything is handled in the CRTC code. */ -} - static const struct drm_encoder_helper_funcs encoder_helper_funcs = { - .dpms = shmob_drm_encoder_dpms, .mode_fixup = shmob_drm_encoder_mode_fixup, - .prepare = shmob_drm_encoder_mode_prepare, - .commit = shmob_drm_encoder_mode_commit, - .mode_set = shmob_drm_encoder_mode_set, }; +/* ----------------------------------------------------------------------------- + * Encoder + */ + int shmob_drm_encoder_create(struct shmob_drm_device *sdev) { - struct drm_encoder *encoder = &sdev->encoder.encoder; + struct drm_encoder *encoder = &sdev->encoder; + struct drm_bridge *bridge; int ret; - sdev->encoder.dpms = DRM_MODE_DPMS_OFF; - encoder->possible_crtcs = 1; - ret = drm_simple_encoder_init(sdev->ddev, encoder, - DRM_MODE_ENCODER_LVDS); + ret = drm_simple_encoder_init(&sdev->ddev, encoder, + DRM_MODE_ENCODER_DPI); if (ret < 0) return ret; - drm_encoder_helper_add(encoder, &encoder_helper_funcs); + if (sdev->pdata) { + drm_encoder_helper_add(encoder, &encoder_helper_funcs); + return 0; + } + + /* Create a panel bridge */ + bridge = devm_drm_of_get_bridge(sdev->dev, sdev->dev->of_node, 0, 0); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + + /* Attach the bridge to the encoder */ + ret = drm_bridge_attach(encoder, bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) { + dev_err(sdev->dev, "failed to attach bridge: %pe\n", + ERR_PTR(ret)); + return ret; + } return 0; } /* ----------------------------------------------------------------------------- - * Connector + * Legacy Connector */ -#define to_shmob_connector(c) \ - container_of(c, struct shmob_drm_connector, connector) +static inline struct shmob_drm_connector *to_shmob_connector(struct drm_connector *connector) +{ + return container_of(connector, struct shmob_drm_connector, base); +} static int shmob_drm_connector_get_modes(struct drm_connector *connector) { - struct 
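The DT path pairs DRM_BRIDGE_ATTACH_NO_CONNECTOR with drm_bridge_connector_init(): the encoder attaches the panel bridge without creating a connector, and shmob_drm_connector_create() later synthesizes the connector from the bridge chain. A condensed sketch of the two halves wired together (error handling trimmed):

/* Sketch only: encoder/bridge wiring on the DT path. The connector is
 * created from the bridge chain rather than by the bridge itself. */
static int wire_dt_output(struct drm_device *drm,
			  struct drm_encoder *encoder,
			  struct drm_bridge *bridge)
{
	struct drm_connector *connector;
	int ret;

	ret = drm_bridge_attach(encoder, bridge, NULL,
				DRM_BRIDGE_ATTACH_NO_CONNECTOR);
	if (ret)
		return ret;

	connector = drm_bridge_connector_init(drm, encoder);
	if (IS_ERR(connector))
		return PTR_ERR(connector);

	return drm_connector_attach_encoder(connector, encoder);
}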
shmob_drm_device *sdev = connector->dev->dev_private; + struct shmob_drm_connector *scon = to_shmob_connector(connector); struct drm_display_mode *mode; mode = drm_mode_create(connector->dev); @@ -624,22 +494,10 @@ static int shmob_drm_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = sdev->pdata->panel.mode.clock; - mode->hdisplay = sdev->pdata->panel.mode.hdisplay; - mode->hsync_start = sdev->pdata->panel.mode.hsync_start; - mode->hsync_end = sdev->pdata->panel.mode.hsync_end; - mode->htotal = sdev->pdata->panel.mode.htotal; - mode->vdisplay = sdev->pdata->panel.mode.vdisplay; - mode->vsync_start = sdev->pdata->panel.mode.vsync_start; - mode->vsync_end = sdev->pdata->panel.mode.vsync_end; - mode->vtotal = sdev->pdata->panel.mode.vtotal; - mode->flags = sdev->pdata->panel.mode.flags; - - drm_mode_set_name(mode); - drm_mode_probed_add(connector, mode); - connector->display_info.width_mm = sdev->pdata->panel.width_mm; - connector->display_info.height_mm = sdev->pdata->panel.height_mm; + drm_display_mode_from_videomode(scon->mode, mode); + + drm_mode_probed_add(connector, mode); return 1; } @@ -659,54 +517,106 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { static void shmob_drm_connector_destroy(struct drm_connector *connector) { - struct shmob_drm_connector *scon = to_shmob_connector(connector); - - shmob_drm_backlight_exit(scon); drm_connector_unregister(connector); drm_connector_cleanup(connector); + + kfree(connector); } static const struct drm_connector_funcs connector_funcs = { - .dpms = drm_helper_connector_dpms, + .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = shmob_drm_connector_destroy, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -int shmob_drm_connector_create(struct shmob_drm_device *sdev, - struct drm_encoder *encoder) +static struct drm_connector * +shmob_drm_connector_init(struct shmob_drm_device *sdev, + struct drm_encoder *encoder) { - struct drm_connector *connector = &sdev->connector.connector; + u32 bus_fmt = sdev->pdata->iface.bus_fmt; + struct shmob_drm_connector *scon; + struct drm_connector *connector; + struct drm_display_info *info; + unsigned int i; int ret; - sdev->connector.encoder = encoder; + for (i = 0; i < ARRAY_SIZE(shmob_drm_bus_fmts); i++) { + if (shmob_drm_bus_fmts[i].fmt == bus_fmt) + break; + } + if (i == ARRAY_SIZE(shmob_drm_bus_fmts)) { + dev_err(sdev->dev, "unsupported bus format 0x%x\n", bus_fmt); + return ERR_PTR(-EINVAL); + } - connector->display_info.width_mm = sdev->pdata->panel.width_mm; - connector->display_info.height_mm = sdev->pdata->panel.height_mm; + scon = kzalloc(sizeof(*scon), GFP_KERNEL); + if (!scon) + return ERR_PTR(-ENOMEM); - ret = drm_connector_init(sdev->ddev, connector, &connector_funcs, - DRM_MODE_CONNECTOR_LVDS); - if (ret < 0) - return ret; + connector = &scon->base; + scon->encoder = encoder; + scon->mode = &sdev->pdata->panel.mode; + + info = &connector->display_info; + info->width_mm = sdev->pdata->panel.width_mm; + info->height_mm = sdev->pdata->panel.height_mm; + + if (scon->mode->flags & DISPLAY_FLAGS_PIXDATA_POSEDGE) + info->bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE; + if (scon->mode->flags & DISPLAY_FLAGS_DE_LOW) + info->bus_flags |= DRM_BUS_FLAG_DE_LOW; + + ret = drm_display_info_set_bus_formats(info, &bus_fmt, 1); + if (ret < 0) { 
+ kfree(scon); + return ERR_PTR(ret); + } + + ret = drm_connector_init(&sdev->ddev, connector, &connector_funcs, + DRM_MODE_CONNECTOR_DPI); + if (ret < 0) { + kfree(scon); + return ERR_PTR(ret); + } drm_connector_helper_add(connector, &connector_helper_funcs); - ret = shmob_drm_backlight_init(&sdev->connector); - if (ret < 0) - goto err_cleanup; + return connector; +} + +/* ----------------------------------------------------------------------------- + * Connector + */ + +int shmob_drm_connector_create(struct shmob_drm_device *sdev, + struct drm_encoder *encoder) +{ + struct drm_connector *connector; + int ret; + + if (sdev->pdata) + connector = shmob_drm_connector_init(sdev, encoder); + else + connector = drm_bridge_connector_init(&sdev->ddev, encoder); + if (IS_ERR(connector)) { + dev_err(sdev->dev, "failed to create connector: %pe\n", + connector); + return PTR_ERR(connector); + } ret = drm_connector_attach_encoder(connector, encoder); if (ret < 0) - goto err_backlight; + goto error; + + connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - drm_object_property_set_value(&connector->base, - sdev->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF); + sdev->connector = connector; return 0; -err_backlight: - shmob_drm_backlight_exit(&sdev->connector); -err_cleanup: +error: drm_connector_cleanup(connector); return ret; } diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h index 21718843f46d..16e1712dd04e 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h @@ -14,39 +14,30 @@ #include <drm/drm_connector.h> #include <drm/drm_encoder.h> -struct backlight_device; +#include <linux/wait.h> + +#include <video/videomode.h> + struct drm_pending_vblank_event; struct shmob_drm_device; struct shmob_drm_format_info; struct shmob_drm_crtc { - struct drm_crtc crtc; + struct drm_crtc base; struct drm_pending_vblank_event *event; - int dpms; - - const struct shmob_drm_format_info *format; - unsigned long dma[2]; - unsigned int line_size; - bool started; -}; - -struct shmob_drm_encoder { - struct drm_encoder encoder; - int dpms; + wait_queue_head_t flip_wait; }; +/* Legacy connector */ struct shmob_drm_connector { - struct drm_connector connector; + struct drm_connector base; struct drm_encoder *encoder; - - struct backlight_device *backlight; + const struct videomode *mode; }; int shmob_drm_crtc_create(struct shmob_drm_device *sdev); void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc); -void shmob_drm_crtc_suspend(struct shmob_drm_crtc *scrtc); -void shmob_drm_crtc_resume(struct shmob_drm_crtc *scrtc); int shmob_drm_encoder_create(struct shmob_drm_device *sdev); int shmob_drm_connector_create(struct shmob_drm_device *sdev, diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c index e5db4e0095ba..e83c3e52251d 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c @@ -11,13 +11,17 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_gem_dma_helper.h> +#include <drm/drm_modeset_helper.h> #include <drm/drm_module.h> #include
<drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -31,57 +35,23 @@ * Hardware initialization */ -static int shmob_drm_init_interface(struct shmob_drm_device *sdev) -{ - static const u32 ldmt1r[] = { - [SHMOB_DRM_IFACE_RGB8] = LDMT1R_MIFTYP_RGB8, - [SHMOB_DRM_IFACE_RGB9] = LDMT1R_MIFTYP_RGB9, - [SHMOB_DRM_IFACE_RGB12A] = LDMT1R_MIFTYP_RGB12A, - [SHMOB_DRM_IFACE_RGB12B] = LDMT1R_MIFTYP_RGB12B, - [SHMOB_DRM_IFACE_RGB16] = LDMT1R_MIFTYP_RGB16, - [SHMOB_DRM_IFACE_RGB18] = LDMT1R_MIFTYP_RGB18, - [SHMOB_DRM_IFACE_RGB24] = LDMT1R_MIFTYP_RGB24, - [SHMOB_DRM_IFACE_YUV422] = LDMT1R_MIFTYP_YCBCR, - [SHMOB_DRM_IFACE_SYS8A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8A, - [SHMOB_DRM_IFACE_SYS8B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8B, - [SHMOB_DRM_IFACE_SYS8C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8C, - [SHMOB_DRM_IFACE_SYS8D] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8D, - [SHMOB_DRM_IFACE_SYS9] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS9, - [SHMOB_DRM_IFACE_SYS12] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS12, - [SHMOB_DRM_IFACE_SYS16A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16A, - [SHMOB_DRM_IFACE_SYS16B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16B, - [SHMOB_DRM_IFACE_SYS16C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16C, - [SHMOB_DRM_IFACE_SYS18] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS18, - [SHMOB_DRM_IFACE_SYS24] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS24, - }; - - if (sdev->pdata->iface.interface >= ARRAY_SIZE(ldmt1r)) { - dev_err(sdev->dev, "invalid interface type %u\n", - sdev->pdata->iface.interface); - return -EINVAL; - } - - sdev->ldmt1r = ldmt1r[sdev->pdata->iface.interface]; - return 0; -} - static int shmob_drm_setup_clocks(struct shmob_drm_device *sdev, - enum shmob_drm_clk_source clksrc) + enum shmob_drm_clk_source clksrc) { struct clk *clk; char *clkname; switch (clksrc) { case SHMOB_DRM_CLK_BUS: - clkname = "bus_clk"; + clkname = "fck"; sdev->lddckr = LDDCKR_ICKSEL_BUS; break; case SHMOB_DRM_CLK_PERIPHERAL: - clkname = "peripheral_clk"; + clkname = "media"; sdev->lddckr = LDDCKR_ICKSEL_MIPI; break; case SHMOB_DRM_CLK_EXTERNAL: - clkname = NULL; + clkname = "lclk"; sdev->lddckr = LDDCKR_ICKSEL_HDMI; break; default: @@ -105,7 +75,7 @@ static int shmob_drm_setup_clocks(struct shmob_drm_device *sdev, static irqreturn_t shmob_drm_irq(int irq, void *arg) { struct drm_device *dev = arg; - struct shmob_drm_device *sdev = dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(dev); unsigned long flags; u32 status; @@ -119,7 +89,7 @@ static irqreturn_t shmob_drm_irq(int irq, void *arg) spin_unlock_irqrestore(&sdev->irq_lock, flags); if (status & LDINTR_VES) { - drm_handle_vblank(dev, 0); + drm_crtc_handle_vblank(&sdev->crtc.base); shmob_drm_crtc_finish_page_flip(&sdev->crtc); } @@ -129,7 +99,7 @@ static irqreturn_t shmob_drm_irq(int irq, void *arg) DEFINE_DRM_GEM_DMA_FOPS(shmob_drm_fops); static const struct drm_driver shmob_drm_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET, + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, DRM_GEM_DMA_DRIVER_OPS, .fops = &shmob_drm_fops, .name = "shmob-drm", @@ -147,26 +117,45 @@ static int shmob_drm_pm_suspend(struct device *dev) { struct shmob_drm_device *sdev = dev_get_drvdata(dev); - drm_kms_helper_poll_disable(sdev->ddev); - shmob_drm_crtc_suspend(&sdev->crtc); + return drm_mode_config_helper_suspend(&sdev->ddev); +} + +static int shmob_drm_pm_resume(struct device *dev) +{ + struct shmob_drm_device *sdev = dev_get_drvdata(dev); + + return drm_mode_config_helper_resume(&sdev->ddev); +} + +static int shmob_drm_pm_runtime_suspend(struct device *dev) +{ + struct shmob_drm_device *sdev = 
dev_get_drvdata(dev); + + if (sdev->clock) + clk_disable_unprepare(sdev->clock); return 0; } -static int shmob_drm_pm_resume(struct device *dev) +static int shmob_drm_pm_runtime_resume(struct device *dev) { struct shmob_drm_device *sdev = dev_get_drvdata(dev); + int ret; - drm_modeset_lock_all(sdev->ddev); - shmob_drm_crtc_resume(&sdev->crtc); - drm_modeset_unlock_all(sdev->ddev); + if (sdev->clock) { + ret = clk_prepare_enable(sdev->clock); + if (ret < 0) + return ret; + } - drm_kms_helper_poll_enable(sdev->ddev); return 0; } -static DEFINE_SIMPLE_DEV_PM_OPS(shmob_drm_pm_ops, - shmob_drm_pm_suspend, shmob_drm_pm_resume); +static const struct dev_pm_ops shmob_drm_pm_ops = { + SYSTEM_SLEEP_PM_OPS(shmob_drm_pm_suspend, shmob_drm_pm_resume) + RUNTIME_PM_OPS(shmob_drm_pm_runtime_suspend, + shmob_drm_pm_runtime_resume, NULL) +}; /* ----------------------------------------------------------------------------- * Platform driver @@ -175,37 +164,45 @@ static DEFINE_SIMPLE_DEV_PM_OPS(shmob_drm_pm_ops, static void shmob_drm_remove(struct platform_device *pdev) { struct shmob_drm_device *sdev = platform_get_drvdata(pdev); - struct drm_device *ddev = sdev->ddev; + struct drm_device *ddev = &sdev->ddev; drm_dev_unregister(ddev); + drm_atomic_helper_shutdown(ddev); drm_kms_helper_poll_fini(ddev); - free_irq(sdev->irq, ddev); - drm_dev_put(ddev); } static int shmob_drm_probe(struct platform_device *pdev) { struct shmob_drm_platform_data *pdata = pdev->dev.platform_data; + const struct shmob_drm_config *config; struct shmob_drm_device *sdev; struct drm_device *ddev; - unsigned int i; int ret; - if (pdata == NULL) { + config = of_device_get_match_data(&pdev->dev); + if (!config && !pdata) { dev_err(&pdev->dev, "no platform data\n"); return -EINVAL; } /* - * Allocate and initialize the driver private data, I/O resources and - * clocks. + * Allocate and initialize the DRM device, driver private data, I/O + * resources and clocks. */ - sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL); - if (sdev == NULL) - return -ENOMEM; + sdev = devm_drm_dev_alloc(&pdev->dev, &shmob_drm_driver, + struct shmob_drm_device, ddev); + if (IS_ERR(sdev)) + return PTR_ERR(sdev); + ddev = &sdev->ddev; sdev->dev = &pdev->dev; - sdev->pdata = pdata; + if (config) { + sdev->config = *config; + } else { + sdev->pdata = pdata; + sdev->config.clk_source = pdata->clk_source; + sdev->config.clk_div = pdata->iface.clk_div; + } spin_lock_init(&sdev->irq_lock); platform_set_drvdata(pdev, sdev); @@ -214,49 +211,32 @@ static int shmob_drm_probe(struct platform_device *pdev) if (IS_ERR(sdev->mmio)) return PTR_ERR(sdev->mmio); - ret = shmob_drm_setup_clocks(sdev, pdata->clk_source); + ret = shmob_drm_setup_clocks(sdev, sdev->config.clk_source); if (ret < 0) return ret; - ret = shmob_drm_init_interface(sdev); - if (ret < 0) + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) return ret; - /* Allocate and initialize the DRM device. 
*/ - ddev = drm_dev_alloc(&shmob_drm_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); - - sdev->ddev = ddev; - ddev->dev_private = sdev; - - ret = shmob_drm_modeset_init(sdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to initialize mode setting\n"); - goto err_free_drm_dev; - } - - for (i = 0; i < 4; ++i) { - ret = shmob_drm_plane_create(sdev, i); - if (ret < 0) { - dev_err(&pdev->dev, "failed to create plane %u\n", i); - goto err_modeset_cleanup; - } - } - ret = drm_vblank_init(ddev, 1); if (ret < 0) { dev_err(&pdev->dev, "failed to initialize vblank\n"); - goto err_modeset_cleanup; + return ret; } + ret = shmob_drm_modeset_init(sdev); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to initialize mode setting\n"); + ret = platform_get_irq(pdev, 0); if (ret < 0) goto err_modeset_cleanup; sdev->irq = ret; - ret = request_irq(sdev->irq, shmob_drm_irq, 0, ddev->driver->name, - ddev); + ret = devm_request_irq(&pdev->dev, sdev->irq, shmob_drm_irq, 0, + ddev->driver->name, ddev); if (ret < 0) { dev_err(&pdev->dev, "failed to install IRQ handler\n"); goto err_modeset_cleanup; @@ -268,28 +248,35 @@ static int shmob_drm_probe(struct platform_device *pdev) */ ret = drm_dev_register(ddev, 0); if (ret < 0) - goto err_irq_uninstall; + goto err_modeset_cleanup; drm_fbdev_generic_setup(ddev, 16); return 0; -err_irq_uninstall: - free_irq(sdev->irq, ddev); err_modeset_cleanup: drm_kms_helper_poll_fini(ddev); -err_free_drm_dev: - drm_dev_put(ddev); - return ret; } +static const struct shmob_drm_config shmob_arm_config = { + .clk_source = SHMOB_DRM_CLK_BUS, + .clk_div = 5, +}; + +static const struct of_device_id shmob_drm_of_table[] __maybe_unused = { + { .compatible = "renesas,r8a7740-lcdc", .data = &shmob_arm_config, }, + { .compatible = "renesas,sh73a0-lcdc", .data = &shmob_arm_config, }, + { /* sentinel */ } +}; + static struct platform_driver shmob_drm_platform_driver = { .probe = shmob_drm_probe, .remove_new = shmob_drm_remove, .driver = { .name = "shmob-drm", - .pm = pm_sleep_ptr(&shmob_drm_pm_ops), + .of_match_table = of_match_ptr(shmob_drm_of_table), + .pm = &shmob_drm_pm_ops, }, }; diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h index 4964ddd5ab74..088ac5381e91 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h @@ -20,23 +20,33 @@ struct clk; struct device; struct drm_device; +struct shmob_drm_config { + enum shmob_drm_clk_source clk_source; + unsigned int clk_div; +}; + struct shmob_drm_device { struct device *dev; const struct shmob_drm_platform_data *pdata; + struct shmob_drm_config config; void __iomem *mmio; struct clk *clock; u32 lddckr; - u32 ldmt1r; unsigned int irq; spinlock_t irq_lock; /* Protects hardware LDINTR register */ - struct drm_device *ddev; + struct drm_device ddev; struct shmob_drm_crtc crtc; - struct shmob_drm_encoder encoder; - struct shmob_drm_connector connector; + struct drm_encoder encoder; + struct drm_connector *connector; }; +static inline struct shmob_drm_device *to_shmob_device(struct drm_device *dev) +{ + return container_of(dev, struct shmob_drm_device, ddev); +} + #endif /* __SHMOB_DRM_DRV_H__ */ diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c index 99381cc0abf3..4202ab00fb0c 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c @@ -7,6 +7,7 @@ * Laurent Pinchart 
(laurent.pinchart@ideasonboard.com) */ +#include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fourcc.h> @@ -17,6 +18,7 @@ #include "shmob_drm_crtc.h" #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" +#include "shmob_drm_plane.h" #include "shmob_drm_regs.h" /* ----------------------------------------------------------------------------- @@ -27,53 +29,73 @@ static const struct shmob_drm_format_info shmob_drm_format_infos[] = { { .fourcc = DRM_FORMAT_RGB565, .bpp = 16, - .yuv = false, .lddfr = LDDFR_PKF_RGB16, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_RY | LDBBSIFR_RPKF_RGB16, }, { .fourcc = DRM_FORMAT_RGB888, .bpp = 24, - .yuv = false, .lddfr = LDDFR_PKF_RGB24, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB24, }, { .fourcc = DRM_FORMAT_ARGB8888, .bpp = 32, - .yuv = false, .lddfr = LDDFR_PKF_ARGB32, + .ldddsr = LDDDSR_LS, + .ldbbsifr = LDBBSIFR_AL_PK | LDBBSIFR_SWPL | LDBBSIFR_RY | + LDBBSIFR_RPKF_ARGB32, }, { .fourcc = DRM_FORMAT_XRGB8888, .bpp = 32, - .yuv = false, .lddfr = LDDFR_PKF_ARGB32, + .ldddsr = LDDDSR_LS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_RY | + LDBBSIFR_RPKF_ARGB32, }, { .fourcc = DRM_FORMAT_NV12, .bpp = 12, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_420, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_420, }, { .fourcc = DRM_FORMAT_NV21, .bpp = 12, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_420, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_420, }, { .fourcc = DRM_FORMAT_NV16, .bpp = 16, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_422, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_422, }, { .fourcc = DRM_FORMAT_NV61, .bpp = 16, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_422, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_422, }, { .fourcc = DRM_FORMAT_NV24, .bpp = 24, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_444, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_444, }, { .fourcc = DRM_FORMAT_NV42, .bpp = 24, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_444, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_444, }, }; @@ -112,7 +134,7 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-EINVAL); } - if (format->yuv) { + if (shmob_drm_format_is_yuv(format)) { unsigned int chroma_cpp = format->bpp == 24 ? 
2 : 1; if (mode_cmd->pitches[1] != mode_cmd->pitches[0] * chroma_cpp) { @@ -127,29 +149,40 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, static const struct drm_mode_config_funcs shmob_drm_mode_config_funcs = { .fb_create = shmob_drm_fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, }; int shmob_drm_modeset_init(struct shmob_drm_device *sdev) { + struct drm_device *dev = &sdev->ddev; int ret; - ret = drmm_mode_config_init(sdev->ddev); + ret = drmm_mode_config_init(dev); if (ret) return ret; - shmob_drm_crtc_create(sdev); - shmob_drm_encoder_create(sdev); - shmob_drm_connector_create(sdev, &sdev->encoder.encoder); + ret = shmob_drm_crtc_create(sdev); + if (ret < 0) + return ret; + + ret = shmob_drm_encoder_create(sdev); + if (ret < 0) + return ret; + + ret = shmob_drm_connector_create(sdev, &sdev->encoder); + if (ret < 0) + return ret; - drm_kms_helper_poll_init(sdev->ddev); + drm_mode_config_reset(dev); - sdev->ddev->mode_config.min_width = 0; - sdev->ddev->mode_config.min_height = 0; - sdev->ddev->mode_config.max_width = 4095; - sdev->ddev->mode_config.max_height = 4095; - sdev->ddev->mode_config.funcs = &shmob_drm_mode_config_funcs; + drm_kms_helper_poll_init(dev); - drm_helper_disable_unused_functions(sdev->ddev); + sdev->ddev.mode_config.min_width = 0; + sdev->ddev.mode_config.min_height = 0; + sdev->ddev.mode_config.max_width = 4095; + sdev->ddev.mode_config.max_height = 4095; + sdev->ddev.mode_config.funcs = &shmob_drm_mode_config_funcs; return 0; } diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h index 0347b1fd2338..590162c3db20 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h @@ -17,11 +17,14 @@ struct shmob_drm_device; struct shmob_drm_format_info { u32 fourcc; - unsigned int bpp; - bool yuv; - u32 lddfr; + u32 lddfr; /* LCD Data Format Register */ + u16 ldbbsifr; /* CHn Source Image Format Register low bits */ + u8 ldddsr; /* LCDC Input Image Data Swap Register low bits */ + u8 bpp; }; +#define shmob_drm_format_is_yuv(format) ((format)->lddfr & LDDFR_CC) + const struct shmob_drm_format_info *shmob_drm_format_info(u32 fourcc); int shmob_drm_modeset_init(struct shmob_drm_device *sdev); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c index 850986cee848..8f9a728affde 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c @@ -7,11 +7,14 @@ * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_fb_dma_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_dma_helper.h> +#include <drm/drm_plane_helper.h> #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" @@ -19,102 +22,84 @@ #include "shmob_drm_regs.h" struct shmob_drm_plane { - struct drm_plane plane; + struct drm_plane base; unsigned int index; - unsigned int alpha; +}; + +struct shmob_drm_plane_state { + struct drm_plane_state base; const struct shmob_drm_format_info *format; - unsigned long dma[2]; - - unsigned int src_x; - unsigned int src_y; - unsigned int crtc_x; - unsigned int crtc_y; - unsigned int crtc_w; - unsigned int crtc_h; + u32 dma[2]; }; -#define to_shmob_plane(p) container_of(p, struct shmob_drm_plane, plane) +static inline struct 
shmob_drm_plane *to_shmob_plane(struct drm_plane *plane) +{ + return container_of(plane, struct shmob_drm_plane, base); +} + +static inline struct shmob_drm_plane_state *to_shmob_plane_state(struct drm_plane_state *state) +{ + return container_of(state, struct shmob_drm_plane_state, base); +} -static void shmob_drm_plane_compute_base(struct shmob_drm_plane *splane, - struct drm_framebuffer *fb, - int x, int y) +static void shmob_drm_plane_compute_base(struct shmob_drm_plane_state *sstate) { + struct drm_framebuffer *fb = sstate->base.fb; + unsigned int x = sstate->base.src_x >> 16; + unsigned int y = sstate->base.src_y >> 16; struct drm_gem_dma_object *gem; unsigned int bpp; - bpp = splane->format->yuv ? 8 : splane->format->bpp; + bpp = shmob_drm_format_is_yuv(sstate->format) ? 8 : sstate->format->bpp; gem = drm_fb_dma_get_gem_obj(fb, 0); - splane->dma[0] = gem->dma_addr + fb->offsets[0] + sstate->dma[0] = gem->dma_addr + fb->offsets[0] + y * fb->pitches[0] + x * bpp / 8; - if (splane->format->yuv) { - bpp = splane->format->bpp - 8; + if (shmob_drm_format_is_yuv(sstate->format)) { + bpp = sstate->format->bpp - 8; gem = drm_fb_dma_get_gem_obj(fb, 1); - splane->dma[1] = gem->dma_addr + fb->offsets[1] + sstate->dma[1] = gem->dma_addr + fb->offsets[1] + y / (bpp == 4 ? 2 : 1) * fb->pitches[1] + x * (bpp == 16 ? 2 : 1); } } -static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, - struct drm_framebuffer *fb) +static void shmob_drm_primary_plane_setup(struct shmob_drm_plane *splane, + struct drm_plane_state *state) { - struct shmob_drm_device *sdev = splane->plane.dev->dev_private; + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(state); + struct shmob_drm_device *sdev = to_shmob_device(splane->base.dev); + struct drm_framebuffer *fb = state->fb; + + /* TODO: Handle YUV colorspaces. Hardcode REC709 for now. */ + lcdc_write(sdev, LDDFR, sstate->format->lddfr | LDDFR_CF1); + lcdc_write(sdev, LDMLSR, fb->pitches[0]); + + /* Word and long word swap. 
*/ + lcdc_write(sdev, LDDDSR, sstate->format->ldddsr); + + lcdc_write_mirror(sdev, LDSA1R, sstate->dma[0]); + if (shmob_drm_format_is_yuv(sstate->format)) + lcdc_write_mirror(sdev, LDSA2R, sstate->dma[1]); + + lcdc_write(sdev, LDRCNTR, lcdc_read(sdev, LDRCNTR) ^ LDRCNTR_MRS); +} + +static void shmob_drm_overlay_plane_setup(struct shmob_drm_plane *splane, + struct drm_plane_state *state) +{ + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(state); + struct shmob_drm_device *sdev = to_shmob_device(splane->base.dev); + struct drm_framebuffer *fb = state->fb; u32 format; /* TODO: Support ROP3 mode */ - format = LDBBSIFR_EN | (splane->alpha << LDBBSIFR_LAY_SHIFT); - - switch (splane->format->fourcc) { - case DRM_FORMAT_RGB565: - case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: - case DRM_FORMAT_NV42: - format |= LDBBSIFR_SWPL | LDBBSIFR_SWPW; - break; - case DRM_FORMAT_RGB888: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV24: - format |= LDBBSIFR_SWPL | LDBBSIFR_SWPW | LDBBSIFR_SWPB; - break; - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - default: - format |= LDBBSIFR_SWPL; - break; - } - - switch (splane->format->fourcc) { - case DRM_FORMAT_RGB565: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB16; - break; - case DRM_FORMAT_RGB888: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB24; - break; - case DRM_FORMAT_ARGB8888: - format |= LDBBSIFR_AL_PK | LDBBSIFR_RY | LDDFR_PKF_ARGB32; - break; - case DRM_FORMAT_XRGB8888: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDDFR_PKF_ARGB32; - break; - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_420; - break; - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV61: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_422; - break; - case DRM_FORMAT_NV24: - case DRM_FORMAT_NV42: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_444; - break; - } + format = LDBBSIFR_EN | ((state->alpha >> 8) << LDBBSIFR_LAY_SHIFT) | + sstate->format->ldbbsifr; #define plane_reg_dump(sdev, splane, reg) \ - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x 0x%08x\n", __func__, \ + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x 0x%08x\n", __func__, \ splane->index, #reg, \ lcdc_read(sdev, reg(splane->index)), \ lcdc_read(sdev, reg(splane->index) + LCDC_SIDE_B_OFFSET)) @@ -127,29 +112,27 @@ static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, plane_reg_dump(sdev, splane, LDBnBSACR); lcdc_write(sdev, LDBCR, LDBCR_UPC(splane->index)); - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, "LDBCR", lcdc_read(sdev, LDBCR)); lcdc_write(sdev, LDBnBSIFR(splane->index), format); lcdc_write(sdev, LDBnBSSZR(splane->index), - (splane->crtc_h << LDBBSSZR_BVSS_SHIFT) | - (splane->crtc_w << LDBBSSZR_BHSS_SHIFT)); + (state->crtc_h << LDBBSSZR_BVSS_SHIFT) | + (state->crtc_w << LDBBSSZR_BHSS_SHIFT)); lcdc_write(sdev, LDBnBLOCR(splane->index), - (splane->crtc_y << LDBBLOCR_CVLC_SHIFT) | - (splane->crtc_x << LDBBLOCR_CHLC_SHIFT)); + (state->crtc_y << LDBBLOCR_CVLC_SHIFT) | + (state->crtc_x << LDBBLOCR_CHLC_SHIFT)); lcdc_write(sdev, LDBnBSMWR(splane->index), fb->pitches[0] << LDBBSMWR_BSMW_SHIFT); - shmob_drm_plane_compute_base(splane, fb, splane->src_x, splane->src_y); - - lcdc_write(sdev, LDBnBSAYR(splane->index), splane->dma[0]); - if (splane->format->yuv) - lcdc_write(sdev, LDBnBSACR(splane->index), splane->dma[1]); + lcdc_write(sdev, LDBnBSAYR(splane->index), sstate->dma[0]); + if 
(shmob_drm_format_is_yuv(sstate->format)) + lcdc_write(sdev, LDBnBSACR(splane->index), sstate->dma[1]); lcdc_write(sdev, LDBCR, LDBCR_UPF(splane->index) | LDBCR_UPD(splane->index)); - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, "LDBCR", lcdc_read(sdev, LDBCR)); plane_reg_dump(sdev, splane, LDBnBSIFR); @@ -160,75 +143,143 @@ static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, plane_reg_dump(sdev, splane, LDBnBSACR); } -void shmob_drm_plane_setup(struct drm_plane *plane) +static int shmob_drm_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) { - struct shmob_drm_plane *splane = to_shmob_plane(plane); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(new_plane_state); + struct drm_crtc_state *crtc_state; + bool is_primary = plane->type == DRM_PLANE_TYPE_PRIMARY; + int ret; - if (plane->fb == NULL) - return; + if (!new_plane_state->crtc) { + /* + * The visible field is not reset by the DRM core but only + * updated by drm_atomic_helper_check_plane_state(), set it + * manually. + */ + new_plane_state->visible = false; + sstate->format = NULL; + return 0; + } - __shmob_drm_plane_setup(splane, plane->fb); -} + crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); -static int -shmob_drm_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, - struct drm_framebuffer *fb, int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct shmob_drm_plane *splane = to_shmob_plane(plane); - struct shmob_drm_device *sdev = plane->dev->dev_private; - const struct shmob_drm_format_info *format; + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + !is_primary, true); + if (ret < 0) + return ret; - format = shmob_drm_format_info(fb->format->format); - if (format == NULL) { - dev_dbg(sdev->dev, "update_plane: unsupported format %08x\n", - fb->format->format); - return -EINVAL; + if (!new_plane_state->visible) { + sstate->format = NULL; + return 0; } - if (src_w >> 16 != crtc_w || src_h >> 16 != crtc_h) { - dev_dbg(sdev->dev, "%s: scaling not supported\n", __func__); + sstate->format = shmob_drm_format_info(new_plane_state->fb->format->format); + if (!sstate->format) { + dev_dbg(plane->dev->dev, + "plane_atomic_check: unsupported format %p4cc\n", + &new_plane_state->fb->format->format); return -EINVAL; } - splane->format = format; + shmob_drm_plane_compute_base(sstate); - splane->src_x = src_x >> 16; - splane->src_y = src_y >> 16; - splane->crtc_x = crtc_x; - splane->crtc_y = crtc_y; - splane->crtc_w = crtc_w; - splane->crtc_h = crtc_h; - - __shmob_drm_plane_setup(splane, fb); return 0; } -static int shmob_drm_plane_disable(struct drm_plane *plane, - struct drm_modeset_acquire_ctx *ctx) +static void shmob_drm_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) { + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct shmob_drm_plane *splane = to_shmob_plane(plane); - struct shmob_drm_device *sdev = plane->dev->dev_private; - splane->format = NULL; + if (!new_plane_state->visible) + return; + if (plane->type == DRM_PLANE_TYPE_PRIMARY) + 
shmob_drm_primary_plane_setup(splane, new_plane_state); + else + shmob_drm_overlay_plane_setup(splane, new_plane_state); +} + +static void shmob_drm_plane_atomic_disable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane); + struct shmob_drm_device *sdev = to_shmob_device(plane->dev); + struct shmob_drm_plane *splane = to_shmob_plane(plane); + + if (!old_state->crtc) + return; + + if (plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + + lcdc_write(sdev, LDBCR, LDBCR_UPC(splane->index)); lcdc_write(sdev, LDBnBSIFR(splane->index), 0); - return 0; + lcdc_write(sdev, LDBCR, + LDBCR_UPF(splane->index) | LDBCR_UPD(splane->index)); } -static void shmob_drm_plane_destroy(struct drm_plane *plane) +static struct drm_plane_state * +shmob_drm_plane_atomic_duplicate_state(struct drm_plane *plane) { - drm_plane_force_disable(plane); - drm_plane_cleanup(plane); + struct shmob_drm_plane_state *state; + struct shmob_drm_plane_state *copy; + + if (WARN_ON(!plane->state)) + return NULL; + + state = to_shmob_plane_state(plane->state); + copy = kmemdup(state, sizeof(*state), GFP_KERNEL); + if (copy == NULL) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &copy->base); + + return &copy->base; } +static void shmob_drm_plane_atomic_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) +{ + __drm_atomic_helper_plane_destroy_state(state); + kfree(to_shmob_plane_state(state)); +} + +static void shmob_drm_plane_reset(struct drm_plane *plane) +{ + struct shmob_drm_plane_state *state; + + if (plane->state) { + shmob_drm_plane_atomic_destroy_state(plane, plane->state); + plane->state = NULL; + } + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (state == NULL) + return; + + __drm_atomic_helper_plane_reset(plane, &state->base); +} + +static const struct drm_plane_helper_funcs shmob_drm_plane_helper_funcs = { + .atomic_check = shmob_drm_plane_atomic_check, + .atomic_update = shmob_drm_plane_atomic_update, + .atomic_disable = shmob_drm_plane_atomic_disable, +}; + static const struct drm_plane_funcs shmob_drm_plane_funcs = { - .update_plane = shmob_drm_plane_update, - .disable_plane = shmob_drm_plane_disable, - .destroy = shmob_drm_plane_destroy, + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .reset = shmob_drm_plane_reset, + .atomic_duplicate_state = shmob_drm_plane_atomic_duplicate_state, + .atomic_destroy_state = shmob_drm_plane_atomic_destroy_state, }; static const uint32_t formats[] = { @@ -244,22 +295,23 @@ static const uint32_t formats[] = { DRM_FORMAT_NV42, }; -int shmob_drm_plane_create(struct shmob_drm_device *sdev, unsigned int index) +struct drm_plane *shmob_drm_plane_create(struct shmob_drm_device *sdev, + enum drm_plane_type type, + unsigned int index) { struct shmob_drm_plane *splane; - int ret; - splane = devm_kzalloc(sdev->dev, sizeof(*splane), GFP_KERNEL); - if (splane == NULL) - return -ENOMEM; + splane = drmm_universal_plane_alloc(&sdev->ddev, + struct shmob_drm_plane, base, 1, + &shmob_drm_plane_funcs, formats, + ARRAY_SIZE(formats), NULL, type, + NULL); + if (IS_ERR(splane)) + return ERR_CAST(splane); splane->index = index; - splane->alpha = 255; - ret = drm_universal_plane_init(sdev->ddev, &splane->plane, 1, - &shmob_drm_plane_funcs, - formats, ARRAY_SIZE(formats), NULL, - DRM_PLANE_TYPE_OVERLAY, NULL); + drm_plane_helper_add(&splane->base, &shmob_drm_plane_helper_funcs); - return ret; + return &splane->base; }
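The plane conversion above is the canonical shape for a driver that subclasses atomic plane state: the driver state embeds struct drm_plane_state as its first member, .atomic_duplicate_state copies the whole container with kmemdup() so the private fields (here the resolved format pointer and DMA addresses) travel with the duplicated state, and .reset and .atomic_destroy_state route through the __drm_atomic_helper_plane_*() helpers. A minimal sketch of the pattern follows; the names my_plane_state, to_my_state and my_duplicate_state are illustrative only and not part of this patch.

        #include <drm/drm_atomic_helper.h>
        #include <linux/slab.h>

        struct my_plane_state {
                struct drm_plane_state base;    /* core state, must be first */
                u32 cookie;                     /* driver-private per-state data */
        };

        static inline struct my_plane_state *to_my_state(struct drm_plane_state *s)
        {
                return container_of(s, struct my_plane_state, base);
        }

        static struct drm_plane_state *my_duplicate_state(struct drm_plane *plane)
        {
                struct my_plane_state *copy;

                if (WARN_ON(!plane->state))
                        return NULL;

                /* Copy the container so private fields follow the new state. */
                copy = kmemdup(to_my_state(plane->state), sizeof(*copy), GFP_KERNEL);
                if (!copy)
                        return NULL;

                __drm_atomic_helper_plane_duplicate_state(plane, &copy->base);
                return &copy->base;
        }

The matching destroy callback undoes this in reverse, calling __drm_atomic_helper_plane_destroy_state() on the base and then kfree() on the container, exactly as shmob_drm_plane_atomic_destroy_state() does above.

diff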
--git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h index e72b21a4288f..dcfddd605899 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h @@ -13,7 +13,8 @@ struct drm_plane; struct shmob_drm_device; -int shmob_drm_plane_create(struct shmob_drm_device *sdev, unsigned int index); -void shmob_drm_plane_setup(struct drm_plane *plane); +struct drm_plane *shmob_drm_plane_create(struct shmob_drm_device *sdev, + enum drm_plane_type type, + unsigned int index); #endif /* __SHMOB_DRM_PLANE_H__ */ diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 32f0857aec9f..e0174f82e353 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -910,7 +910,7 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state); struct drm_crtc *crtc = plane_state->crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *crtc_state = NULL; const struct drm_format_info *fi; unsigned int pitch; int ret; diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c index 5d12d7beef0e..ade3309ae042 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c @@ -26,7 +26,7 @@ struct vc4_dummy_crtc *vc4_mock_pv(struct kunit *test, struct vc4_crtc *vc4_crtc; int ret; - dummy_crtc = kunit_kzalloc(test, sizeof(*dummy_crtc), GFP_KERNEL); + dummy_crtc = drmm_kzalloc(drm, sizeof(*dummy_crtc), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, dummy_crtc); vc4_crtc = &dummy_crtc->crtc; diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index 6e11fcc9ef45..e70d7c3076ac 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -32,7 +32,7 @@ struct vc4_dummy_output *vc4_dummy_output(struct kunit *test, struct drm_encoder *enc; int ret; - dummy_output = kunit_kzalloc(test, sizeof(*dummy_output), GFP_KERNEL); + dummy_output = drmm_kzalloc(drm, sizeof(*dummy_output), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dummy_output); dummy_output->encoder.type = vc4_encoder_type; diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 51aab662050b..e905734c26a0 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -316,26 +316,44 @@ static void bcm_iproc_i2c_slave_init( iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); } -static void bcm_iproc_i2c_check_slave_status( - struct bcm_iproc_i2c_dev *iproc_i2c) +static bool bcm_iproc_i2c_check_slave_status + (struct bcm_iproc_i2c_dev *iproc_i2c, u32 status) { u32 val; + bool recover = false; - val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); - /* status is valid only when START_BUSY is cleared after it was set */ - if (val & BIT(S_CMD_START_BUSY_SHIFT)) - return; + /* check slave transmit status only if slave is transmitting */ + if (!iproc_i2c->slave_rx_only) { + val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); + /* status is valid only when START_BUSY is cleared */ + if (!(val & BIT(S_CMD_START_BUSY_SHIFT))) { + val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; + if (val == S_CMD_STATUS_TIMEOUT || + val == S_CMD_STATUS_MASTER_ABORT) { + dev_warn(iproc_i2c->device, + (val == 
S_CMD_STATUS_TIMEOUT) ? + "slave random stretch time timeout\n" : + "Master aborted read transaction\n"); + recover = true; + } + } + } + + /* RX_EVENT is not valid when START_BUSY is set */ + if ((status & BIT(IS_S_RX_EVENT_SHIFT)) && + (status & BIT(IS_S_START_BUSY_SHIFT))) { + dev_warn(iproc_i2c->device, "Slave aborted read transaction\n"); + recover = true; + } - val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; - if (val == S_CMD_STATUS_TIMEOUT || val == S_CMD_STATUS_MASTER_ABORT) { - dev_err(iproc_i2c->device, (val == S_CMD_STATUS_TIMEOUT) ? - "slave random stretch time timeout\n" : - "Master aborted read transaction\n"); + if (recover) { /* re-initialize i2c for recovery */ bcm_iproc_i2c_enable_disable(iproc_i2c, false); bcm_iproc_i2c_slave_init(iproc_i2c, true); bcm_iproc_i2c_enable_disable(iproc_i2c, true); } + + return recover; } static void bcm_iproc_i2c_slave_read(struct bcm_iproc_i2c_dev *iproc_i2c) @@ -420,48 +438,6 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, u32 val; u8 value; - /* - * Slave events in case of master-write, master-write-read and, - * master-read - * - * Master-write : only IS_S_RX_EVENT_SHIFT event - * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT - * events - * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT - * events or only IS_S_RD_EVENT_SHIFT - * - * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt - * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes - * full. This can happen if Master issues write requests of more than - * 64 bytes. - */ - if (status & BIT(IS_S_RX_EVENT_SHIFT) || - status & BIT(IS_S_RD_EVENT_SHIFT) || - status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { - /* disable slave interrupts */ - val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); - val &= ~iproc_i2c->slave_int_mask; - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); - - if (status & BIT(IS_S_RD_EVENT_SHIFT)) - /* Master-write-read request */ - iproc_i2c->slave_rx_only = false; - else - /* Master-write request only */ - iproc_i2c->slave_rx_only = true; - - /* schedule tasklet to read data later */ - tasklet_schedule(&iproc_i2c->slave_rx_tasklet); - - /* - * clear only IS_S_RX_EVENT_SHIFT and - * IS_S_RX_FIFO_FULL_SHIFT interrupt. 
- */ - val = BIT(IS_S_RX_EVENT_SHIFT); - if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) - val |= BIT(IS_S_RX_FIFO_FULL_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); - } if (status & BIT(IS_S_TX_UNDERRUN_SHIFT)) { iproc_i2c->tx_underrun++; @@ -493,8 +469,9 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, * less than PKT_LENGTH bytes were output on the SMBUS */ iproc_i2c->slave_int_mask &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, - iproc_i2c->slave_int_mask); + val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); + val &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); /* End of SMBUS for Master Read */ val = BIT(S_TX_WR_STATUS_SHIFT); @@ -515,9 +492,49 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, BIT(IS_S_START_BUSY_SHIFT)); } - /* check slave transmit status only if slave is transmitting */ - if (!iproc_i2c->slave_rx_only) - bcm_iproc_i2c_check_slave_status(iproc_i2c); + /* if the controller has been reset, immediately return from the ISR */ + if (bcm_iproc_i2c_check_slave_status(iproc_i2c, status)) + return true; + + /* + * Slave events in case of master-write, master-write-read and, + * master-read + * + * Master-write : only IS_S_RX_EVENT_SHIFT event + * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events + * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events or only IS_S_RD_EVENT_SHIFT + * + * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt + * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes + * full. This can happen if Master issues write requests of more than + * 64 bytes. + */ + if (status & BIT(IS_S_RX_EVENT_SHIFT) || + status & BIT(IS_S_RD_EVENT_SHIFT) || + status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { + /* disable slave interrupts */ + val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); + val &= ~iproc_i2c->slave_int_mask; + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + + if (status & BIT(IS_S_RD_EVENT_SHIFT)) + /* Master-write-read request */ + iproc_i2c->slave_rx_only = false; + else + /* Master-write request only */ + iproc_i2c->slave_rx_only = true; + + /* schedule tasklet to read data later */ + tasklet_schedule(&iproc_i2c->slave_rx_tasklet); + + /* clear IS_S_RX_FIFO_FULL_SHIFT interrupt */ + if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { + val = BIT(IS_S_RX_FIFO_FULL_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); + } + } return true; } diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c index 20f8f7c9a8cd..cf3747d87034 100644 --- a/drivers/i2c/busses/i2c-cp2615.c +++ b/drivers/i2c/busses/i2c-cp2615.c @@ -85,7 +85,7 @@ static int cp2615_init_iop_msg(struct cp2615_iop_msg *ret, enum cp2615_iop_msg_t if (!ret) return -EINVAL; - ret->preamble = 0x2A2A; + ret->preamble = htons(0x2A2AU); ret->length = htons(data_len + 6); ret->msg = htons(msg); if (data && data_len) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index ca1035e010c7..85dbd0eb5392 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -519,10 +519,16 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev) /* * Because we don't know the buffer length in the - * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop - * the transaction here. + * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop the + * transaction here. 
Also disable the TX_EMPTY IRQ + * while waiting for the data length byte to avoid the + * bogus interrupts flood. */ - if (buf_len > 0 || flags & I2C_M_RECV_LEN) { + if (flags & I2C_M_RECV_LEN) { + dev->status |= STATUS_WRITE_IN_PROGRESS; + intr_mask &= ~DW_IC_INTR_TX_EMPTY; + break; + } else if (buf_len > 0) { /* more bytes to be written */ dev->status |= STATUS_WRITE_IN_PROGRESS; break; @@ -558,6 +564,13 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len) msgs[dev->msg_read_idx].len = len; msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN; + /* + * Received buffer length, re-enable TX_EMPTY interrupt + * to resume the SMBUS transaction. + */ + regmap_update_bits(dev->map, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY, + DW_IC_INTR_TX_EMPTY); + return len; } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index a91201509bc1..8b7e599f1674 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -450,8 +450,8 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; - rdwr_pa = memdup_user(rdwr_arg.msgs, - rdwr_arg.nmsgs * sizeof(struct i2c_msg)); + rdwr_pa = memdup_array_user(rdwr_arg.msgs, + rdwr_arg.nmsgs, sizeof(struct i2c_msg)); if (IS_ERR(rdwr_pa)) return PTR_ERR(rdwr_pa); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 95f90699d2b1..51e0c4954600 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -50,7 +50,7 @@ struct evdev_client { bool revoked; unsigned long *evmasks[EV_CNT]; unsigned int bufsize; - struct input_event buffer[]; + struct input_event buffer[] __counted_by(bufsize); }; static size_t evdev_get_mask_cnt(unsigned int type) diff --git a/drivers/input/input-leds.c b/drivers/input/input-leds.c index 0b11990ade46..0e935914bc3a 100644 --- a/drivers/input/input-leds.c +++ b/drivers/input/input-leds.c @@ -44,7 +44,7 @@ struct input_led { struct input_leds { struct input_handle handle; unsigned int num_leds; - struct input_led leds[]; + struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index 56abc8c6c763..27d95d6cf56e 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -296,15 +296,4 @@ static struct parport_driver walkera0701_parport_driver = { .devmodel = true, }; -static int __init walkera0701_init(void) -{ - return parport_register_driver(&walkera0701_parport_driver); -} - -static void __exit walkera0701_exit(void) -{ - parport_unregister_driver(&walkera0701_parport_driver); -} - -module_init(walkera0701_init); -module_exit(walkera0701_exit); +module_parport_driver(walkera0701_parport_driver); diff --git a/drivers/input/keyboard/adp5520-keys.c b/drivers/input/keyboard/adp5520-keys.c index 7851ffd678a8..10c248f0c1fc 100644 --- a/drivers/input/keyboard/adp5520-keys.c +++ b/drivers/input/keyboard/adp5520-keys.c @@ -168,14 +168,12 @@ static int adp5520_keys_probe(struct platform_device *pdev) return 0; } -static int adp5520_keys_remove(struct platform_device *pdev) +static void adp5520_keys_remove(struct platform_device *pdev) { struct adp5520_keys *dev = platform_get_drvdata(pdev); adp5520_unregister_notifier(dev->master, &dev->notifier, ADP5520_KP_IEN | ADP5520_KR_IEN); - - return 0; } static struct platform_driver adp5520_keys_driver = { @@ -183,7 +181,7 @@ static struct platform_driver adp5520_keys_driver = { .name = "adp5520-keys", }, 
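The long run of input-driver hunks through the rest of this section is one tree-wide conversion: a platform driver's .remove() callback used to return int, but the driver core discards that value apart from emitting a warning, so a non-zero return could only leak resources silently. The void-returning .remove_new() callback makes the cannot-fail contract explicit. Every hunk has the same shape, sketched here with placeholder names (foo_*, not taken from any of the patched drivers):

        #include <linux/platform_device.h>

        struct foo_priv { int hw_handle; };

        static void foo_teardown(struct foo_priv *priv) { /* release resources */ }

        static int foo_probe(struct platform_device *pdev) { return 0; }

        /* Was "static int foo_remove(...)" ending in "return 0;"; the returned
         * value was discarded by the platform bus core anyway. */
        static void foo_remove(struct platform_device *pdev)
        {
                struct foo_priv *priv = platform_get_drvdata(pdev);

                foo_teardown(priv);     /* all cleanup happens here, no error path */
        }

        static struct platform_driver foo_driver = {
                .probe          = foo_probe,
                .remove_new     = foo_remove,
                .driver         = {
                        .name   = "foo",
                },
        };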
.probe = adp5520_keys_probe, - .remove = adp5520_keys_remove, + .remove_new = adp5520_keys_remove, }; module_platform_driver(adp5520_keys_driver); diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index e7ecfca838df..30678a34cf64 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -686,10 +686,11 @@ static umode_t cros_ec_keyb_attr_is_visible(struct kobject *kobj, return attr->mode; } -static const struct attribute_group cros_ec_keyb_attr_group = { +static const struct attribute_group cros_ec_keyb_group = { .is_visible = cros_ec_keyb_attr_is_visible, .attrs = cros_ec_keyb_attrs, }; +__ATTRIBUTE_GROUPS(cros_ec_keyb); static int cros_ec_keyb_probe(struct platform_device *pdev) { @@ -730,12 +731,6 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) return err; } - err = devm_device_add_group(dev, &cros_ec_keyb_attr_group); - if (err) { - dev_err(dev, "failed to create attributes: %d\n", err); - return err; - } - ckdev->notifier.notifier_call = cros_ec_keyb_work; err = blocking_notifier_chain_register(&ckdev->ec->event_notifier, &ckdev->notifier); @@ -748,14 +743,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) return 0; } -static int cros_ec_keyb_remove(struct platform_device *pdev) +static void cros_ec_keyb_remove(struct platform_device *pdev) { struct cros_ec_keyb *ckdev = dev_get_drvdata(&pdev->dev); blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, &ckdev->notifier); - - return 0; } #ifdef CONFIG_ACPI @@ -779,9 +772,10 @@ static DEFINE_SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume); static struct platform_driver cros_ec_keyb_driver = { .probe = cros_ec_keyb_probe, - .remove = cros_ec_keyb_remove, + .remove_new = cros_ec_keyb_remove, .driver = { .name = "cros-ec-keyb", + .dev_groups = cros_ec_keyb_groups, .of_match_table = of_match_ptr(cros_ec_keyb_of_match), .acpi_match_table = ACPI_PTR(cros_ec_keyb_acpi_match), .pm = pm_sleep_ptr(&cros_ec_keyb_pm_ops), diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index 55075addcac2..6b811d6bf625 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -308,11 +308,9 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) return 0; } -static int ep93xx_keypad_remove(struct platform_device *pdev) +static void ep93xx_keypad_remove(struct platform_device *pdev) { dev_pm_clear_wake_irq(&pdev->dev); - - return 0; } static struct platform_driver ep93xx_keypad_driver = { @@ -321,7 +319,7 @@ static struct platform_driver ep93xx_keypad_driver = { .pm = pm_sleep_ptr(&ep93xx_keypad_pm_ops), }, .probe = ep93xx_keypad_probe, - .remove = ep93xx_keypad_remove, + .remove_new = ep93xx_keypad_remove, }; module_platform_driver(ep93xx_keypad_driver); diff --git a/drivers/input/keyboard/iqs62x-keys.c b/drivers/input/keyboard/iqs62x-keys.c index 02ceebad7bda..688d61244b5f 100644 --- a/drivers/input/keyboard/iqs62x-keys.c +++ b/drivers/input/keyboard/iqs62x-keys.c @@ -310,7 +310,7 @@ static int iqs62x_keys_probe(struct platform_device *pdev) return ret; } -static int iqs62x_keys_remove(struct platform_device *pdev) +static void iqs62x_keys_remove(struct platform_device *pdev) { struct iqs62x_keys_private *iqs62x_keys = platform_get_drvdata(pdev); int ret; @@ -319,8 +319,6 @@ static int iqs62x_keys_remove(struct platform_device *pdev) &iqs62x_keys->notifier); if (ret) dev_err(&pdev->dev, "Failed to unregister notifier: %d\n", ret); - - 
return 0; } static struct platform_driver iqs62x_keys_platform_driver = { @@ -328,7 +326,7 @@ static struct platform_driver iqs62x_keys_platform_driver = { .name = "iqs62x-keys", }, .probe = iqs62x_keys_probe, - .remove = iqs62x_keys_remove, + .remove_new = iqs62x_keys_remove, }; module_platform_driver(iqs62x_keys_platform_driver); diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index a1b037891af2..50fa764c82d2 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -549,15 +549,13 @@ err_free_mem: return err; } -static int matrix_keypad_remove(struct platform_device *pdev) +static void matrix_keypad_remove(struct platform_device *pdev) { struct matrix_keypad *keypad = platform_get_drvdata(pdev); matrix_keypad_free_gpio(keypad); input_unregister_device(keypad->input_dev); kfree(keypad); - - return 0; } #ifdef CONFIG_OF @@ -570,7 +568,7 @@ MODULE_DEVICE_TABLE(of, matrix_keypad_dt_match); static struct platform_driver matrix_keypad_driver = { .probe = matrix_keypad_probe, - .remove = matrix_keypad_remove, + .remove_new = matrix_keypad_remove, .driver = { .name = "matrix-keypad", .pm = pm_sleep_ptr(&matrix_keypad_pm_ops), diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index 24440b498645..454fb8675657 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -287,7 +287,7 @@ err2: return -EINVAL; } -static int omap_kp_remove(struct platform_device *pdev) +static void omap_kp_remove(struct platform_device *pdev) { struct omap_kp *omap_kp = platform_get_drvdata(pdev); @@ -303,13 +303,11 @@ static int omap_kp_remove(struct platform_device *pdev) input_unregister_device(omap_kp->input); kfree(omap_kp); - - return 0; } static struct platform_driver omap_kp_driver = { .probe = omap_kp_probe, - .remove = omap_kp_remove, + .remove_new = omap_kp_remove, .driver = { .name = "omap-keypad", }, diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 773e55eed88b..d3f8688fdd9c 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -461,11 +461,9 @@ static int omap4_keypad_probe(struct platform_device *pdev) return 0; } -static int omap4_keypad_remove(struct platform_device *pdev) +static void omap4_keypad_remove(struct platform_device *pdev) { dev_pm_clear_wake_irq(&pdev->dev); - - return 0; } static const struct of_device_id omap_keypad_dt_match[] = { @@ -476,7 +474,7 @@ MODULE_DEVICE_TABLE(of, omap_keypad_dt_match); static struct platform_driver omap4_keypad_driver = { .probe = omap4_keypad_probe, - .remove = omap4_keypad_remove, + .remove_new = omap4_keypad_remove, .driver = { .name = "omap4-keypad", .of_match_table = omap_keypad_dt_match, diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index d85dd2489293..e212eff7687c 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -444,7 +444,7 @@ err_unprepare_clk: return error; } -static int samsung_keypad_remove(struct platform_device *pdev) +static void samsung_keypad_remove(struct platform_device *pdev) { struct samsung_keypad *keypad = platform_get_drvdata(pdev); @@ -453,8 +453,6 @@ static int samsung_keypad_remove(struct platform_device *pdev) input_unregister_device(keypad->input_dev); clk_unprepare(keypad->clk); - - return 0; } static int samsung_keypad_runtime_suspend(struct device *dev) @@ -589,7 +587,7 @@ 
MODULE_DEVICE_TABLE(platform, samsung_keypad_driver_ids); static struct platform_driver samsung_keypad_driver = { .probe = samsung_keypad_probe, - .remove = samsung_keypad_remove, + .remove_new = samsung_keypad_remove, .driver = { .name = "samsung-keypad", .of_match_table = of_match_ptr(samsung_keypad_dt_match), diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 2c00320f739f..4ea4fd25c5d2 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -265,7 +265,7 @@ static int sh_keysc_probe(struct platform_device *pdev) return error; } -static int sh_keysc_remove(struct platform_device *pdev) +static void sh_keysc_remove(struct platform_device *pdev) { struct sh_keysc_priv *priv = platform_get_drvdata(pdev); @@ -279,8 +279,6 @@ static int sh_keysc_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); kfree(priv); - - return 0; } static int sh_keysc_suspend(struct device *dev) @@ -321,7 +319,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(sh_keysc_dev_pm_ops, static struct platform_driver sh_keysc_device_driver = { .probe = sh_keysc_probe, - .remove = sh_keysc_remove, + .remove_new = sh_keysc_remove, .driver = { .name = "sh_keysc", .pm = pm_sleep_ptr(&sh_keysc_dev_pm_ops), diff --git a/drivers/input/keyboard/spear-keyboard.c b/drivers/input/keyboard/spear-keyboard.c index a50fa9915381..557d00a667ce 100644 --- a/drivers/input/keyboard/spear-keyboard.c +++ b/drivers/input/keyboard/spear-keyboard.c @@ -272,14 +272,12 @@ static int spear_kbd_probe(struct platform_device *pdev) return 0; } -static int spear_kbd_remove(struct platform_device *pdev) +static void spear_kbd_remove(struct platform_device *pdev) { struct spear_kbd *kbd = platform_get_drvdata(pdev); input_unregister_device(kbd->input); clk_unprepare(kbd->clk); - - return 0; } static int spear_kbd_suspend(struct device *dev) @@ -375,7 +373,7 @@ MODULE_DEVICE_TABLE(of, spear_kbd_id_table); static struct platform_driver spear_kbd_driver = { .probe = spear_kbd_probe, - .remove = spear_kbd_remove, + .remove_new = spear_kbd_remove, .driver = { .name = "keyboard", .pm = pm_sleep_ptr(&spear_kbd_pm_ops), diff --git a/drivers/input/keyboard/stmpe-keypad.c b/drivers/input/keyboard/stmpe-keypad.c index 2c6c53290cc0..2013c0afd0c3 100644 --- a/drivers/input/keyboard/stmpe-keypad.c +++ b/drivers/input/keyboard/stmpe-keypad.c @@ -404,20 +404,18 @@ static int stmpe_keypad_probe(struct platform_device *pdev) return 0; } -static int stmpe_keypad_remove(struct platform_device *pdev) +static void stmpe_keypad_remove(struct platform_device *pdev) { struct stmpe_keypad *keypad = platform_get_drvdata(pdev); stmpe_disable(keypad->stmpe, STMPE_BLOCK_KEYPAD); - - return 0; } static struct platform_driver stmpe_keypad_driver = { .driver.name = "stmpe-keypad", .driver.owner = THIS_MODULE, .probe = stmpe_keypad_probe, - .remove = stmpe_keypad_remove, + .remove_new = stmpe_keypad_remove, }; module_platform_driver(stmpe_keypad_driver); diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index c9a823ea45d0..a1765ed8c825 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -14,7 +14,7 @@ #include <linux/io.h> #include <linux/interrupt.h> #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/property.h> #include <linux/clk.h> #include <linux/slab.h> #include <linux/input/matrix_keypad.h> @@ -602,9 +602,6 @@ static int tegra_kbc_probe(struct platform_device *pdev) unsigned int debounce_cnt; unsigned int 
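Several hunks in this input series (cros_ec_keyb earlier, iqs269a and kxtj9 below) make the same sysfs change: instead of calling devm_device_add_group() from probe(), after user space may already have reacted to the device, the attribute group is declared statically and handed to the core through the driver's .dev_groups pointer, so the core creates and removes the files at bind time. Attributes that only apply in some configurations move into an .is_visible() callback, as kxtj9 does for its irq-dependent poll attribute. A sketch of the pattern, with my_* as placeholder names:

        #include <linux/device.h>
        #include <linux/platform_device.h>
        #include <linux/sysfs.h>

        static ssize_t poll_show(struct device *dev, struct device_attribute *attr,
                                 char *buf)
        {
                return sysfs_emit(buf, "%d\n", 0);      /* placeholder value */
        }
        static DEVICE_ATTR_RO(poll);

        static struct attribute *my_attrs[] = {
                &dev_attr_poll.attr,
                NULL
        };
        ATTRIBUTE_GROUPS(my);   /* generates my_group and the my_groups[] list */

        static int my_probe(struct platform_device *pdev)
        {
                return 0;       /* no devm_device_add_group() needed here */
        }

        static struct platform_driver my_driver = {
                .driver = {
                        .name           = "my-dev",
                        .dev_groups     = my_groups,
                },
                .probe = my_probe,
        };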
scan_time_rows; unsigned int keymap_rows; - const struct of_device_id *match; - - match = of_match_device(tegra_kbc_of_match, &pdev->dev); kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL); if (!kbc) { @@ -613,7 +610,7 @@ static int tegra_kbc_probe(struct platform_device *pdev) } kbc->dev = &pdev->dev; - kbc->hw_support = match->data; + kbc->hw_support = device_get_match_data(&pdev->dev); kbc->max_keys = kbc->hw_support->max_rows * kbc->hw_support->max_columns; kbc->num_rows_and_columns = kbc->hw_support->max_rows + diff --git a/drivers/input/misc/88pm80x_onkey.c b/drivers/input/misc/88pm80x_onkey.c index 51c8a326fd06..31f0702c3d01 100644 --- a/drivers/input/misc/88pm80x_onkey.c +++ b/drivers/input/misc/88pm80x_onkey.c @@ -138,14 +138,13 @@ out: return err; } -static int pm80x_onkey_remove(struct platform_device *pdev) +static void pm80x_onkey_remove(struct platform_device *pdev) { struct pm80x_onkey_info *info = platform_get_drvdata(pdev); pm80x_free_irq(info->pm80x, info->irq, info); input_unregister_device(info->idev); kfree(info); - return 0; } static struct platform_driver pm80x_onkey_driver = { @@ -154,7 +153,7 @@ static struct platform_driver pm80x_onkey_driver = { .pm = &pm80x_onkey_pm_ops, }, .probe = pm80x_onkey_probe, - .remove = pm80x_onkey_remove, + .remove_new = pm80x_onkey_remove, }; module_platform_driver(pm80x_onkey_driver); diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index 4581606a28d6..24f9e9d893de 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -133,20 +133,11 @@ static ssize_t axp20x_store_attr(struct device *dev, size_t count) { struct axp20x_pek *axp20x_pek = dev_get_drvdata(dev); - char val_str[20]; - size_t len; int ret, i; unsigned int val, idx = 0; unsigned int best_err = UINT_MAX; - val_str[sizeof(val_str) - 1] = '\0'; - strncpy(val_str, buf, sizeof(val_str) - 1); - len = strlen(val_str); - - if (len && val_str[len - 1] == '\n') - val_str[len - 1] = '\0'; - - ret = kstrtouint(val_str, 10, &val); + ret = kstrtouint(buf, 10, &val); if (ret) return ret; diff --git a/drivers/input/misc/da9052_onkey.c b/drivers/input/misc/da9052_onkey.c index 6d1152850a6d..7a1122e1efb9 100644 --- a/drivers/input/misc/da9052_onkey.c +++ b/drivers/input/misc/da9052_onkey.c @@ -127,7 +127,7 @@ err_free_mem: return error; } -static int da9052_onkey_remove(struct platform_device *pdev) +static void da9052_onkey_remove(struct platform_device *pdev) { struct da9052_onkey *onkey = platform_get_drvdata(pdev); @@ -136,13 +136,11 @@ static int da9052_onkey_remove(struct platform_device *pdev) input_unregister_device(onkey->input); kfree(onkey); - - return 0; } static struct platform_driver da9052_onkey_driver = { .probe = da9052_onkey_probe, - .remove = da9052_onkey_remove, + .remove_new = da9052_onkey_remove, .driver = { .name = "da9052-onkey", }, diff --git a/drivers/input/misc/da9055_onkey.c b/drivers/input/misc/da9055_onkey.c index 7a0d3a1d503c..871812f1b398 100644 --- a/drivers/input/misc/da9055_onkey.c +++ b/drivers/input/misc/da9055_onkey.c @@ -132,7 +132,7 @@ err_free_input: return err; } -static int da9055_onkey_remove(struct platform_device *pdev) +static void da9055_onkey_remove(struct platform_device *pdev) { struct da9055_onkey *onkey = platform_get_drvdata(pdev); int irq = platform_get_irq_byname(pdev, "ONKEY"); @@ -141,13 +141,11 @@ static int da9055_onkey_remove(struct platform_device *pdev) free_irq(irq, onkey); cancel_delayed_work_sync(&onkey->work); input_unregister_device(onkey->input); - - 
diff --git a/drivers/input/misc/da9055_onkey.c b/drivers/input/misc/da9055_onkey.c
index 7a0d3a1d503c..871812f1b398 100644
--- a/drivers/input/misc/da9055_onkey.c
+++ b/drivers/input/misc/da9055_onkey.c
@@ -132,7 +132,7 @@ err_free_input:
 	return err;
 }
 
-static int da9055_onkey_remove(struct platform_device *pdev)
+static void da9055_onkey_remove(struct platform_device *pdev)
 {
 	struct da9055_onkey *onkey = platform_get_drvdata(pdev);
 	int irq = platform_get_irq_byname(pdev, "ONKEY");
@@ -141,13 +141,11 @@ static int da9055_onkey_remove(struct platform_device *pdev)
 	free_irq(irq, onkey);
 	cancel_delayed_work_sync(&onkey->work);
 	input_unregister_device(onkey->input);
-
-	return 0;
 }
 
 static struct platform_driver da9055_onkey_driver = {
 	.probe = da9055_onkey_probe,
-	.remove = da9055_onkey_remove,
+	.remove_new = da9055_onkey_remove,
 	.driver = {
 		.name = "da9055-onkey",
 	},
diff --git a/drivers/input/misc/ideapad_slidebar.c b/drivers/input/misc/ideapad_slidebar.c
index 68f1c584da05..fa4e7f67d713 100644
--- a/drivers/input/misc/ideapad_slidebar.c
+++ b/drivers/input/misc/ideapad_slidebar.c
@@ -256,20 +256,18 @@ err_release_ports:
 	return err;
 }
 
-static int ideapad_remove(struct platform_device *pdev)
+static void ideapad_remove(struct platform_device *pdev)
 {
 	i8042_remove_filter(slidebar_i8042_filter);
 	input_unregister_device(slidebar_input_dev);
 	release_region(IDEAPAD_BASE, 3);
-
-	return 0;
 }
 
 static struct platform_driver slidebar_drv = {
 	.driver = {
 		.name = "ideapad_slidebar",
 	},
-	.remove = ideapad_remove,
+	.remove_new = ideapad_remove,
 };
 
 static int __init ideapad_dmi_check(const struct dmi_system_id *id)
diff --git a/drivers/input/misc/iqs269a.c b/drivers/input/misc/iqs269a.c
index c0a085639870..3c636c75e8a1 100644
--- a/drivers/input/misc/iqs269a.c
+++ b/drivers/input/misc/iqs269a.c
@@ -1586,10 +1586,7 @@ static struct attribute *iqs269_attrs[] = {
 	&dev_attr_ati_trigger.attr,
 	NULL,
 };
-
-static const struct attribute_group iqs269_attr_group = {
-	.attrs = iqs269_attrs,
-};
+ATTRIBUTE_GROUPS(iqs269);
 
 static const struct regmap_config iqs269_regmap_config = {
 	.reg_bits = 8,
@@ -1671,10 +1668,6 @@ static int iqs269_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &iqs269_attr_group);
-	if (error)
-		dev_err(&client->dev, "Failed to add attributes: %d\n", error);
-
 	return error;
 }
 
@@ -1743,6 +1736,7 @@ MODULE_DEVICE_TABLE(of, iqs269_of_match);
 static struct i2c_driver iqs269_i2c_driver = {
 	.driver = {
 		.name = "iqs269a",
+		.dev_groups = iqs269_groups,
 		.of_match_table = iqs269_of_match,
 		.pm = pm_sleep_ptr(&iqs269_pm),
 	},
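The iqs269a hunk above, and many of the hunks that follow, share a second pattern: instead of calling devm_device_add_group() from probe(), the driver hands its sysfs attributes to the driver core through struct device_driver.dev_groups, and the core creates and removes the files at bind/unbind time. ATTRIBUTE_GROUPS(foo) is the stock macro that expands a foo_attrs[] array into foo_group and the NULL-terminated foo_groups[]. A sketch with a hypothetical "foo" i2c driver:

static struct attribute *foo_attrs[] = {
	&dev_attr_version.attr,
	NULL
};
ATTRIBUTE_GROUPS(foo);	/* defines foo_group and foo_groups[] */

static struct i2c_driver foo_driver = {
	.driver = {
		.name = "foo",
		.dev_groups = foo_groups,	/* core adds/removes the files */
	},
	.probe = foo_probe,
};

Drivers that need an is_visible() callback (kxtj9, ad7877, ili210x, iqs5xx below) define the group by hand and use __ATTRIBUTE_GROUPS() to generate only the array.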
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index 912e614d039d..d47269b10e9a 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -334,14 +334,25 @@ static ssize_t kxtj9_set_poll(struct device *dev,
 				struct device_attribute *attr,
 
 static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll);
 
-static struct attribute *kxtj9_attributes[] = {
+static struct attribute *kxtj9_attrs[] = {
 	&dev_attr_poll.attr,
 	NULL
 };
 
-static struct attribute_group kxtj9_attribute_group = {
-	.attrs = kxtj9_attributes
+static umode_t kxtj9_attr_is_visible(struct kobject *kobj,
+				     struct attribute *attr, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return client->irq ? attr->mode : 0;
+}
+
+static struct attribute_group kxtj9_group = {
+	.attrs = kxtj9_attrs,
+	.is_visible = kxtj9_attr_is_visible,
 };
+__ATTRIBUTE_GROUPS(kxtj9);
 
 static void kxtj9_poll(struct input_dev *input)
 {
@@ -482,13 +493,6 @@ static int kxtj9_probe(struct i2c_client *client)
 			dev_err(&client->dev, "request irq failed: %d\n", err);
 			return err;
 		}
-
-		err = devm_device_add_group(&client->dev,
-					    &kxtj9_attribute_group);
-		if (err) {
-			dev_err(&client->dev, "sysfs create failed: %d\n", err);
-			return err;
-		}
 	}
 
 	return 0;
@@ -535,8 +539,9 @@ MODULE_DEVICE_TABLE(i2c, kxtj9_id);
 
 static struct i2c_driver kxtj9_driver = {
 	.driver = {
-		.name = NAME,
-		.pm = pm_sleep_ptr(&kxtj9_pm_ops),
+		.name = NAME,
+		.dev_groups = kxtj9_groups,
+		.pm = pm_sleep_ptr(&kxtj9_pm_ops),
 	},
 	.probe = kxtj9_probe,
 	.id_table = kxtj9_id,
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index 25fcf1467151..3fe0a85c45e0 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -75,15 +75,13 @@ static int m68kspkr_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int m68kspkr_remove(struct platform_device *dev)
+static void m68kspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *input_dev = platform_get_drvdata(dev);
 
 	input_unregister_device(input_dev);
 	/* turn off the speaker */
 	m68kspkr_event(NULL, EV_SND, SND_BELL, 0);
-
-	return 0;
 }
 
 static void m68kspkr_shutdown(struct platform_device *dev)
@@ -97,7 +95,7 @@ static struct platform_driver m68kspkr_platform_driver = {
 		.name = "m68kspkr",
 	},
 	.probe = m68kspkr_probe,
-	.remove = m68kspkr_remove,
+	.remove_new = m68kspkr_remove,
 	.shutdown = m68kspkr_shutdown,
 };
diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index c4dff476d479..8861a67be575 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -351,7 +351,7 @@ err_free_mem:
 	return error;
 }
 
-static int max8997_haptic_remove(struct platform_device *pdev)
+static void max8997_haptic_remove(struct platform_device *pdev)
 {
 	struct max8997_haptic *chip = platform_get_drvdata(pdev);
 
@@ -362,8 +362,6 @@ static int max8997_haptic_remove(struct platform_device *pdev)
 	pwm_put(chip->pwm);
 
 	kfree(chip);
-
-	return 0;
 }
 
 static int max8997_haptic_suspend(struct device *dev)
@@ -391,7 +389,7 @@ static struct platform_driver max8997_haptic_driver = {
 		.pm = pm_sleep_ptr(&max8997_haptic_pm_ops),
 	},
 	.probe = max8997_haptic_probe,
-	.remove = max8997_haptic_remove,
+	.remove_new = max8997_haptic_remove,
 	.id_table = max8997_haptic_id,
 };
 
 module_platform_driver(max8997_haptic_driver);
diff --git a/drivers/input/misc/mc13783-pwrbutton.c b/drivers/input/misc/mc13783-pwrbutton.c
index 0636eee4bb6c..1c8c939638f6 100644
--- a/drivers/input/misc/mc13783-pwrbutton.c
+++ b/drivers/input/misc/mc13783-pwrbutton.c
@@ -229,7 +229,7 @@ free_input_dev:
 	return err;
 }
 
-static int mc13783_pwrbutton_remove(struct platform_device *pdev)
+static void mc13783_pwrbutton_remove(struct platform_device *pdev)
 {
 	struct mc13783_pwrb *priv = platform_get_drvdata(pdev);
 	const struct mc13xxx_buttons_platform_data *pdata;
@@ -249,13 +249,11 @@ static int mc13783_pwrbutton_remove(struct platform_device *pdev)
 
 	input_unregister_device(priv->pwr);
 	kfree(priv);
-
-	return 0;
 }
 
 static struct platform_driver mc13783_pwrbutton_driver = {
 	.probe = mc13783_pwrbutton_probe,
-	.remove = mc13783_pwrbutton_remove,
+	.remove_new = mc13783_pwrbutton_remove,
 	.driver = {
 		.name = "mc13783-pwrbutton",
 	},
diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c
index 7e361727b0d9..06d5972e8e84 100644
--- a/drivers/input/misc/palmas-pwrbutton.c
+++ b/drivers/input/misc/palmas-pwrbutton.c
@@ -245,7 +245,7 @@ err_free_mem:
 *
 * Return: 0
 */
-static int palmas_pwron_remove(struct platform_device *pdev)
+static void palmas_pwron_remove(struct platform_device *pdev)
 {
 	struct palmas_pwron *pwron = platform_get_drvdata(pdev);
 
@@ -254,8 +254,6 @@ static int palmas_pwron_remove(struct platform_device *pdev)
 	input_unregister_device(pwron->input_dev);
 	kfree(pwron);
-
-	return 0;
 }
 
 /**
@@ -312,7 +310,7 @@ MODULE_DEVICE_TABLE(of, of_palmas_pwr_match);
 
 static struct platform_driver palmas_pwron_driver = {
 	.probe = palmas_pwron_probe,
-	.remove = palmas_pwron_remove,
+	.remove_new = palmas_pwron_remove,
 	.driver = {
 		.name = "palmas_pwrbutton",
 		.of_match_table = of_match_ptr(of_palmas_pwr_match),
diff --git a/drivers/input/misc/pcap_keys.c b/drivers/input/misc/pcap_keys.c
index b5a53636d7e2..8a7e9ada5952 100644
--- a/drivers/input/misc/pcap_keys.c
+++ b/drivers/input/misc/pcap_keys.c
@@ -99,7 +99,7 @@ fail:
 	return err;
 }
 
-static int pcap_keys_remove(struct platform_device *pdev)
+static void pcap_keys_remove(struct platform_device *pdev)
 {
 	struct pcap_keys *pcap_keys = platform_get_drvdata(pdev);
 
@@ -108,13 +108,11 @@ static int pcap_keys_remove(struct platform_device *pdev)
 	input_unregister_device(pcap_keys->input);
 	kfree(pcap_keys);
-
-	return 0;
 }
 
 static struct platform_driver pcap_keys_device_driver = {
 	.probe = pcap_keys_probe,
-	.remove = pcap_keys_remove,
+	.remove_new = pcap_keys_remove,
 	.driver = {
 		.name = "pcap-keys",
 	}
diff --git a/drivers/input/misc/pcf50633-input.c b/drivers/input/misc/pcf50633-input.c
index 4c60c70c4c10..c5c5fe236c18 100644
--- a/drivers/input/misc/pcf50633-input.c
+++ b/drivers/input/misc/pcf50633-input.c
@@ -87,7 +87,7 @@ static int pcf50633_input_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int pcf50633_input_remove(struct platform_device *pdev)
+static void pcf50633_input_remove(struct platform_device *pdev)
 {
 	struct pcf50633_input *input = platform_get_drvdata(pdev);
 
@@ -96,8 +96,6 @@ static int pcf50633_input_remove(struct platform_device *pdev)
 	input_unregister_device(input->input_dev);
 	kfree(input);
-
-	return 0;
 }
 
 static struct platform_driver pcf50633_input_driver = {
@@ -105,7 +103,7 @@ static struct platform_driver pcf50633_input_driver = {
 		.name = "pcf50633-input",
 	},
 	.probe = pcf50633_input_probe,
-	.remove = pcf50633_input_remove,
+	.remove_new = pcf50633_input_remove,
 };
 
 module_platform_driver(pcf50633_input_driver);
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index 9c666b2f14fe..897854fd245f 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -95,15 +95,13 @@ static int pcspkr_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int pcspkr_remove(struct platform_device *dev)
+static void pcspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *pcspkr_dev = platform_get_drvdata(dev);
 
 	input_unregister_device(pcspkr_dev);
 	/* turn off the speaker */
 	pcspkr_event(NULL, EV_SND, SND_BELL, 0);
-
-	return 0;
 }
 
 static int pcspkr_suspend(struct device *dev)
@@ -129,7 +127,7 @@ static struct platform_driver pcspkr_platform_driver = {
 		.pm = &pcspkr_pm_ops,
 	},
 	.probe = pcspkr_probe,
-	.remove = pcspkr_remove,
+	.remove_new = pcspkr_remove,
 	.shutdown = pcspkr_shutdown,
 };
 
 module_platform_driver(pcspkr_platform_driver);
diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c
index ba747c5b2b5f..bab710023d8f 100644
--- a/drivers/input/misc/pm8941-pwrkey.c
+++ b/drivers/input/misc/pm8941-pwrkey.c
@@ -408,14 +408,12 @@ static int pm8941_pwrkey_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int pm8941_pwrkey_remove(struct platform_device *pdev)
+static void pm8941_pwrkey_remove(struct platform_device *pdev)
 {
 	struct pm8941_pwrkey *pwrkey = platform_get_drvdata(pdev);
 
 	if (pwrkey->data->supports_ps_hold_poff_config)
 		unregister_reboot_notifier(&pwrkey->reboot_notifier);
-
-	return 0;
 }
 
 static const struct pm8941_data pwrkey_data = {
@@ -467,7 +465,7 @@ MODULE_DEVICE_TABLE(of, pm8941_pwr_key_id_table);
 
 static struct platform_driver pm8941_pwrkey_driver = {
 	.probe = pm8941_pwrkey_probe,
-	.remove = pm8941_pwrkey_remove,
+	.remove_new = pm8941_pwrkey_remove,
 	.driver = {
 		.name = "pm8941-pwrkey",
 		.pm = pm_sleep_ptr(&pm8941_pwr_key_pm_ops),
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e79f5497948b..08bcee3d6bcc 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -411,7 +411,7 @@ out:
 	return button_info;
 }
 
-static int soc_button_remove(struct platform_device *pdev)
+static void soc_button_remove(struct platform_device *pdev)
 {
 	struct soc_button_data *priv = platform_get_drvdata(pdev);
 
@@ -420,8 +420,6 @@ static int soc_button_remove(struct platform_device *pdev)
 	for (i = 0; i < BUTTON_TYPES; i++)
 		if (priv->children[i])
 			platform_device_unregister(priv->children[i]);
-
-	return 0;
 }
 
 static int soc_button_probe(struct platform_device *pdev)
@@ -609,7 +607,7 @@ MODULE_DEVICE_TABLE(acpi, soc_button_acpi_match);
 
 static struct platform_driver soc_button_driver = {
 	.probe = soc_button_probe,
-	.remove = soc_button_remove,
+	.remove_new = soc_button_remove,
 	.driver = {
 		.name = KBUILD_MODNAME,
 		.acpi_match_table = ACPI_PTR(soc_button_acpi_match),
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index e5dd84725c6e..20020cbc0752 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -231,7 +231,7 @@ out_err:
 	return err;
 }
 
-static int bbc_remove(struct platform_device *op)
+static void bbc_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = platform_get_drvdata(op);
 	struct input_dev *input_dev = state->input_dev;
@@ -245,8 +245,6 @@ static int bbc_remove(struct platform_device *op)
 	of_iounmap(&op->resource[0], info->regs, 6);
 
 	kfree(state);
-
-	return 0;
 }
 
 static const struct of_device_id bbc_beep_match[] = {
@@ -264,7 +262,7 @@ static struct platform_driver bbc_beep_driver = {
 		.of_match_table = bbc_beep_match,
 	},
 	.probe = bbc_beep_probe,
-	.remove = bbc_remove,
+	.remove_new = bbc_remove,
 	.shutdown = sparcspkr_shutdown,
 };
 
@@ -310,7 +308,7 @@ out_err:
 	return err;
 }
 
-static int grover_remove(struct platform_device *op)
+static void grover_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = platform_get_drvdata(op);
 	struct grover_beep_info *info = &state->u.grover;
@@ -325,8 +323,6 @@ static int grover_remove(struct platform_device *op)
 	of_iounmap(&op->resource[2], info->freq_regs, 2);
 
 	kfree(state);
-
-	return 0;
 }
 
 static const struct of_device_id grover_beep_match[] = {
@@ -344,7 +340,7 @@ static struct platform_driver grover_beep_driver = {
 		.of_match_table = grover_beep_match,
 	},
 	.probe = grover_beep_probe,
-	.remove = grover_remove,
+	.remove_new = grover_remove,
 	.shutdown = sparcspkr_shutdown,
 };
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index 111cb70cde46..5c4956678cd0 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -1286,13 +1286,11 @@ static int wistron_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int wistron_remove(struct platform_device *dev)
+static void wistron_remove(struct platform_device *dev)
 {
 	wistron_led_remove();
 	input_unregister_device(wistron_idev);
 	bios_detach();
-
-	return 0;
 }
 
 static int wistron_suspend(struct device *dev)
@@ -1336,7 +1334,7 @@ static struct platform_driver wistron_driver = {
 		.pm = pm_sleep_ptr(&wistron_pm_ops),
 	},
 	.probe = wistron_probe,
-	.remove = wistron_remove,
+	.remove_new = wistron_remove,
 };
 
 static int __init wb_module_init(void)
diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c
index a42fe041b73c..e4a06c73b72d 100644
--- a/drivers/input/misc/wm831x-on.c
+++ b/drivers/input/misc/wm831x-on.c
@@ -123,20 +123,18 @@ err:
 	return ret;
 }
 
-static int wm831x_on_remove(struct platform_device *pdev)
+static void wm831x_on_remove(struct platform_device *pdev)
 {
 	struct wm831x_on *wm831x_on = platform_get_drvdata(pdev);
 	int irq = platform_get_irq(pdev, 0);
 
 	free_irq(irq, wm831x_on);
 	cancel_delayed_work_sync(&wm831x_on->work);
-
-	return 0;
 }
 
 static struct platform_driver wm831x_on_driver = {
 	.probe = wm831x_on_probe,
-	.remove = wm831x_on_remove,
+	.remove_new = wm831x_on_remove,
 	.driver = {
 		.name = "wm831x-on",
 	},
diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
index 05851bc32541..a84098448f5b 100644
--- a/drivers/input/mouse/cyapa.c
+++ b/drivers/input/mouse/cyapa.c
@@ -1223,7 +1223,7 @@ static DEVICE_ATTR(baseline, S_IRUGO, cyapa_show_baseline, NULL);
 static DEVICE_ATTR(calibrate, S_IWUSR, NULL, cyapa_calibrate_store);
 static DEVICE_ATTR(mode, S_IRUGO, cyapa_show_mode, NULL);
 
-static struct attribute *cyapa_sysfs_entries[] = {
+static struct attribute *cyapa_attrs[] = {
 	&dev_attr_firmware_version.attr,
 	&dev_attr_product_id.attr,
 	&dev_attr_update_fw.attr,
@@ -1232,10 +1232,7 @@ static struct attribute *cyapa_sysfs_entries[] = {
 	&dev_attr_mode.attr,
 	NULL,
 };
-
-static const struct attribute_group cyapa_sysfs_group = {
-	.attrs = cyapa_sysfs_entries,
-};
+ATTRIBUTE_GROUPS(cyapa);
 
 static void cyapa_disable_regulator(void *data)
 {
@@ -1302,12 +1299,6 @@ static int cyapa_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &cyapa_sysfs_group);
-	if (error) {
-		dev_err(dev, "failed to create sysfs entries: %d\n", error);
-		return error;
-	}
-
 	error = cyapa_prepare_wakeup_controls(cyapa);
 	if (error) {
 		dev_err(dev, "failed to prepare wakeup controls: %d\n", error);
@@ -1484,6 +1475,7 @@ MODULE_DEVICE_TABLE(of, cyapa_of_match);
 static struct i2c_driver cyapa_driver = {
 	.driver = {
 		.name = "cyapa",
+		.dev_groups = cyapa_groups,
 		.pm = pm_ptr(&cyapa_pm_ops),
 		.acpi_match_table = ACPI_PTR(cyapa_acpi_id),
 		.of_match_table = of_match_ptr(cyapa_of_match),
diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c
index 2b7b86eef280..c00dc1275da2 100644
--- a/drivers/input/mouse/navpoint.c
+++ b/drivers/input/mouse/navpoint.c
@@ -295,7 +295,7 @@ err_free_gpio:
 	return error;
 }
 
-static int navpoint_remove(struct platform_device *pdev)
+static void navpoint_remove(struct platform_device *pdev)
 {
 	const struct navpoint_platform_data *pdata =
 					dev_get_platdata(&pdev->dev);
@@ -311,8 +311,6 @@ static int navpoint_remove(struct platform_device *pdev)
 
 	if (gpio_is_valid(pdata->gpio))
 		gpio_free(pdata->gpio);
-
-	return 0;
 }
 
 static int navpoint_suspend(struct device *dev)
@@ -348,7 +346,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(navpoint_pm_ops,
 
 static struct platform_driver navpoint_driver = {
 	.probe = navpoint_probe,
-	.remove = navpoint_remove,
+	.remove_new = navpoint_remove,
 	.driver = {
 		.name = "navpoint",
 		.pm = pm_sleep_ptr(&navpoint_pm_ops),
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index f2e093b0b998..1b45b1d3077d 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -277,11 +277,11 @@ void rmi_unregister_function(struct rmi_function *fn)
 	device_del(&fn->dev);
 	of_node_put(fn->dev.of_node);
-	put_device(&fn->dev);
 
 	for (i = 0; i < fn->num_of_irqs; i++)
 		irq_dispose_mapping(fn->irq[i]);
 
+	put_device(&fn->dev);
 }
 
 /**
diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c
index 0d9a5756e3f5..3b3ac71e53dc 100644
--- a/drivers/input/rmi4/rmi_f34.c
+++ b/drivers/input/rmi4/rmi_f34.c
@@ -471,7 +471,7 @@ static ssize_t rmi_driver_update_fw_store(struct device *dev,
 	if (buf[count - 1] == '\0' || buf[count - 1] == '\n')
 		copy_count -= 1;
 
-	strncpy(fw_name, buf, copy_count);
+	memcpy(fw_name, buf, copy_count);
 	fw_name[copy_count] = '\0';
 
 	ret = request_firmware(&fw, fw_name, dev);
diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c
index 9f8d7b332d1b..c5b634940cfc 100644
--- a/drivers/input/serio/altera_ps2.c
+++ b/drivers/input/serio/altera_ps2.c
@@ -125,13 +125,11 @@ static int altera_ps2_probe(struct platform_device *pdev)
 /*
  * Remove one device from this driver.
  */
-static int altera_ps2_remove(struct platform_device *pdev)
+static void altera_ps2_remove(struct platform_device *pdev)
 {
 	struct ps2if *ps2if = platform_get_drvdata(pdev);
 
 	serio_unregister_port(ps2if->io);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -148,7 +146,7 @@ MODULE_DEVICE_TABLE(of, altera_ps2_match);
 */
 static struct platform_driver altera_ps2_driver = {
 	.probe = altera_ps2_probe,
-	.remove = altera_ps2_remove,
+	.remove_new = altera_ps2_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = of_match_ptr(altera_ps2_match),
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index ec93cb4573c3..0bd6ae106809 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -173,18 +173,16 @@ static int ams_delta_serio_init(struct platform_device *pdev)
 	return 0;
 }
 
-static int ams_delta_serio_exit(struct platform_device *pdev)
+static void ams_delta_serio_exit(struct platform_device *pdev)
 {
 	struct ams_delta_serio *priv = platform_get_drvdata(pdev);
 
 	serio_unregister_port(priv->serio);
-
-	return 0;
 }
 
 static struct platform_driver ams_delta_serio_driver = {
 	.probe = ams_delta_serio_init,
-	.remove = ams_delta_serio_exit,
+	.remove_new = ams_delta_serio_exit,
 	.driver = {
 		.name = DRIVER_NAME
 	},
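The rmi_bus hunk above is an ordering fix rather than a prototype change: put_device() may drop the last reference and free the function object, so disposing the IRQ mappings stored inside it has to happen first. The general teardown rule, sketched on a hypothetical structure:

/* hypothetical teardown; foo_function stands in for rmi_function */
void foo_unregister(struct foo_function *fn)
{
	int i;

	device_del(&fn->dev);

	/* release everything that lives inside *fn first... */
	for (i = 0; i < fn->num_irqs; i++)
		irq_dispose_mapping(fn->irq[i]);

	/* ...and only then drop the reference that may free fn */
	put_device(&fn->dev);
}

The neighbouring rmi_f34 hunk swaps strncpy() for memcpy(): copy_count is already bounded and the NUL terminator is written explicitly, so the padding and truncation semantics of strncpy() buy nothing here.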
diff --git a/drivers/input/serio/apbps2.c b/drivers/input/serio/apbps2.c
index 3f6866d39b86..dbbb10251520 100644
--- a/drivers/input/serio/apbps2.c
+++ b/drivers/input/serio/apbps2.c
@@ -187,13 +187,11 @@ static int apbps2_of_probe(struct platform_device *ofdev)
 	return 0;
 }
 
-static int apbps2_of_remove(struct platform_device *of_dev)
+static void apbps2_of_remove(struct platform_device *of_dev)
 {
 	struct apbps2_priv *priv = platform_get_drvdata(of_dev);
 
 	serio_unregister_port(priv->io);
-
-	return 0;
 }
 
 static const struct of_device_id apbps2_of_match[] = {
@@ -210,7 +208,7 @@ static struct platform_driver apbps2_of_driver = {
 		.of_match_table = apbps2_of_match,
 	},
 	.probe = apbps2_of_probe,
-	.remove = apbps2_of_remove,
+	.remove_new = apbps2_of_remove,
 };
 
 module_platform_driver(apbps2_of_driver);
diff --git a/drivers/input/serio/arc_ps2.c b/drivers/input/serio/arc_ps2.c
index a6debb13d527..9d8726830140 100644
--- a/drivers/input/serio/arc_ps2.c
+++ b/drivers/input/serio/arc_ps2.c
@@ -232,7 +232,7 @@ static int arc_ps2_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int arc_ps2_remove(struct platform_device *pdev)
+static void arc_ps2_remove(struct platform_device *pdev)
 {
 	struct arc_ps2_data *arc_ps2 = platform_get_drvdata(pdev);
 	int i;
@@ -244,8 +244,6 @@ static int arc_ps2_remove(struct platform_device *pdev)
 	dev_dbg(&pdev->dev, "frame error count = %i\n", arc_ps2->frame_error);
 	dev_dbg(&pdev->dev, "buffer overflow count = %i\n",
 		arc_ps2->buf_overflow);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -262,7 +260,7 @@ static struct platform_driver arc_ps2_driver = {
 		.of_match_table = of_match_ptr(arc_ps2_match),
 	},
 	.probe = arc_ps2_probe,
-	.remove = arc_ps2_remove,
+	.remove_new = arc_ps2_remove,
 };
 
 module_platform_driver(arc_ps2_driver);
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index 3da751f4a6bf..d5c9bb3d0103 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -180,11 +180,9 @@ static int ct82c710_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int ct82c710_remove(struct platform_device *dev)
+static void ct82c710_remove(struct platform_device *dev)
 {
 	serio_unregister_port(ct82c710_port);
-
-	return 0;
 }
 
 static struct platform_driver ct82c710_driver = {
@@ -192,7 +190,7 @@ static struct platform_driver ct82c710_driver = {
 		.name = "ct82c710",
 	},
 	.probe = ct82c710_probe,
-	.remove = ct82c710_remove,
+	.remove_new = ct82c710_remove,
 };
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index b68793bf05c8..c2fda54dc384 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -82,11 +82,9 @@ static int sparc_i8042_probe(struct platform_device *op)
 	return 0;
 }
 
-static int sparc_i8042_remove(struct platform_device *op)
+static void sparc_i8042_remove(struct platform_device *op)
 {
 	of_iounmap(kbd_res, kbd_iobase, 8);
-
-	return 0;
 }
 
 static const struct of_device_id sparc_i8042_match[] = {
@@ -103,7 +101,7 @@ static struct platform_driver sparc_i8042_driver = {
 		.of_match_table = sparc_i8042_match,
 	},
 	.probe = sparc_i8042_probe,
-	.remove = sparc_i8042_remove,
+	.remove_new = sparc_i8042_remove,
 };
 
 static bool i8042_is_mr_coffee(void)
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 6dac7c1853a5..9fbb8d31575a 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1584,13 +1584,11 @@ static int i8042_probe(struct platform_device *dev)
 	return error;
 }
 
-static int i8042_remove(struct platform_device *dev)
+static void i8042_remove(struct platform_device *dev)
 {
 	i8042_unregister_ports();
 	i8042_free_irqs();
 	i8042_controller_reset(false);
-
-	return 0;
 }
 
 static struct platform_driver i8042_driver = {
@@ -1601,7 +1599,7 @@ static struct platform_driver i8042_driver = {
 #endif
 	},
 	.probe = i8042_probe,
-	.remove = i8042_remove,
+	.remove_new = i8042_remove,
 	.shutdown = i8042_shutdown,
 };
diff --git a/drivers/input/serio/ioc3kbd.c b/drivers/input/serio/ioc3kbd.c
index d51bfe912db5..50552dc7b4f5 100644
--- a/drivers/input/serio/ioc3kbd.c
+++ b/drivers/input/serio/ioc3kbd.c
@@ -190,7 +190,7 @@ static int ioc3kbd_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ioc3kbd_remove(struct platform_device *pdev)
+static void ioc3kbd_remove(struct platform_device *pdev)
 {
 	struct ioc3kbd_data *d = platform_get_drvdata(pdev);
 
@@ -198,13 +198,11 @@ static int ioc3kbd_remove(struct platform_device *pdev)
 	serio_unregister_port(d->kbd);
 	serio_unregister_port(d->aux);
-
-	return 0;
 }
 
 static struct platform_driver ioc3kbd_driver = {
 	.probe = ioc3kbd_probe,
-	.remove = ioc3kbd_remove,
+	.remove_new = ioc3kbd_remove,
 	.driver = {
 		.name = "ioc3-kbd",
 	},
diff --git a/drivers/input/serio/maceps2.c b/drivers/input/serio/maceps2.c
index 629e15089c21..5ccfb82759b3 100644
--- a/drivers/input/serio/maceps2.c
+++ b/drivers/input/serio/maceps2.c
@@ -148,12 +148,10 @@ static int maceps2_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int maceps2_remove(struct platform_device *dev)
+static void maceps2_remove(struct platform_device *dev)
 {
 	serio_unregister_port(maceps2_port[0]);
 	serio_unregister_port(maceps2_port[1]);
-
-	return 0;
 }
 
 static struct platform_driver maceps2_driver = {
@@ -161,7 +159,7 @@ static struct platform_driver maceps2_driver = {
 		.name = "maceps2",
 	},
 	.probe = maceps2_probe,
-	.remove = maceps2_remove,
+	.remove_new = maceps2_remove,
 };
 
 static int __init maceps2_init(void)
diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c
index 33a8e5889bd8..240a714f7081 100644
--- a/drivers/input/serio/olpc_apsp.c
+++ b/drivers/input/serio/olpc_apsp.c
@@ -238,7 +238,7 @@ err_pad:
 	return error;
 }
 
-static int olpc_apsp_remove(struct platform_device *pdev)
+static void olpc_apsp_remove(struct platform_device *pdev)
 {
 	struct olpc_apsp *priv = platform_get_drvdata(pdev);
 
@@ -246,8 +246,6 @@ static int olpc_apsp_remove(struct platform_device *pdev)
 	serio_unregister_port(priv->kbio);
 	serio_unregister_port(priv->padio);
-
-	return 0;
 }
 
 static const struct of_device_id olpc_apsp_dt_ids[] = {
@@ -258,7 +256,7 @@ MODULE_DEVICE_TABLE(of, olpc_apsp_dt_ids);
 
 static struct platform_driver olpc_apsp_driver = {
 	.probe = olpc_apsp_probe,
-	.remove = olpc_apsp_remove,
+	.remove_new = olpc_apsp_remove,
 	.driver = {
 		.name = "olpc-apsp",
 		.of_match_table = olpc_apsp_dt_ids,
diff --git a/drivers/input/serio/ps2-gpio.c b/drivers/input/serio/ps2-gpio.c
index bc1dc484389b..c3ff60859a03 100644
--- a/drivers/input/serio/ps2-gpio.c
+++ b/drivers/input/serio/ps2-gpio.c
@@ -476,12 +476,11 @@ err_free_serio:
 	return error;
 }
 
-static int ps2_gpio_remove(struct platform_device *pdev)
+static void ps2_gpio_remove(struct platform_device *pdev)
 {
 	struct ps2_gpio_data *drvdata = platform_get_drvdata(pdev);
 
 	serio_unregister_port(drvdata->serio);
-	return 0;
 }
 
 #if defined(CONFIG_OF)
@@ -494,7 +493,7 @@ MODULE_DEVICE_TABLE(of, ps2_gpio_match);
 
 static struct platform_driver ps2_gpio_driver = {
 	.probe = ps2_gpio_probe,
-	.remove = ps2_gpio_remove,
+	.remove_new = ps2_gpio_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = of_match_ptr(ps2_gpio_match),
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index ba04058fc3cb..3f81f8749cd5 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -148,7 +148,7 @@ err_free_mem:
 	return error;
 }
 
-static int q40kbd_remove(struct platform_device *pdev)
+static void q40kbd_remove(struct platform_device *pdev)
 {
 	struct q40kbd *q40kbd = platform_get_drvdata(pdev);
 
@@ -160,15 +160,13 @@ static int q40kbd_remove(struct platform_device *pdev)
 	serio_unregister_port(q40kbd->port);
 	free_irq(Q40_IRQ_KEYBOARD, q40kbd);
 	kfree(q40kbd);
-
-	return 0;
 }
 
 static struct platform_driver q40kbd_driver = {
 	.driver = {
 		.name = "q40kbd",
 	},
-	.remove = q40kbd_remove,
+	.remove_new = q40kbd_remove,
 };
 
 module_platform_driver_probe(q40kbd_driver, q40kbd_probe);
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index e8a9709f32eb..9bbfefd092c0 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -133,20 +133,18 @@ static int rpckbd_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int rpckbd_remove(struct platform_device *dev)
+static void rpckbd_remove(struct platform_device *dev)
 {
 	struct serio *serio = platform_get_drvdata(dev);
 	struct rpckbd_data *rpckbd = serio->port_data;
 
 	serio_unregister_port(serio);
 	kfree(rpckbd);
-
-	return 0;
 }
 
 static struct platform_driver rpckbd_driver = {
 	.probe = rpckbd_probe,
-	.remove = rpckbd_remove,
+	.remove_new = rpckbd_remove,
 	.driver = {
 		.name = "kart",
 	},
diff --git a/drivers/input/serio/sun4i-ps2.c b/drivers/input/serio/sun4i-ps2.c
index eb262640192e..aec66d9f5176 100644
--- a/drivers/input/serio/sun4i-ps2.c
+++ b/drivers/input/serio/sun4i-ps2.c
@@ -297,7 +297,7 @@ err_free_mem:
 	return error;
 }
 
-static int sun4i_ps2_remove(struct platform_device *pdev)
+static void sun4i_ps2_remove(struct platform_device *pdev)
 {
 	struct sun4i_ps2data *drvdata = platform_get_drvdata(pdev);
 
@@ -311,8 +311,6 @@ static int sun4i_ps2_remove(struct platform_device *pdev)
 	iounmap(drvdata->reg_base);
 
 	kfree(drvdata);
-
-	return 0;
 }
 
 static const struct of_device_id sun4i_ps2_match[] = {
@@ -324,7 +322,7 @@ MODULE_DEVICE_TABLE(of, sun4i_ps2_match);
 
 static struct platform_driver sun4i_ps2_driver = {
 	.probe = sun4i_ps2_probe,
-	.remove = sun4i_ps2_remove,
+	.remove_new = sun4i_ps2_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = sun4i_ps2_match,
diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c
index f3d28da70b75..d8f9faf2b529 100644
--- a/drivers/input/serio/xilinx_ps2.c
+++ b/drivers/input/serio/xilinx_ps2.c
@@ -329,7 +329,7 @@ failed1:
 * if the driver module is being unloaded. It frees any resources allocated to
 * the device.
 */
-static int xps2_of_remove(struct platform_device *of_dev)
+static void xps2_of_remove(struct platform_device *of_dev)
 {
 	struct xps2data *drvdata = platform_get_drvdata(of_dev);
 	struct resource r_mem; /* IO mem resources */
@@ -344,8 +344,6 @@ static int xps2_of_remove(struct platform_device *of_dev)
 	release_mem_region(r_mem.start, resource_size(&r_mem));
 
 	kfree(drvdata);
-
-	return 0;
 }
 
 /* Match table for of_platform binding */
@@ -361,7 +359,7 @@ static struct platform_driver xps2_of_driver = {
 		.of_match_table = xps2_of_match,
 	},
 	.probe = xps2_of_probe,
-	.remove = xps2_of_remove,
+	.remove_new = xps2_of_remove,
 };
 
 module_platform_driver(xps2_of_driver);
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index edb36d663f22..a0598e9c7aff 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -612,10 +612,11 @@ static umode_t ad7877_attr_is_visible(struct kobject *kobj,
 	return mode;
 }
 
-static const struct attribute_group ad7877_attr_group = {
+static const struct attribute_group ad7877_group = {
 	.is_visible = ad7877_attr_is_visible,
 	.attrs = ad7877_attributes,
 };
+__ATTRIBUTE_GROUPS(ad7877);
 
 static void ad7877_setup_ts_def_msg(struct spi_device *spi, struct ad7877 *ts)
 {
@@ -777,10 +778,6 @@ static int ad7877_probe(struct spi_device *spi)
 		return err;
 	}
 
-	err = devm_device_add_group(&spi->dev, &ad7877_attr_group);
-	if (err)
-		return err;
-
 	err = input_register_device(input_dev);
 	if (err)
 		return err;
@@ -810,8 +807,9 @@ static DEFINE_SIMPLE_DEV_PM_OPS(ad7877_pm, ad7877_suspend, ad7877_resume);
 
 static struct spi_driver ad7877_driver = {
 	.driver = {
-		.name = "ad7877",
-		.pm = pm_sleep_ptr(&ad7877_pm),
+		.name = "ad7877",
+		.dev_groups = ad7877_groups,
+		.pm = pm_sleep_ptr(&ad7877_pm),
 	},
 	.probe = ad7877_probe,
 };
diff --git a/drivers/input/touchscreen/ad7879-i2c.c b/drivers/input/touchscreen/ad7879-i2c.c
index feaa6f8b01ed..5c094ab74698 100644
--- a/drivers/input/touchscreen/ad7879-i2c.c
+++ b/drivers/input/touchscreen/ad7879-i2c.c
@@ -58,9 +58,10 @@ MODULE_DEVICE_TABLE(of, ad7879_i2c_dt_ids);
 
 static struct i2c_driver ad7879_i2c_driver = {
 	.driver = {
-		.name = "ad7879",
-		.pm = &ad7879_pm_ops,
-		.of_match_table = of_match_ptr(ad7879_i2c_dt_ids),
+		.name = "ad7879",
+		.dev_groups = ad7879_groups,
+		.pm = &ad7879_pm_ops,
+		.of_match_table = of_match_ptr(ad7879_i2c_dt_ids),
 	},
 	.probe = ad7879_i2c_probe,
 	.id_table = ad7879_id,
diff --git a/drivers/input/touchscreen/ad7879-spi.c b/drivers/input/touchscreen/ad7879-spi.c
index 50e889846800..064968fe57cf 100644
--- a/drivers/input/touchscreen/ad7879-spi.c
+++ b/drivers/input/touchscreen/ad7879-spi.c
@@ -56,9 +56,10 @@ MODULE_DEVICE_TABLE(of, ad7879_spi_dt_ids);
 
 static struct spi_driver ad7879_spi_driver = {
 	.driver = {
-		.name = "ad7879",
-		.pm = &ad7879_pm_ops,
-		.of_match_table = of_match_ptr(ad7879_spi_dt_ids),
+		.name = "ad7879",
+		.dev_groups = ad7879_groups,
+		.pm = &ad7879_pm_ops,
+		.of_match_table = of_match_ptr(ad7879_spi_dt_ids),
 	},
 	.probe = ad7879_spi_probe,
 };
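The ad7879 driver is split into a core module plus I2C and SPI glue modules, and .dev_groups has to be set in each glue driver's struct. The core therefore exports the NULL-terminated group array (see the ad7879.c and ad7879.h hunks below), and both bus drivers reference it. A sketch of the split, with a hypothetical "foo" core/glue pair:

/* core module */
const struct attribute_group *foo_groups[] = {
	&foo_attr_group,
	NULL
};
EXPORT_SYMBOL_GPL(foo_groups);

/* shared header */
extern const struct attribute_group *foo_groups[];

/* bus glue module */
static struct i2c_driver foo_i2c_driver = {
	.driver = {
		.name = "foo",
		.dev_groups = foo_groups,
	},
	.probe = foo_i2c_probe,
};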
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index e850853328f1..e5d69bf2276e 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -391,6 +391,12 @@ static const struct attribute_group ad7879_attr_group = {
 	.attrs = ad7879_attributes,
 };
 
+const struct attribute_group *ad7879_groups[] = {
+	&ad7879_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ad7879_groups);
+
 #ifdef CONFIG_GPIOLIB
 static int ad7879_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
@@ -612,10 +618,6 @@ int ad7879_probe(struct device *dev, struct regmap *regmap,
 
 	__ad7879_disable(ts);
 
-	err = devm_device_add_group(dev, &ad7879_attr_group);
-	if (err)
-		return err;
-
 	err = ad7879_gpio_add(ts);
 	if (err)
 		return err;
diff --git a/drivers/input/touchscreen/ad7879.h b/drivers/input/touchscreen/ad7879.h
index ae8aa1428e56..d71a8e787290 100644
--- a/drivers/input/touchscreen/ad7879.h
+++ b/drivers/input/touchscreen/ad7879.h
@@ -8,11 +8,14 @@
 #ifndef _AD7879_H_
 #define _AD7879_H_
 
+#include <linux/pm.h>
 #include <linux/types.h>
 
+struct attribute_group;
 struct device;
 struct regmap;
 
+extern const struct attribute_group *ad7879_groups[];
 extern const struct dev_pm_ops ad7879_pm_ops;
 
 int ad7879_probe(struct device *dev, struct regmap *regmap,
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index faea40dd66d0..d2bbb436a77d 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -625,15 +625,12 @@ static ssize_t ads7846_disable_store(struct device *dev,
 
 static DEVICE_ATTR(disable, 0664, ads7846_disable_show, ads7846_disable_store);
 
-static struct attribute *ads784x_attributes[] = {
+static struct attribute *ads784x_attrs[] = {
 	&dev_attr_pen_down.attr,
 	&dev_attr_disable.attr,
 	NULL,
 };
-
-static const struct attribute_group ads784x_attr_group = {
-	.attrs = ads784x_attributes,
-};
+ATTRIBUTE_GROUPS(ads784x);
 
 /*--------------------------------------------------------------------------*/
 
@@ -1357,10 +1354,6 @@ static int ads7846_probe(struct spi_device *spi)
 	else
 		(void) ads7846_read12_ser(dev, READ_12BIT_SER(vaux));
 
-	err = devm_device_add_group(dev, &ads784x_attr_group);
-	if (err)
-		return err;
-
 	err = input_register_device(input_dev);
 	if (err)
 		return err;
@@ -1386,9 +1379,10 @@ static void ads7846_remove(struct spi_device *spi)
 
 static struct spi_driver ads7846_driver = {
 	.driver = {
-		.name = "ads7846",
-		.pm = pm_sleep_ptr(&ads7846_pm),
-		.of_match_table = ads7846_dt_ids,
+		.name = "ads7846",
+		.dev_groups = ads784x_groups,
+		.pm = pm_sleep_ptr(&ads7846_pm),
+		.of_match_table = ads7846_dt_ids,
 	},
 	.probe = ads7846_probe,
 	.remove = ads7846_remove,
diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c
index db5a885ecd72..68527ede5c0e 100644
--- a/drivers/input/touchscreen/cyttsp5.c
+++ b/drivers/input/touchscreen/cyttsp5.c
@@ -207,7 +207,7 @@ struct cyttsp5 {
 	int num_prv_rec;
 	struct regmap *regmap;
 	struct touchscreen_properties prop;
-	struct regulator *vdd;
+	struct regulator_bulk_data supplies[2];
 };
 
 /*
@@ -817,7 +817,7 @@ static void cyttsp5_cleanup(void *data)
 {
 	struct cyttsp5 *ts = data;
 
-	regulator_disable(ts->vdd);
+	regulator_bulk_disable(ARRAY_SIZE(ts->supplies), ts->supplies);
 }
 
 static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
@@ -840,9 +840,12 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
 	init_completion(&ts->cmd_done);
 
 	/* Power up the device */
-	ts->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(ts->vdd)) {
-		error = PTR_ERR(ts->vdd);
+	ts->supplies[0].supply = "vdd";
+	ts->supplies[1].supply = "vddio";
+	error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->supplies),
+					ts->supplies);
+	if (error) {
+		dev_err(ts->dev, "Failed to get regulators, error %d\n", error);
 		return error;
 	}
 
@@ -850,9 +853,11 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
 	if (error)
 		return error;
 
-	error = regulator_enable(ts->vdd);
-	if (error)
+	error = regulator_bulk_enable(ARRAY_SIZE(ts->supplies), ts->supplies);
+	if (error) {
+		dev_err(ts->dev, "Failed to enable regulators, error %d\n", error);
 		return error;
+	}
 
 	ts->input = devm_input_allocate_device(dev);
 	if (!ts->input) {
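The cyttsp5 hunks above move from a single devm_regulator_get("vdd") to the bulk regulator API so that vdd and vddio are acquired, enabled, and disabled as a set. The idiom, sketched on a hypothetical "foo" device with a struct foo { struct regulator_bulk_data supplies[2]; ... } state:

static int foo_power_up(struct device *dev, struct foo *ts)
{
	int error;

	/* one call fetches all named supplies; devm handles the release */
	ts->supplies[0].supply = "vdd";
	ts->supplies[1].supply = "vddio";
	error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->supplies),
					ts->supplies);
	if (error)
		return error;

	/* enables in array order; disables everything again on failure */
	return regulator_bulk_enable(ARRAY_SIZE(ts->supplies), ts->supplies);
}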
diff --git a/drivers/input/touchscreen/da9052_tsi.c b/drivers/input/touchscreen/da9052_tsi.c
index f91d0e02ddae..d71690ce6463 100644
--- a/drivers/input/touchscreen/da9052_tsi.c
+++ b/drivers/input/touchscreen/da9052_tsi.c
@@ -311,7 +311,7 @@ err_free_mem:
 	return error;
 }
 
-static int da9052_ts_remove(struct platform_device *pdev)
+static void da9052_ts_remove(struct platform_device *pdev)
 {
 	struct da9052_tsi *tsi = platform_get_drvdata(pdev);
 
@@ -322,13 +322,11 @@ static int da9052_ts_remove(struct platform_device *pdev)
 
 	input_unregister_device(tsi->dev);
 	kfree(tsi);
-
-	return 0;
 }
 
 static struct platform_driver da9052_tsi_driver = {
 	.probe = da9052_ts_probe,
-	.remove = da9052_ts_remove,
+	.remove_new = da9052_ts_remove,
 	.driver = {
 		.name = "da9052-tsi",
 	},
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 457d53337fbb..3e102bcc4a1c 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -580,10 +580,7 @@ static struct attribute *edt_ft5x06_attrs[] = {
 	&dev_attr_crc_errors.attr,
 	NULL
 };
-
-static const struct attribute_group edt_ft5x06_attr_group = {
-	.attrs = edt_ft5x06_attrs,
-};
+ATTRIBUTE_GROUPS(edt_ft5x06);
 
 static void edt_ft5x06_restore_reg_parameters(struct edt_ft5x06_ts_data *tsdata)
 {
@@ -1330,10 +1327,6 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &edt_ft5x06_attr_group);
-	if (error)
-		return error;
-
 	error = input_register_device(input);
 	if (error)
 		return error;
@@ -1502,6 +1495,7 @@ MODULE_DEVICE_TABLE(of, edt_ft5x06_of_match);
 static struct i2c_driver edt_ft5x06_ts_driver = {
 	.driver = {
 		.name = "edt_ft5x06",
+		.dev_groups = edt_ft5x06_groups,
 		.of_match_table = edt_ft5x06_of_match,
 		.pm = pm_sleep_ptr(&edt_ft5x06_ts_pm_ops),
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index a1af3de9f310..365765d40e62 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -1299,7 +1299,7 @@ static ELANTS_VERSION_ATTR(solution_version);
 static ELANTS_VERSION_ATTR(bc_version);
 static ELANTS_VERSION_ATTR(iap_version);
 
-static struct attribute *elants_attributes[] = {
+static struct attribute *elants_i2c_attrs[] = {
 	&dev_attr_calibrate.attr,
 	&dev_attr_update_fw.attr,
 	&dev_attr_iap_mode.attr,
@@ -1313,10 +1313,7 @@ static struct attribute *elants_attributes[] = {
 	&elants_ver_attr_iap_version.dattr.attr,
 	NULL
 };
-
-static const struct attribute_group elants_attribute_group = {
-	.attrs = elants_attributes,
-};
+ATTRIBUTE_GROUPS(elants_i2c);
 
 static int elants_i2c_power_on(struct elants_data *ts)
 {
@@ -1552,13 +1549,6 @@ static int elants_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &elants_attribute_group);
-	if (error) {
-		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
-			error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1667,6 +1657,7 @@ static struct i2c_driver elants_i2c_driver = {
 	.id_table = elants_i2c_id,
 	.driver = {
 		.name = DEVICE_NAME,
+		.dev_groups = elants_i2c_groups,
 		.pm = pm_sleep_ptr(&elants_i2c_pm_ops),
 		.acpi_match_table = ACPI_PTR(elants_acpi_id),
 		.of_match_table = of_match_ptr(elants_of_match),
diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c
index 4c0d99aae9e0..a4030cc9ff60 100644
--- a/drivers/input/touchscreen/exc3000.c
+++ b/drivers/input/touchscreen/exc3000.c
@@ -325,16 +325,13 @@ static ssize_t type_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(type);
 
-static struct attribute *sysfs_attrs[] = {
+static struct attribute *exc3000_attrs[] = {
 	&dev_attr_fw_version.attr,
 	&dev_attr_model.attr,
 	&dev_attr_type.attr,
 	NULL
 };
-
-static struct attribute_group exc3000_attribute_group = {
-	.attrs = sysfs_attrs
-};
+ATTRIBUTE_GROUPS(exc3000);
 
 static int exc3000_probe(struct i2c_client *client)
 {
@@ -437,10 +434,6 @@ static int exc3000_probe(struct i2c_client *client)
 
 	i2c_set_clientdata(client, data);
 
-	error = devm_device_add_group(&client->dev, &exc3000_attribute_group);
-	if (error)
-		return error;
-
 	return 0;
 }
 
@@ -473,6 +466,7 @@ MODULE_DEVICE_TABLE(acpi, exc3000_acpi_match);
 static struct i2c_driver exc3000_driver = {
 	.driver = {
 		.name = "exc3000",
+		.dev_groups = exc3000_groups,
 		.of_match_table = of_match_ptr(exc3000_of_match),
 		.acpi_match_table = ACPI_PTR(exc3000_acpi_match),
 	},
diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c
index 404153338df7..0f58258306bf 100644
--- a/drivers/input/touchscreen/hideep.c
+++ b/drivers/input/touchscreen/hideep.c
@@ -954,16 +954,13 @@ static DEVICE_ATTR(version, 0664, hideep_fw_version_show, NULL);
 static DEVICE_ATTR(product_id, 0664, hideep_product_id_show, NULL);
 static DEVICE_ATTR(update_fw, 0664, NULL, hideep_update_fw);
 
-static struct attribute *hideep_ts_sysfs_entries[] = {
+static struct attribute *hideep_ts_attrs[] = {
 	&dev_attr_version.attr,
 	&dev_attr_product_id.attr,
 	&dev_attr_update_fw.attr,
 	NULL,
 };
-
-static const struct attribute_group hideep_ts_attr_group = {
-	.attrs = hideep_ts_sysfs_entries,
-};
+ATTRIBUTE_GROUPS(hideep_ts);
 
 static void hideep_set_work_mode(struct hideep_ts *ts)
 {
@@ -1096,13 +1093,6 @@ static int hideep_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &hideep_ts_attr_group);
-	if (error) {
-		dev_err(&client->dev,
-			"failed to add sysfs attributes: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1131,6 +1121,7 @@ MODULE_DEVICE_TABLE(of, hideep_match_table);
 static struct i2c_driver hideep_driver = {
 	.driver = {
 		.name = HIDEEP_I2C_NAME,
+		.dev_groups = hideep_ts_groups,
 		.of_match_table = of_match_ptr(hideep_match_table),
 		.acpi_match_table = ACPI_PTR(hideep_acpi_id),
 		.pm = pm_sleep_ptr(&hideep_pm_ops),
diff --git a/drivers/input/touchscreen/hycon-hy46xx.c b/drivers/input/touchscreen/hycon-hy46xx.c
index 2450cfa14de9..d0f257989fd6 100644
--- a/drivers/input/touchscreen/hycon-hy46xx.c
+++ b/drivers/input/touchscreen/hycon-hy46xx.c
@@ -274,10 +274,7 @@ static struct attribute *hycon_hy46xx_attrs[] = {
 	&hycon_hy46xx_attr_bootloader_version.dattr.attr,
 	NULL
 };
-
-static const struct attribute_group hycon_hy46xx_attr_group = {
-	.attrs = hycon_hy46xx_attrs,
-};
+ATTRIBUTE_GROUPS(hycon_hy46xx);
 
 static void hycon_hy46xx_get_defaults(struct device *dev, struct hycon_hy46xx_data *tsdata)
 {
@@ -535,10 +532,6 @@ static int hycon_hy46xx_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &hycon_hy46xx_attr_group);
-	if (error)
-		return error;
-
 	error = input_register_device(input);
 	if (error)
 		return error;
@@ -576,6 +569,7 @@ MODULE_DEVICE_TABLE(of, hycon_hy46xx_of_match);
 static struct i2c_driver hycon_hy46xx_driver = {
 	.driver = {
 		.name = "hycon_hy46xx",
+		.dev_groups = hycon_hy46xx_groups,
 		.of_match_table = hycon_hy46xx_of_match,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index ad6828e4f2e2..31ffdc2a93f3 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -876,7 +876,7 @@ exit:
 
 static DEVICE_ATTR(firmware_update, 0200, NULL, ili210x_firmware_update_store);
 
-static struct attribute *ili210x_attributes[] = {
+static struct attribute *ili210x_attrs[] = {
 	&dev_attr_calibrate.attr,
 	&dev_attr_firmware_update.attr,
 	&dev_attr_firmware_version.attr,
@@ -904,10 +904,11 @@ static umode_t ili210x_attributes_visible(struct kobject *kobj,
 	return attr->mode;
 }
 
-static const struct attribute_group ili210x_attr_group = {
-	.attrs = ili210x_attributes,
+static const struct attribute_group ili210x_group = {
+	.attrs = ili210x_attrs,
 	.is_visible = ili210x_attributes_visible,
 };
+__ATTRIBUTE_GROUPS(ili210x);
 
 static void ili210x_power_down(void *data)
 {
@@ -1013,13 +1014,6 @@ static int ili210x_i2c_probe(struct i2c_client *client)
 	if (error)
 		return error;
 
-	error = devm_device_add_group(dev, &ili210x_attr_group);
-	if (error) {
-		dev_err(dev, "Unable to create sysfs attributes, err: %d\n",
-			error);
-		return error;
-	}
-
 	error = input_register_device(priv->input);
 	if (error) {
 		dev_err(dev, "Cannot register input device, err: %d\n", error);
@@ -1050,6 +1044,7 @@ MODULE_DEVICE_TABLE(of, ili210x_dt_ids);
 static struct i2c_driver ili210x_ts_driver = {
 	.driver = {
 		.name = "ili210x_i2c",
+		.dev_groups = ili210x_groups,
 		.of_match_table = ili210x_dt_ids,
 	},
 	.id_table = ili210x_i2c_id,
diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c
index 2f872e95fbba..90c4934e750a 100644
--- a/drivers/input/touchscreen/ilitek_ts_i2c.c
+++ b/drivers/input/touchscreen/ilitek_ts_i2c.c
@@ -537,10 +537,7 @@ static struct attribute *ilitek_sysfs_attrs[] = {
 	&dev_attr_product_id.attr,
 	NULL
 };
-
-static struct attribute_group ilitek_attrs_group = {
-	.attrs = ilitek_sysfs_attrs,
-};
+ATTRIBUTE_GROUPS(ilitek_sysfs);
 
 static int ilitek_ts_i2c_probe(struct i2c_client *client)
 {
@@ -595,12 +592,6 @@ static int ilitek_ts_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &ilitek_attrs_group);
-	if (error) {
-		dev_err(dev, "sysfs create group failed: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -675,6 +666,7 @@ MODULE_DEVICE_TABLE(of, ilitek_ts_i2c_match);
 static struct i2c_driver ilitek_ts_i2c_driver = {
 	.driver = {
 		.name = ILITEK_TS_NAME,
+		.dev_groups = ilitek_sysfs_groups,
 		.pm = pm_sleep_ptr(&ilitek_pm_ops),
 		.of_match_table = of_match_ptr(ilitek_ts_i2c_match),
 		.acpi_match_table = ACPI_PTR(ilitekts_acpi_id),
diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c
index b4768b66eb10..a3f4fb85bee5 100644
--- a/drivers/input/touchscreen/iqs5xx.c
+++ b/drivers/input/touchscreen/iqs5xx.c
@@ -974,10 +974,11 @@ static umode_t iqs5xx_attr_is_visible(struct kobject *kobj,
 	return attr->mode;
 }
 
-static const struct attribute_group iqs5xx_attr_group = {
+static const struct attribute_group iqs5xx_group = {
 	.is_visible = iqs5xx_attr_is_visible,
 	.attrs = iqs5xx_attrs,
 };
+__ATTRIBUTE_GROUPS(iqs5xx);
 
 static int iqs5xx_suspend(struct device *dev)
 {
@@ -1053,12 +1054,6 @@ static int iqs5xx_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &iqs5xx_attr_group);
-	if (error) {
-		dev_err(&client->dev, "Failed to add attributes: %d\n", error);
-		return error;
-	}
-
 	if (iqs5xx->input) {
 		error = input_register_device(iqs5xx->input);
 		if (error)
@@ -1089,6 +1084,7 @@ MODULE_DEVICE_TABLE(of, iqs5xx_of_match);
 static struct i2c_driver iqs5xx_i2c_driver = {
 	.driver = {
 		.name = "iqs5xx",
+		.dev_groups = iqs5xx_groups,
 		.of_match_table = iqs5xx_of_match,
 		.pm = pm_sleep_ptr(&iqs5xx_pm),
 	},
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 85b95ed461e7..bfbebe245040 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -252,18 +252,16 @@ static int mainstone_wm97xx_probe(struct platform_device *pdev)
 	return wm97xx_register_mach_ops(wm, &mainstone_mach_ops);
 }
 
-static int mainstone_wm97xx_remove(struct platform_device *pdev)
+static void mainstone_wm97xx_remove(struct platform_device *pdev)
 {
 	struct wm97xx *wm = platform_get_drvdata(pdev);
 
 	wm97xx_unregister_mach_ops(wm);
-
-	return 0;
 }
 
 static struct platform_driver mainstone_wm97xx_driver = {
 	.probe = mainstone_wm97xx_probe,
-	.remove = mainstone_wm97xx_remove,
+	.remove_new = mainstone_wm97xx_remove,
 	.driver = {
 		.name = "wm97xx-touch",
 	},
diff --git a/drivers/input/touchscreen/mc13783_ts.c b/drivers/input/touchscreen/mc13783_ts.c
index ae0d978c83bf..cbcd6e34efb7 100644
--- a/drivers/input/touchscreen/mc13783_ts.c
+++ b/drivers/input/touchscreen/mc13783_ts.c
@@ -217,18 +217,16 @@ err_free_mem:
 	return ret;
 }
 
-static int mc13783_ts_remove(struct platform_device *pdev)
+static void mc13783_ts_remove(struct platform_device *pdev)
 {
 	struct mc13783_ts_priv *priv = platform_get_drvdata(pdev);
 
 	input_unregister_device(priv->idev);
 	kfree(priv);
-
-	return 0;
 }
 
 static struct platform_driver mc13783_ts_driver = {
-	.remove = mc13783_ts_remove,
+	.remove_new = mc13783_ts_remove,
 	.driver = {
 		.name = MC13783_TS_NAME,
 	},
diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
index 2ac4483fbc25..aa325486f618 100644
--- a/drivers/input/touchscreen/melfas_mip4.c
+++ b/drivers/input/touchscreen/melfas_mip4.c
@@ -1419,10 +1419,7 @@ static struct attribute *mip4_attrs[] = {
 	&dev_attr_update_fw.attr,
 	NULL,
 };
-
-static const struct attribute_group mip4_attr_group = {
-	.attrs = mip4_attrs,
-};
+ATTRIBUTE_GROUPS(mip4);
 
 static int mip4_probe(struct i2c_client *client)
 {
@@ -1514,13 +1511,6 @@ static int mip4_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &mip4_attr_group);
-	if (error) {
-		dev_err(&client->dev,
-			"Failed to create sysfs attribute group: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1589,6 +1579,7 @@ static struct i2c_driver mip4_driver = {
 	.probe = mip4_probe,
 	.driver = {
 		.name = MIP4_DEVICE_NAME,
+		.dev_groups = mip4_groups,
 		.of_match_table = of_match_ptr(mip4_of_match),
 		.acpi_match_table = ACPI_PTR(mip4_acpi_match),
 		.pm = pm_sleep_ptr(&mip4_pm_ops),
diff --git a/drivers/input/touchscreen/pcap_ts.c b/drivers/input/touchscreen/pcap_ts.c
index b2da0194e02a..821245019fea 100644
--- a/drivers/input/touchscreen/pcap_ts.c
+++ b/drivers/input/touchscreen/pcap_ts.c
@@ -197,7 +197,7 @@ fail:
 	return err;
 }
 
-static int pcap_ts_remove(struct platform_device *pdev)
+static void pcap_ts_remove(struct platform_device *pdev)
 {
 	struct pcap_ts *pcap_ts = platform_get_drvdata(pdev);
 
@@ -207,8 +207,6 @@ static int pcap_ts_remove(struct platform_device *pdev)
 	input_unregister_device(pcap_ts->input);
 
 	kfree(pcap_ts);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -240,7 +238,7 @@ static const struct dev_pm_ops pcap_ts_pm_ops = {
 
 static struct platform_driver pcap_ts_driver = {
 	.probe = pcap_ts_probe,
-	.remove = pcap_ts_remove,
+	.remove_new = pcap_ts_remove,
 	.driver = {
 		.name = "pcap-ts",
 		.pm = PCAP_TS_PM_OPS,
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 78dd3059d585..13c500e776f6 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -1004,7 +1004,7 @@ static DEVICE_ATTR(boot_mode, S_IRUGO, raydium_i2c_boot_mode_show, NULL);
 static DEVICE_ATTR(update_fw, S_IWUSR, NULL, raydium_i2c_update_fw_store);
 static DEVICE_ATTR(calibrate, S_IWUSR, NULL, raydium_i2c_calibrate_store);
 
-static struct attribute *raydium_i2c_attributes[] = {
+static struct attribute *raydium_i2c_attrs[] = {
 	&dev_attr_update_fw.attr,
 	&dev_attr_boot_mode.attr,
 	&dev_attr_fw_version.attr,
@@ -1012,10 +1012,7 @@ static struct attribute *raydium_i2c_attributes[] = {
 	&dev_attr_calibrate.attr,
 	NULL
 };
-
-static const struct attribute_group raydium_i2c_attribute_group = {
-	.attrs = raydium_i2c_attributes,
-};
+ATTRIBUTE_GROUPS(raydium_i2c);
 
 static int raydium_i2c_power_on(struct raydium_data *ts)
 {
@@ -1174,14 +1171,6 @@ static int raydium_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev,
-				      &raydium_i2c_attribute_group);
-	if (error) {
-		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
-			error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1265,6 +1254,7 @@ static struct i2c_driver raydium_i2c_driver = {
 	.id_table = raydium_i2c_id,
 	.driver = {
 		.name = "raydium_ts",
+		.dev_groups = raydium_i2c_groups,
 		.pm = pm_sleep_ptr(&raydium_i2c_pm_ops),
 		.acpi_match_table = ACPI_PTR(raydium_acpi_id),
 		.of_match_table = of_match_ptr(raydium_of_match),
diff --git a/drivers/input/touchscreen/rohm_bu21023.c b/drivers/input/touchscreen/rohm_bu21023.c
index 240424f06b98..4493ad0c9322 100644
--- a/drivers/input/touchscreen/rohm_bu21023.c
+++ b/drivers/input/touchscreen/rohm_bu21023.c
@@ -854,10 +854,7 @@ static struct attribute *rohm_ts_attrs[] = {
 	&dev_attr_inv_y.attr,
 	NULL,
 };
-
-static const struct attribute_group rohm_ts_attr_group = {
-	.attrs = rohm_ts_attrs,
-};
+ATTRIBUTE_GROUPS(rohm_ts);
 
 static int rohm_ts_device_init(struct i2c_client *client, u8 setup2)
 {
@@ -1164,12 +1161,6 @@ static int rohm_bu21023_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &rohm_ts_attr_group);
-	if (error) {
-		dev_err(dev, "failed to create sysfs group: %d\n", error);
-		return error;
-	}
-
 	return error;
 }
 
@@ -1182,6 +1173,7 @@ MODULE_DEVICE_TABLE(i2c, rohm_bu21023_i2c_id);
 static struct i2c_driver rohm_bu21023_i2c_driver = {
 	.driver = {
 		.name = BU21023_NAME,
+		.dev_groups = rohm_ts_groups,
 	},
 	.probe = rohm_bu21023_i2c_probe,
 	.id_table = rohm_bu21023_i2c_id,
diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c
index 998d99d18911..149cc2c4925e 100644
--- a/drivers/input/touchscreen/s6sy761.c
+++ b/drivers/input/touchscreen/s6sy761.c
@@ -286,10 +286,7 @@ static struct attribute *s6sy761_sysfs_attrs[] = {
 	&dev_attr_devid.attr,
 	NULL
 };
-
-static struct attribute_group s6sy761_attribute_group = {
-	.attrs = s6sy761_sysfs_attrs
-};
+ATTRIBUTE_GROUPS(s6sy761_sysfs);
 
 static int s6sy761_power_on(struct s6sy761_data *sdata)
 {
@@ -465,10 +462,6 @@ static int s6sy761_probe(struct i2c_client *client)
 	if (err)
 		return err;
 
-	err = devm_device_add_group(&client->dev, &s6sy761_attribute_group);
-	if (err)
-		return err;
-
 	pm_runtime_enable(&client->dev);
 
 	return 0;
@@ -535,6 +528,7 @@ MODULE_DEVICE_TABLE(i2c, s6sy761_id);
 static struct i2c_driver s6sy761_driver = {
 	.driver = {
 		.name = S6SY761_DEV_NAME,
+		.dev_groups = s6sy761_sysfs_groups,
 		.of_match_table = of_match_ptr(s6sy761_of_match),
 		.pm = pm_ptr(&s6sy761_pm_ops),
 	},
diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c
index 56e371fd88fa..85010fa07908 100644
--- a/drivers/input/touchscreen/stmfts.c
+++ b/drivers/input/touchscreen/stmfts.c
@@ -517,10 +517,7 @@ static struct attribute *stmfts_sysfs_attrs[] = {
 	&dev_attr_hover_enable.attr,
 	NULL
 };
-
-static struct attribute_group stmfts_attribute_group = {
-	.attrs = stmfts_sysfs_attrs
-};
+ATTRIBUTE_GROUPS(stmfts_sysfs);
 
 static int stmfts_power_on(struct stmfts_data *sdata)
 {
@@ -727,10 +724,6 @@ static int stmfts_probe(struct i2c_client *client)
 		}
 	}
 
-	err = devm_device_add_group(&client->dev, &stmfts_attribute_group);
-	if (err)
-		return err;
-
 	pm_runtime_enable(&client->dev);
 	device_enable_async_suspend(&client->dev);
 
@@ -804,6 +797,7 @@ MODULE_DEVICE_TABLE(i2c, stmfts_id);
 static struct i2c_driver stmfts_driver = {
 	.driver = {
 		.name = STMFTS_DEV_NAME,
+		.dev_groups = stmfts_sysfs_groups,
 		.of_match_table = of_match_ptr(stmfts_of_match),
 		.pm = pm_ptr(&stmfts_pm_ops),
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c
index 25c45c3a3561..b204fdb2d22c 100644
--- a/drivers/input/touchscreen/stmpe-ts.c
+++ b/drivers/input/touchscreen/stmpe-ts.c
@@ -350,13 +350,11 @@ static int stmpe_input_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int stmpe_ts_remove(struct platform_device *pdev)
+static void stmpe_ts_remove(struct platform_device *pdev)
 {
 	struct stmpe_touch *ts = platform_get_drvdata(pdev);
 
 	stmpe_disable(ts->stmpe, STMPE_BLOCK_TOUCHSCREEN);
-
-	return 0;
 }
 
 static struct platform_driver stmpe_ts_driver = {
@@ -364,7 +362,7 @@ static struct platform_driver stmpe_ts_driver = {
 		.name = STMPE_TS_NAME,
 	},
 	.probe = stmpe_input_probe,
-	.remove = stmpe_ts_remove,
+	.remove_new = stmpe_ts_remove,
 };
 module_platform_driver(stmpe_ts_driver);
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
index bb3c6072fc82..92b2b840b4b7 100644
--- a/drivers/input/touchscreen/sun4i-ts.c
+++ b/drivers/input/touchscreen/sun4i-ts.c
@@ -375,7 +375,7 @@ static int sun4i_ts_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int sun4i_ts_remove(struct platform_device *pdev)
+static void sun4i_ts_remove(struct platform_device *pdev)
 {
 	struct sun4i_ts_data *ts = platform_get_drvdata(pdev);
 
@@ -385,8 +385,6 @@ static int sun4i_ts_remove(struct platform_device *pdev)
 
 	/* Deactivate all IRQs */
 	writel(0, ts->base + TP_INT_FIFOC);
-
-	return 0;
 }
 
 static const struct of_device_id sun4i_ts_of_match[] = {
@@ -403,7 +401,7 @@ static struct platform_driver sun4i_ts_driver = {
 		.of_match_table = sun4i_ts_of_match,
 	},
 	.probe = sun4i_ts_probe,
-	.remove = sun4i_ts_remove,
+	.remove_new = sun4i_ts_remove,
 };
 
 module_platform_driver(sun4i_ts_driver);
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index 9aa4e35fb4f5..34324f8512ac 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -491,7 +491,7 @@ err_free_mem:
 	return err;
 }
 
-static int titsc_remove(struct platform_device *pdev)
+static void titsc_remove(struct platform_device *pdev)
 {
 	struct titsc *ts_dev = platform_get_drvdata(pdev);
 	u32 steps;
@@ -508,7 +508,6 @@ static int titsc_remove(struct platform_device *pdev)
 
 	input_unregister_device(ts_dev->input);
 	kfree(ts_dev);
-	return 0;
 }
 
 static int titsc_suspend(struct device *dev)
@@ -552,7 +551,7 @@ MODULE_DEVICE_TABLE(of, ti_tsc_dt_ids);
 
 static struct platform_driver ti_tsc_driver = {
 	.probe = titsc_probe,
-	.remove = titsc_remove,
+	.remove_new = titsc_remove,
 	.driver = {
 		.name = "TI-am335x-tsc",
 		.pm = pm_sleep_ptr(&titsc_pm_ops),
diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
index b5e904c5b7c4..89c5248f66f6 100644
--- a/drivers/input/touchscreen/tsc2004.c
+++ b/drivers/input/touchscreen/tsc2004.c
@@ -63,9 +63,10 @@ MODULE_DEVICE_TABLE(of, tsc2004_of_match);
 
 static struct i2c_driver tsc2004_driver = {
 	.driver = {
-		.name = "tsc2004",
-		.of_match_table = of_match_ptr(tsc2004_of_match),
-		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
+		.name = "tsc2004",
+		.dev_groups = tsc200x_groups,
+		.of_match_table = of_match_ptr(tsc2004_of_match),
+		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
 	},
 	.id_table = tsc2004_idtable,
 	.probe = tsc2004_probe,
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index b6dfbcfc8c19..1b40ce0ca1b9 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -79,9 +79,10 @@ MODULE_DEVICE_TABLE(of, tsc2005_of_match);
 
 static struct spi_driver tsc2005_driver = {
 	.driver = {
-		.name = "tsc2005",
-		.of_match_table = of_match_ptr(tsc2005_of_match),
-		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
+		.name = "tsc2005",
+		.dev_groups = tsc200x_groups,
+		.of_match_table = of_match_ptr(tsc2005_of_match),
+		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
 	},
 	.probe = tsc2005_probe,
 	.remove = tsc2005_remove,
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index b799f26fcf8f..a4c0e9db9bb9 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -356,6 +356,12 @@ static const struct attribute_group tsc200x_attr_group = {
 	.attrs = tsc200x_attrs,
 };
 
+const struct attribute_group *tsc200x_groups[] = {
+	&tsc200x_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(tsc200x_groups);
+
 static void tsc200x_esd_work(struct work_struct *work)
 {
 	struct tsc200x *ts = container_of(work, struct tsc200x, esd_work.work);
@@ -553,25 +559,17 @@ int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		return error;
 
 	dev_set_drvdata(dev, ts);
-	error = sysfs_create_group(&dev->kobj, &tsc200x_attr_group);
-	if (error) {
-		dev_err(dev,
-			"Failed to create sysfs attributes, err: %d\n", error);
-		goto disable_regulator;
-	}
 
 	error = input_register_device(ts->idev);
 	if (error) {
 		dev_err(dev,
 			"Failed to register input device, err: %d\n", error);
-		goto err_remove_sysfs;
+		goto disable_regulator;
 	}
 
 	irq_set_irq_wake(irq, 1);
 	return 0;
 
-err_remove_sysfs:
-	sysfs_remove_group(&dev->kobj, &tsc200x_attr_group);
 disable_regulator:
 	regulator_disable(ts->vio);
 	return error;
@@ -582,8 +580,6 @@ void tsc200x_remove(struct device *dev)
 {
 	struct tsc200x *ts = dev_get_drvdata(dev);
 
-	sysfs_remove_group(&dev->kobj, &tsc200x_attr_group);
-
 	regulator_disable(ts->vio);
 }
 EXPORT_SYMBOL_GPL(tsc200x_remove);
diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
index 4ded34425b21..37de91efd78e 100644
--- a/drivers/input/touchscreen/tsc200x-core.h
+++ b/drivers/input/touchscreen/tsc200x-core.h
@@ -70,6 +70,7 @@ extern const struct regmap_config
tsc200x_regmap_config; extern const struct dev_pm_ops tsc200x_pm_ops; +extern const struct attribute_group *tsc200x_groups[]; int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, struct regmap *regmap, diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index cbc4750c53f9..128341a6696b 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -944,10 +944,7 @@ static struct attribute *wdt87xx_attrs[] = { &dev_attr_update_fw.attr, NULL }; - -static const struct attribute_group wdt87xx_attr_group = { - .attrs = wdt87xx_attrs, -}; +ATTRIBUTE_GROUPS(wdt87xx); static void wdt87xx_report_contact(struct input_dev *input, struct wdt87xx_sys_param *param, @@ -1104,12 +1101,6 @@ static int wdt87xx_ts_probe(struct i2c_client *client) return error; } - error = devm_device_add_group(&client->dev, &wdt87xx_attr_group); - if (error) { - dev_err(&client->dev, "create sysfs failed: %d\n", error); - return error; - } - return 0; } @@ -1172,8 +1163,9 @@ static struct i2c_driver wdt87xx_driver = { .probe = wdt87xx_ts_probe, .id_table = wdt87xx_dev_id, .driver = { - .name = WDT87XX_NAME, - .pm = pm_sleep_ptr(&wdt87xx_pm_ops), + .name = WDT87XX_NAME, + .dev_groups = wdt87xx_groups, + .pm = pm_sleep_ptr(&wdt87xx_pm_ops), .acpi_match_table = ACPI_PTR(wdt87xx_acpi_id), }, }; diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index 319f57fb9af5..9cee26b63341 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -374,14 +374,12 @@ err_alloc: return error; } -static int wm831x_ts_remove(struct platform_device *pdev) +static void wm831x_ts_remove(struct platform_device *pdev) { struct wm831x_ts *wm831x_ts = platform_get_drvdata(pdev); free_irq(wm831x_ts->pd_irq, wm831x_ts); free_irq(wm831x_ts->data_irq, wm831x_ts); - - return 0; } static struct platform_driver wm831x_ts_driver = { @@ -389,7 +387,7 @@ static struct platform_driver wm831x_ts_driver = { .name = "wm831x-touch", }, .probe = wm831x_ts_probe, - .remove = wm831x_ts_remove, + .remove_new = wm831x_ts_remove, }; module_platform_driver(wm831x_ts_driver); diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index ac3b3dd59488..f01f6cc9b59f 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -756,11 +756,9 @@ batt_err: return ret; } -static int wm97xx_mfd_remove(struct platform_device *pdev) +static void wm97xx_mfd_remove(struct platform_device *pdev) { wm97xx_remove(&pdev->dev); - - return 0; } static int wm97xx_suspend(struct device *dev) @@ -878,7 +876,7 @@ static struct platform_driver wm97xx_mfd_driver = { .pm = pm_sleep_ptr(&wm97xx_pm_ops), }, .probe = wm97xx_mfd_probe, - .remove = wm97xx_mfd_remove, + .remove_new = wm97xx_mfd_remove, }; static int __init wm97xx_init(void) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ee9e2a2edbf5..7673bb82945b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -95,7 +95,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if X86 + default IOMMU_DEFAULT_DMA_LAZY if X86 || S390 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by @@ -150,7 +150,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || X86 + def_bool ARM64 || X86 || S390 select DMA_OPS select 
IOMMU_API select IOMMU_IOVA @@ -240,17 +240,6 @@ config SUN50I_IOMMU help Support for the IOMMU introduced in the Allwinner H6 SoCs. -config TEGRA_IOMMU_GART - bool "Tegra GART IOMMU Support" - depends on ARCH_TEGRA_2x_SOC - depends on TEGRA_MC - select IOMMU_API - help - Enables support for remapping discontiguous physical memory - shared with the operating system into contiguous I/O virtual - space through the GART (Graphics Address Relocation Table) - hardware included on Tegra SoCs. - config TEGRA_IOMMU_SMMU bool "NVIDIA Tegra SMMU Support" depends on ARCH_TEGRA diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 769e43d780ce..95ad9dbfbda0 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o -obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 8bd4c3b183ec..443b2c13c37b 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -23,15 +23,6 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. -config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU - select MMU_NOTIFIER - help - This option enables support for the AMD IOMMUv2 features of the IOMMU - hardware. Select this option if you want to use devices that support - the PCI PRI and PASID interface. - config AMD_IOMMU_DEBUGFS bool "Enable AMD IOMMU internals in DebugFS" depends on AMD_IOMMU && IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 773d8aa00283..f454fbb1569e 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o -obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e2857109e966..86be1edd50ee 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -/* IOMMUv2 specific functions */ -struct iommu_domain; - bool amd_iommu_v2_supported(void); struct amd_iommu *get_amd_iommu(unsigned int idx); u8 amd_iommu_pc_get_max_banks(unsigned int idx); @@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); -int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -void amd_iommu_domain_direct_map(struct iommu_domain *dom); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +/* Device capabilities */ +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); + int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); @@ -87,9 +84,25 @@ static 
inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) +static inline bool check_feature(u64 mask) +{ + return (amd_iommu_efr & mask); +} + +static inline bool check_feature2(u64 mask) +{ + return (amd_iommu_efr2 & mask); +} + +static inline int check_feature_gpt_level(void) +{ + return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); +} + +static inline bool amd_iommu_gt_ppr_supported(void) { - return !!(iommu->features & mask); + return (check_feature(FEATURE_GT) && + check_feature(FEATURE_PPR)); } static inline u64 iommu_virt_to_phys(void *vaddr) @@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->iop.pt_root, root); domain->iop.root = (u64 *)(root & PAGE_MASK); domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } @@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); -extern u64 amd_iommu_efr; -extern u64 amd_iommu_efr2; - extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index dec4e5c2b66b..90b7d7950a9e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -462,6 +462,10 @@ #define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ #define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ +/* Timeout stuff */ +#define LOOP_TIMEOUT 100000 +#define MMIO_STATUS_TIMEOUT 2000000 + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ @@ -516,19 +520,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) -/* - * This struct is used to pass information about - * incoming PPR faults around. 
- */ -struct amd_iommu_fault { - u64 address; /* IO virtual address of the fault*/ - u32 pasid; /* Address space identifier */ - u32 sbdf; /* Originating PCI device id */ - u16 tag; /* PPR tag */ - u16 flags; /* Fault flags */ - -}; - struct amd_iommu; struct iommu_domain; @@ -555,7 +546,6 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ u64 *pgd; /* v2 pgtable pgd pointer */ }; @@ -688,9 +678,6 @@ struct amd_iommu { /* Extended features 2 */ u64 features2; - /* IOMMUv2 */ - bool is_iommu_v2; - /* PCI device id of the IOMMU device */ u16 devid; @@ -811,6 +798,14 @@ struct devid_map { bool cmd_line; }; +#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ +/* Device may request execution on memory pages */ +#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 +/* Device may request super-user privileges */ +#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 + /* * This struct contains device specific data for the IOMMU */ @@ -823,13 +818,15 @@ struct iommu_dev_data { struct protection_domain *domain; /* Domain the device is bound to */ struct device *dev; u16 devid; /* PCI Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for + + u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ + int ats_qdep; + u8 ats_enabled :1; /* ATS state */ + u8 pri_enabled :1; /* PRI state */ + u8 pasid_enabled:1; /* PASID state */ + u8 pri_tlp :1; /* PASID TLB required for PPR completions */ + u8 ppr :1; /* Enable device PPR support */ bool use_vapic; /* Enable device to use vapic mode */ bool defer_attach; @@ -896,16 +893,15 @@ extern unsigned amd_iommu_aperture_order; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* Smallest max PASID supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasid; - -extern bool amd_iommu_v2_present; - extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ extern int amd_iommu_max_glx_val; +/* Global EFR and EFR2 registers */ +extern u64 amd_iommu_efr; +extern u64 amd_iommu_efr2; + /* * This function flushes all internal caches of * the IOMMU used by this driver. diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 45efb7e5d725..64bcf3df37ee 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -83,8 +83,6 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 2000000 - #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) @@ -187,9 +185,6 @@ static int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasid __read_mostly = ~0; - -bool amd_iommu_v2_present __read_mostly; static bool amd_iommu_pc_present __read_mostly; bool amdr_ivrs_remap_support __read_mostly; @@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void) * Iterate through all the IOMMUs to get common EFR * masks among all IOMMUs and warn if found inconsistency. 
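 *
 * Once the global EFR/EFR2 pair is latched here, feature tests no
 * longer need a specific iommu instance. A minimal sketch using the
 * check_feature() helper this series introduces (the caller shown is
 * hypothetical):
 *
 *	if (check_feature(FEATURE_GT) && check_feature(FEATURE_PPR))
 *		pr_info("GT and PPR supported\n");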
*/ -static void get_global_efr(void) +static __init void get_global_efr(void) { struct amd_iommu *iommu; @@ -304,16 +299,6 @@ static void get_global_efr(void) pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); } -static bool check_feature_on_all_iommus(u64 mask) -{ - return !!(amd_iommu_efr & mask); -} - -static inline int check_feature_gpt_level(void) -{ - return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); -} - /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!check_feature_on_all_iommus(FEATURE_SNP)) + if (!check_feature(FEATURE_SNP)) return; /* Note: @@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, void *buf = (void *)__get_free_pages(gfp, order); if (buf && - check_feature_on_all_iommus(FEATURE_SNP) && + check_feature(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { free_pages((unsigned long)buf, order); buf = NULL; @@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return -EINVAL; return 0; @@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu) static void iommu_enable_gt(struct amd_iommu *iommu) { - if (!iommu_feature(iommu, FEATURE_GT)) + if (!check_feature(FEATURE_GT)) return; iommu_feature_enable(iommu, CONTROL_GT_EN); @@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) u64 val; struct pci_dev *pdev = iommu->dev; - if (!iommu_feature(iommu, FEATURE_PC)) + if (!check_feature(FEATURE_PC)) return; amd_iommu_pc_present = true; @@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features); + return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); - if (!iommu->features) { - iommu->features = features; - iommu->features2 = features2; + if (!amd_iommu_efr) { + amd_iommu_efr = features; + amd_iommu_efr2 = features2; return; } @@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features || - features2 != iommu->features2) { + if (features != amd_iommu_efr || + features2 != amd_iommu_efr2) { pr_warn(FW_WARN "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", - features, iommu->features, - features2, iommu->features2); + features, amd_iommu_efr, + features2, amd_iommu_efr2); } } @@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) late_iommu_features_init(iommu); - if (iommu_feature(iommu, FEATURE_GT)) { + if (check_feature(FEATURE_GT)) { int glxval; - u32 max_pasid; u64 pasmax; - pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax = amd_iommu_efr & FEATURE_PASID_MASK; pasmax >>= FEATURE_PASID_SHIFT; - max_pasid = (1 << (pasmax + 1)) - 1; + iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; - amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); - BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); - - glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; glxval >>= FEATURE_GLXVAL_SHIFT; if (amd_iommu_max_glx_val == -1) @@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); } - if (iommu_feature(iommu, FEATURE_GT) && - iommu_feature(iommu, FEATURE_PPR)) { - iommu->is_iommu_v2 = true; - amd_iommu_v2_present = true; - } - - if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); if (amd_iommu_pgtable == AMD_IOMMU_V2) { - if (!iommu_feature(iommu, FEATURE_GIOSUP) || - !iommu_feature(iommu, FEATURE_GT)) { + if (!check_feature(FEATURE_GIOSUP) || + !check_feature(FEATURE_GT)) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); amd_iommu_pgtable = AMD_IOMMU_V1; - } else if (iommu_default_passthrough()) { - pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; } } @@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) static void print_iommu_info(void) { + int i; static const char * const feat_str[] = { "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", "IA", "GA", "HE", "PC" }; - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - struct pci_dev *pdev = iommu->dev; - int i; - pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); + if (amd_iommu_efr) { + pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); - - for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { - if (iommu_feature(iommu, (1ULL << i))) - pr_cont(" %s", feat_str[i]); - } + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (check_feature(1ULL << i)) + pr_cont(" %s", feat_str[i]); + } - if (iommu->features & FEATURE_GAM_VAPIC) - pr_cont(" GA_vAPIC"); + if (check_feature(FEATURE_GAM_VAPIC)) + pr_cont(" GA_vAPIC"); - if (iommu->features & FEATURE_SNP) - pr_cont(" SNP"); + if (check_feature(FEATURE_SNP)) + pr_cont(" SNP"); - pr_cont("\n"); - } + pr_cont("\n"); } + if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) @@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void) * Need to set and poll check the GALOGRun bit to zero before * we can set/ modify GA Log registers safely. 
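 *
 * With the shared MMIO_STATUS_TIMEOUT bound (2000000 iterations of
 * udelay(10)), the poll below now waits up to roughly 20 seconds
 * (2000000 * 10us) before the WARN_ON fires, matching the GA log
 * enable path in iommu_ga_log_enable().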
*/ - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return; } if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + !check_feature(FEATURE_GAM_VAPIC)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return; } @@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void) * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without * setting up IOMMUv1 page table. */ - return amd_iommu_v2_present && !amd_iommu_snp_en; + return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; } -EXPORT_SYMBOL(amd_iommu_v2_supported); struct amd_iommu *get_amd_iommu(unsigned int idx) { @@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void) return -EINVAL; } - amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) return -EINVAL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index e9ef2e0a62f6..f818a7e254d4 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop) if (!(pdom->flags & PD_IOMMUV2_MASK)) return; - /* - * Make changes visible to IOMMUs. No need to clear gcr3 entry - * as gcr3 table is already freed. - */ + /* Clear gcr3 entry */ + amd_iommu_domain_clear_gcr3(&pdom->domain, 0); + + /* Make changes visible to IOMMUs */ amd_iommu_domain_update(pdom); /* Free page table */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b399c5741378..fcc987f5d4ed 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -45,8 +45,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define LOOP_TIMEOUT 100000 - /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -68,7 +66,6 @@ LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; const struct iommu_dirty_ops amd_dirty_ops; -static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; /* @@ -81,7 +78,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); -static int domain_enable_v2(struct protection_domain *domain, int pasids); /**************************************************************************** * @@ -324,24 +320,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev) return entry->group; } -static bool pci_iommuv2_capable(struct pci_dev *pdev) +static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data) { - static const int caps[] = { - PCI_EXT_CAP_ID_PRI, - PCI_EXT_CAP_ID_PASID, - }; - int i, pos; + return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP); +} - if (!pci_ats_supported(pdev)) - return false; +static u32 pdev_get_caps(struct pci_dev *pdev) +{ + int features; + u32 flags = 0; + + if (pci_ats_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; + + if (pci_pri_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; + + features = pci_pasid_features(pdev); + if (features >= 0) { + flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + + if (features & PCI_PASID_CAP_EXEC) + flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - for (i = 0; i < 2; ++i) { - pos = pci_find_ext_capability(pdev, caps[i]); - if (pos == 0) - return 
false; + if (features & PCI_PASID_CAP_PRIV) + flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; } - return true; + return flags; +} + +static inline int pdev_enable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->ats_enabled) + return 0; + + if (amd_iommu_iotlb_sup && + (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) { + ret = pci_enable_ats(pdev, PAGE_SHIFT); + if (!ret) { + dev_data->ats_enabled = 1; + dev_data->ats_qdep = pci_ats_queue_depth(pdev); + } + } + + return ret; +} + +static inline void pdev_disable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->ats_enabled) { + pci_disable_ats(pdev); + dev_data->ats_enabled = 0; + } +} + +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pri_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { + /* + * First reset the PRI state of the device. + * FIXME: Hardcode number of outstanding requests for now + */ + if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) { + dev_data->pri_enabled = 1; + dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); + + ret = 0; + } + } + + return ret; +} + +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pri_enabled) { + pci_disable_pri(pdev); + dev_data->pri_enabled = 0; + } +} + +static inline int pdev_enable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pasid_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) { + /* Only allow access to user-accessible pages */ + ret = pci_enable_pasid(pdev, 0); + if (!ret) + dev_data->pasid_enabled = 1; + } + + return ret; +} + +static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pasid_enabled) { + pci_disable_pasid(pdev); + dev_data->pasid_enabled = 0; + } +} + +static void pdev_enable_caps(struct pci_dev *pdev) +{ + pdev_enable_cap_ats(pdev); + pdev_enable_cap_pasid(pdev); + amd_iommu_pdev_enable_cap_pri(pdev); + +} + +static void pdev_disable_caps(struct pci_dev *pdev) +{ + pdev_disable_cap_ats(pdev); + pdev_disable_cap_pasid(pdev); + amd_iommu_pdev_disable_cap_pri(pdev); } /* @@ -401,8 +514,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) * it'll be forced to go into translation mode. 
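 *
 * pdev_get_caps() folds the old pci_iommuv2_capable() probing into a
 * single flags word: a device exposing ATS, PRI and PASID ends up
 * with dev_data->flags == (AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
 * AMD_IOMMU_DEVICE_FLAG_PRI_SUP | AMD_IOMMU_DEVICE_FLAG_PASID_SUP),
 * i.e. 0x7, plus the EXEC/PRIV bits reported by pci_pasid_features().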
*/ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && - dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - dev_data->iommu_v2 = iommu->is_iommu_v2; + dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) { + dev_data->flags = pdev_get_caps(to_pci_dev(dev)); } dev_iommu_priv_set(dev, dev_data); @@ -703,24 +816,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) -{ - struct amd_iommu_fault fault; - - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { - pr_err_ratelimited("Unknown PPR request received\n"); - return; - } - - fault.address = raw[1]; - fault.pasid = PPR_PASID(raw[0]); - fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); - fault.tag = PPR_TAG(raw[0]); - fault.flags = PPR_FLAGS(raw[0]); - - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); -} - static void iommu_poll_ppr_log(struct amd_iommu *iommu) { u32 head, tail; @@ -766,8 +861,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, entry); + /* TODO: PPR Handler will be added when we add IOPF support */ /* Refresh ring-buffer information */ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); @@ -1096,7 +1190,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid, } static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid, - int status, int tag, bool gn) + int status, int tag, u8 gn) { memset(cmd, 0, sizeof(*cmd)); @@ -1300,7 +1394,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { - if (iommu_feature(iommu, FEATURE_IA)) { + if (check_feature(FEATURE_IA)) { amd_iommu_flush_all(iommu); } else { amd_iommu_flush_dte_all(iommu); @@ -1319,7 +1413,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, struct iommu_cmd cmd; int qdep; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return -EINVAL; @@ -1370,7 +1464,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } - if (dev_data->ats.enabled) + if (dev_data->ats_enabled) ret = device_flush_iotlb(dev_data, 0, ~0UL); return ret; @@ -1403,7 +1497,7 @@ static void __domain_flush_pages(struct protection_domain *domain, list_for_each_entry(dev_data, &domain->dev_list, list) { - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; ret |= device_flush_iotlb(dev_data, address, size); @@ -1579,6 +1673,42 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +/* + * Number of GCR3 table levels required. Each level must be a 4-Kbyte + * page and can contain up to 512 entries. + */ +static int get_gcr3_levels(int pasids) +{ + int levels; + + if (pasids == -1) + return amd_iommu_max_glx_val; + + levels = get_count_order(pasids); + + return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; +} + +/* Note: This function expects iommu_domain->lock to be held prior to calling the function.
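 *
 * Worked example for get_gcr3_levels() above, assuming
 * get_count_order() returns ceil(log2(pasids)): pasids == 1 gives
 * order 0, so level 0 (one 4-Kbyte table); pasids == 512 gives order
 * 9 and DIV_ROUND_UP(9, 9) - 1 == 0, still a single level; a full
 * 20-bit PASID space gives order 20 and DIV_ROUND_UP(20, 9) - 1 == 2,
 * i.e. a three-level GCR3 walk.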
*/ +static int setup_gcr3_table(struct protection_domain *domain, int pasids) +{ + int levels = get_gcr3_levels(pasids); + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + return -ENOMEM; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + + amd_iommu_domain_update(domain); + + return 0; +} + static void set_dte_entry(struct amd_iommu *iommu, u16 devid, struct protection_domain *domain, bool ats, bool ppr) { @@ -1607,10 +1737,8 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, if (ats) flags |= DTE_FLAG_IOTLB; - if (ppr) { - if (iommu_feature(iommu, FEATURE_EPHSUP)) - pte_root |= 1ULL << DEV_ENTRY_PPR; - } + if (ppr) + pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) pte_root |= DTE_FLAG_HAD; @@ -1690,7 +1818,7 @@ static void do_attach(struct iommu_dev_data *dev_data, iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return; - ats = dev_data->ats.enabled; + ats = dev_data->ats_enabled; /* Update data structures */ dev_data->domain = domain; @@ -1706,7 +1834,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->iommu_v2); + ats, dev_data->ppr); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); @@ -1741,48 +1869,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -static void pdev_iommuv2_disable(struct pci_dev *pdev) -{ - pci_disable_ats(pdev); - pci_disable_pri(pdev); - pci_disable_pasid(pdev); -} - -static int pdev_pri_ats_enable(struct pci_dev *pdev) -{ - int ret; - - /* Only allow access to user-accessible pages */ - ret = pci_enable_pasid(pdev, 0); - if (ret) - return ret; - - /* First reset the PRI state of the device */ - ret = pci_reset_pri(pdev); - if (ret) - goto out_err_pasid; - - /* Enable PRI */ - /* FIXME: Hardcode number of outstanding requests for now */ - ret = pci_enable_pri(pdev, 32); - if (ret) - goto out_err_pasid; - - ret = pci_enable_ats(pdev, PAGE_SHIFT); - if (ret) - goto out_err_pri; - - return 0; - -out_err_pri: - pci_disable_pri(pdev); - -out_err_pasid: - pci_disable_pasid(pdev); - - return ret; -} - /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -1791,9 +1877,8 @@ static int attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data; - struct pci_dev *pdev; unsigned long flags; - int ret; + int ret = 0; spin_lock_irqsave(&domain->lock, flags); @@ -1801,45 +1886,13 @@ static int attach_device(struct device *dev, spin_lock(&dev_data->lock); - ret = -EBUSY; - if (dev_data->domain != NULL) + if (dev_data->domain != NULL) { + ret = -EBUSY; goto out; - - if (!dev_is_pci(dev)) - goto skip_ats_check; - - pdev = to_pci_dev(dev); - if (domain->flags & PD_IOMMUV2_MASK) { - struct iommu_domain *def_domain = iommu_get_dma_domain(dev); - - ret = -EINVAL; - - /* - * In case of using AMD_IOMMU_V1 page table mode and the device - * is enabling for PPR/ATS support (using v2 table), - * we need to make sure that the domain type is identity map. 
- */ - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && - def_domain->type != IOMMU_DOMAIN_IDENTITY) { - goto out; - } - - if (dev_data->iommu_v2) { - if (pdev_pri_ats_enable(pdev) != 0) - goto out; - - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); - dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); - } - } else if (amd_iommu_iotlb_sup && - pci_enable_ats(pdev, PAGE_SHIFT) == 0) { - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); } -skip_ats_check: - ret = 0; + if (dev_is_pci(dev)) + pdev_enable_caps(to_pci_dev(dev)); do_attach(dev_data, domain); @@ -1887,15 +1940,8 @@ static void detach_device(struct device *dev) do_detach(dev_data); - if (!dev_is_pci(dev)) - goto out; - - if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) - pdev_iommuv2_disable(to_pci_dev(dev)); - else if (dev_data->ats.enabled) - pci_disable_ats(to_pci_dev(dev)); - - dev_data->ats.enabled = false; + if (dev_is_pci(dev)) + pdev_disable_caps(to_pci_dev(dev)); out: spin_unlock(&dev_data->lock); @@ -1985,7 +2031,7 @@ static void update_device_table(struct protection_domain *domain) if (!iommu) continue; set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats.enabled, dev_data->iommu_v2); + dev_data->ats_enabled, dev_data->ppr); clone_aliases(iommu, dev_data->dev); } } @@ -2019,9 +2065,11 @@ void amd_iommu_domain_update(struct protection_domain *domain) static void cleanup_domain(struct protection_domain *domain) { struct iommu_dev_data *entry; - unsigned long flags; - spin_lock_irqsave(&domain->lock, flags); + lockdep_assert_held(&domain->lock); + + if (!domain->dev_cnt) + return; while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, @@ -2029,8 +2077,7 @@ static void cleanup_domain(struct protection_domain *domain) BUG_ON(!entry->domain); do_detach(entry); } - - spin_unlock_irqrestore(&domain->lock, flags); + WARN_ON(domain->dev_cnt != 0); } static void protection_domain_free(struct protection_domain *domain) @@ -2041,6 +2088,12 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); + + if (domain->iop.root) + free_page((unsigned long)domain->iop.root); + if (domain->id) domain_id_free(domain->id); @@ -2053,18 +2106,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - if (mode != PAGE_MODE_NONE) { pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) { - domain_id_free(domain->id); + if (!pt_root) return -ENOMEM; - } } amd_iommu_domain_set_pgtable(domain, pt_root, mode); @@ -2074,20 +2119,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) static int protection_domain_init_v2(struct protection_domain *domain) { - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - domain->flags |= PD_GIOV_MASK; domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - if (domain_enable_v2(domain, 1)) { - domain_id_free(domain->id); + if (setup_gcr3_table(domain, 1)) return -ENOMEM; - } return 0; } @@ -2097,57 +2134,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) struct io_pgtable_ops 
*pgtbl_ops; struct protection_domain *domain; int pgtable; - int mode = DEFAULT_PGTABLE_LEVEL; int ret; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_err; + + spin_lock_init(&domain->lock); + INIT_LIST_HEAD(&domain->dev_list); + domain->nid = NUMA_NO_NODE; + + switch (type) { + /* No need to allocate io pgtable ops in passthrough mode */ + case IOMMU_DOMAIN_IDENTITY: + return domain; + case IOMMU_DOMAIN_DMA: + pgtable = amd_iommu_pgtable; + break; /* - * Force IOMMU v1 page table when iommu=pt and - * when allocating domain for pass-through devices. + * Force IOMMU v1 page table when allocating + * domain for pass-through devices. */ - if (type == IOMMU_DOMAIN_IDENTITY) { - pgtable = AMD_IOMMU_V1; - mode = PAGE_MODE_NONE; - } else if (type == IOMMU_DOMAIN_UNMANAGED) { + case IOMMU_DOMAIN_UNMANAGED: pgtable = AMD_IOMMU_V1; - } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) { - pgtable = amd_iommu_pgtable; - } else { - return NULL; + break; + default: + goto out_err; } - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - switch (pgtable) { case AMD_IOMMU_V1: - ret = protection_domain_init_v1(domain, mode); + ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL); break; case AMD_IOMMU_V2: ret = protection_domain_init_v2(domain); break; default: ret = -EINVAL; + break; } if (ret) goto out_err; - /* No need to allocate io pgtable ops in passthrough mode */ - if (type == IOMMU_DOMAIN_IDENTITY) - return domain; - - domain->nid = NUMA_NO_NODE; - pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); - if (!pgtbl_ops) { - domain_id_free(domain->id); + if (!pgtbl_ops) goto out_err; - } return domain; out_err: - kfree(domain); + protection_domain_free(domain); return NULL; } @@ -2236,19 +2276,18 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags, static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; + unsigned long flags; - domain = to_pdomain(dom); + if (!dom) + return; - if (domain->dev_cnt > 0) - cleanup_domain(domain); + domain = to_pdomain(dom); - BUG_ON(domain->dev_cnt != 0); + spin_lock_irqsave(&domain->lock, flags); - if (!dom) - return; + cleanup_domain(domain); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + spin_unlock_irqrestore(&domain->lock, flags); protection_domain_free(domain); } @@ -2296,14 +2335,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return ret; } -static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, - unsigned long iova, size_t size) +static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, + unsigned long iova, size_t size) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; if (ops->map_pages) domain_flush_np_cache(domain, iova, size); + return 0; } static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, @@ -2541,7 +2581,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev) return dev_data->defer_attach; } -EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred); static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { @@ -2581,7 +2620,7 @@ static int amd_iommu_def_domain_type(struct device *dev) * and require remapping. * - SNP is enabled, because it prohibits DTE[Mode]=0. 
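 *
 * Net effect of the check below: only a PASID-capable device on a
 * host with neither memory encryption (CC_ATTR_MEM_ENCRYPT) nor SNP
 * active is steered to IOMMU_DOMAIN_IDENTITY; every other device
 * falls back to the core's default domain type.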
*/ - if (dev_data->iommu_v2 && + if (pdev_pasid_supported(dev_data) && !cc_platform_has(CC_ATTR_MEM_ENCRYPT) && !amd_iommu_snp_en) { return IOMMU_DOMAIN_IDENTITY; @@ -2626,93 +2665,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -/***************************************************************************** - * - * The next functions do a basic initialization of IOMMU for pass through - * mode - * - * In passthrough mode the IOMMU is initialized and enabled but not used for - * DMA-API translation. - * - *****************************************************************************/ - -/* IOMMUv2 specific functions */ -int amd_iommu_register_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); - -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); - -void amd_iommu_domain_direct_map(struct iommu_domain *dom) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - - if (domain->iop.pgtbl_cfg.tlb) - free_io_pgtable_ops(&domain->iop.iop.ops); - - spin_unlock_irqrestore(&domain->lock, flags); -} -EXPORT_SYMBOL(amd_iommu_domain_direct_map); - -/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ -static int domain_enable_v2(struct protection_domain *domain, int pasids) -{ - int levels; - - /* Number of GCR3 table levels required */ - for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) - levels += 1; - - if (levels > amd_iommu_max_glx_val) - return -EINVAL; - - domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) - return -ENOMEM; - - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; - - amd_iommu_domain_update(domain); - - return 0; -} - -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) -{ - struct protection_domain *pdom = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&pdom->lock, flags); - - /* - * Save us all sanity checks whether devices already in the - * domain support IOMMUv2. Just force that the domain has no - * devices attached when it is switched into IOMMUv2 mode. - */ - ret = -EBUSY; - if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) - goto out; - - if (!pdom->gcr3_tbl) - ret = domain_enable_v2(pdom, pasids); - -out: - spin_unlock_irqrestore(&pdom->lock, flags); - return ret; -} -EXPORT_SYMBOL(amd_iommu_domain_enable_v2); - static int __flush_pasid(struct protection_domain *domain, u32 pasid, u64 address, bool size) { @@ -2750,10 +2702,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, There might be non-IOMMUv2 capable devices in an IOMMUv2 * domain. 
*/ - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) continue; @@ -2794,7 +2746,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_flush_page); static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) { @@ -2814,7 +2765,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_flush_tlb); static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) { @@ -2894,7 +2844,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) { @@ -2908,7 +2857,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag) @@ -2927,49 +2875,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, return iommu_queue_command(iommu, &cmd); } -EXPORT_SYMBOL(amd_iommu_complete_ppr); - -int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info) -{ - int max_pasids; - int pos; - - if (pdev == NULL || info == NULL) - return -EINVAL; - - if (!amd_iommu_v2_supported()) - return -EINVAL; - - memset(info, 0, sizeof(*info)); - - if (pci_ats_supported(pdev)) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); - if (pos) { - int features; - - max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); - max_pasids = min(max_pasids, (1 << 20)); - - info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - info->max_pasids = min(pci_max_pasids(pdev), max_pasids); - - features = pci_pasid_features(pdev); - if (features & PCI_PASID_CAP_EXEC) - info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - if (features & PCI_PASID_CAP_PRIV) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; - } - - return 0; -} -EXPORT_SYMBOL(amd_iommu_device_info); #ifdef CONFIG_IRQ_REMAP diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c deleted file mode 100644 index 57c2fb1146e2..000000000000 --- a/drivers/iommu/amd/iommu_v2.c +++ /dev/null @@ -1,996 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. 
- * Author: Joerg Roedel <jroedel@suse.de> - */ - -#define pr_fmt(fmt) "AMD-Vi: " fmt - -#include <linux/refcount.h> -#include <linux/mmu_notifier.h> -#include <linux/amd-iommu.h> -#include <linux/mm_types.h> -#include <linux/profile.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/wait.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/cc_platform.h> - -#include "amd_iommu.h" - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); - -#define PRI_QUEUE_SIZE 512 - -struct pri_queue { - atomic_t inflight; - bool finish; - int status; -}; - -struct pasid_state { - struct list_head list; /* For global state-list */ - refcount_t count; /* Reference count */ - unsigned mmu_notifier_count; /* Counting nested mmu_notifier - calls */ - struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_notifier handle */ - struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ - struct device_state *device_state; /* Link to our device_state */ - u32 pasid; /* PASID index */ - bool invalid; /* Used during setup and - teardown of the pasid */ - spinlock_t lock; /* Protect pri_queues and - mmu_notifer_count */ - wait_queue_head_t wq; /* To wait for count == 0 */ -}; - -struct device_state { - struct list_head list; - u32 sbdf; - atomic_t count; - struct pci_dev *pdev; - struct pasid_state **states; - struct iommu_domain *domain; - int pasid_levels; - int max_pasids; - amd_iommu_invalid_ppr_cb inv_ppr_cb; - amd_iommu_invalidate_ctx inv_ctx_cb; - spinlock_t lock; - wait_queue_head_t wq; -}; - -struct fault { - struct work_struct work; - struct device_state *dev_state; - struct pasid_state *state; - struct mm_struct *mm; - u64 address; - u32 pasid; - u16 tag; - u16 finish; - u16 flags; -}; - -static LIST_HEAD(state_list); -static DEFINE_SPINLOCK(state_lock); - -static struct workqueue_struct *iommu_wq; - -static void free_pasid_states(struct device_state *dev_state); - -static struct device_state *__get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - - list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->sbdf == sbdf) - return dev_state; - } - - return NULL; -} - -static struct device_state *get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - unsigned long flags; - - spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(sbdf); - if (dev_state != NULL) - atomic_inc(&dev_state->count); - spin_unlock_irqrestore(&state_lock, flags); - - return dev_state; -} - -static void free_device_state(struct device_state *dev_state) -{ - struct iommu_group *group; - - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - - /* - * Wait until the last reference is dropped before freeing - * the device state. - */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); - - /* - * First detach device from domain - No more PRI requests will arrive - * from that device after it is unbound from the IOMMUv2 domain. 
- */ - group = iommu_group_get(&dev_state->pdev->dev); - if (WARN_ON(!group)) - return; - - iommu_detach_group(dev_state->domain, group); - - iommu_group_put(group); - - /* Everything is down now, free the IOMMUv2 domain */ - iommu_domain_free(dev_state->domain); - - /* Finally get rid of the device-state */ - kfree(dev_state); -} - -static void put_device_state(struct device_state *dev_state) -{ - if (atomic_dec_and_test(&dev_state->count)) - wake_up(&dev_state->wq); -} - -/* Must be called under dev_state->lock */ -static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, - u32 pasid, bool alloc) -{ - struct pasid_state **root, **ptr; - int level, index; - - level = dev_state->pasid_levels; - root = dev_state->states; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - ptr = &root[index]; - - if (level == 0) - break; - - if (*ptr == NULL) { - if (!alloc) - return NULL; - - *ptr = (void *)get_zeroed_page(GFP_ATOMIC); - if (*ptr == NULL) - return NULL; - } - - root = (struct pasid_state **)*ptr; - level -= 1; - } - - return ptr; -} - -static int set_pasid_state(struct device_state *dev_state, - struct pasid_state *pasid_state, - u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - int ret; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - ret = -ENOMEM; - if (ptr == NULL) - goto out_unlock; - - ret = -ENOMEM; - if (*ptr != NULL) - goto out_unlock; - - *ptr = pasid_state; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void clear_pasid_state(struct device_state *dev_state, u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - if (ptr == NULL) - goto out_unlock; - - *ptr = NULL; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); -} - -static struct pasid_state *get_pasid_state(struct device_state *dev_state, - u32 pasid) -{ - struct pasid_state **ptr, *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, false); - - if (ptr == NULL) - goto out_unlock; - - ret = *ptr; - if (ret) - refcount_inc(&ret->count); - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void free_pasid_state(struct pasid_state *pasid_state) -{ - kfree(pasid_state); -} - -static void put_pasid_state(struct pasid_state *pasid_state) -{ - if (refcount_dec_and_test(&pasid_state->count)) - wake_up(&pasid_state->wq); -} - -static void put_pasid_state_wait(struct pasid_state *pasid_state) -{ - if (!refcount_dec_and_test(&pasid_state->count)) - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); - free_pasid_state(pasid_state); -} - -static void unbind_pasid(struct pasid_state *pasid_state) -{ - struct iommu_domain *domain; - - domain = pasid_state->device_state->domain; - - /* - * Mark pasid_state as invalid, no more faults will we added to the - * work queue after this is visible everywhere. 
- */ - pasid_state->invalid = true; - - /* Make sure this is visible */ - smp_wmb(); - - /* After this the device/pasid can't access the mm anymore */ - amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - - /* Make sure no more pending faults are in the queue */ - flush_workqueue(iommu_wq); -} - -static void free_pasid_states_level1(struct pasid_state **tbl) -{ - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - free_page((unsigned long)tbl[i]); - } -} - -static void free_pasid_states_level2(struct pasid_state **tbl) -{ - struct pasid_state **ptr; - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - ptr = (struct pasid_state **)tbl[i]; - free_pasid_states_level1(ptr); - } -} - -static void free_pasid_states(struct device_state *dev_state) -{ - struct pasid_state *pasid_state; - int i; - - for (i = 0; i < dev_state->max_pasids; ++i) { - pasid_state = get_pasid_state(dev_state, i); - if (pasid_state == NULL) - continue; - - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * This will call the mn_release function and - * unbind the PASID - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); - } - - if (dev_state->pasid_levels == 2) - free_pasid_states_level2(dev_state->states); - else if (dev_state->pasid_levels == 1) - free_pasid_states_level1(dev_state->states); - else - BUG_ON(dev_state->pasid_levels != 0); - - free_page((unsigned long)dev_state->states); -} - -static struct pasid_state *mn_to_state(struct mmu_notifier *mn) -{ - return container_of(mn, struct pasid_state, mn); -} - -static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - if ((start ^ (end - 1)) < PAGE_SIZE) - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, - start); - else - amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); -} - -static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - bool run_inv_ctx_cb; - - might_sleep(); - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - run_inv_ctx_cb = !pasid_state->invalid; - - if (run_inv_ctx_cb && dev_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - - unbind_pasid(pasid_state); -} - -static const struct mmu_notifier_ops iommu_mn = { - .release = mn_release, - .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs, -}; - -static void set_pri_tag_status(struct pasid_state *pasid_state, - u16 tag, int status) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - pasid_state->pri[tag].status = status; - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void finish_pri_tag(struct device_state *dev_state, - struct pasid_state *pasid_state, - u16 tag) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && - pasid_state->pri[tag].finish) { - amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, - 
pasid_state->pri[tag].status, tag); - pasid_state->pri[tag].finish = false; - pasid_state->pri[tag].status = PPR_SUCCESS; - } - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void handle_fault_error(struct fault *fault) -{ - int status; - - if (!fault->dev_state->inv_ppr_cb) { - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - return; - } - - status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, - fault->pasid, - fault->address, - fault->flags); - switch (status) { - case AMD_IOMMU_INV_PRI_RSP_SUCCESS: - set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); - break; - case AMD_IOMMU_INV_PRI_RSP_INVALID: - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - break; - case AMD_IOMMU_INV_PRI_RSP_FAIL: - set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); - break; - default: - BUG(); - } -} - -static bool access_error(struct vm_area_struct *vma, struct fault *fault) -{ - unsigned long requested = 0; - - if (fault->flags & PPR_FAULT_EXEC) - requested |= VM_EXEC; - - if (fault->flags & PPR_FAULT_READ) - requested |= VM_READ; - - if (fault->flags & PPR_FAULT_WRITE) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - -static void do_fault(struct work_struct *work) -{ - struct fault *fault = container_of(work, struct fault, work); - struct vm_area_struct *vma; - vm_fault_t ret = VM_FAULT_ERROR; - unsigned int flags = 0; - struct mm_struct *mm; - u64 address; - - mm = fault->state->mm; - address = fault->address; - - if (fault->flags & PPR_FAULT_USER) - flags |= FAULT_FLAG_USER; - if (fault->flags & PPR_FAULT_WRITE) - flags |= FAULT_FLAG_WRITE; - flags |= FAULT_FLAG_REMOTE; - - mmap_read_lock(mm); - vma = vma_lookup(mm, address); - if (!vma) - /* failed to get a vma in the right range */ - goto out; - - /* Check if we have the right permissions on the vma */ - if (access_error(vma, fault)) - goto out; - - ret = handle_mm_fault(vma, address, flags, NULL); -out: - mmap_read_unlock(mm); - - if (ret & VM_FAULT_ERROR) - /* failed to service fault */ - handle_fault_error(fault); - - finish_pri_tag(fault->dev_state, fault->state, fault->tag); - - put_pasid_state(fault->state); - - kfree(fault); -} - -static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) -{ - struct amd_iommu_fault *iommu_fault; - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct pci_dev *pdev = NULL; - unsigned long flags; - struct fault *fault; - bool finish; - u16 tag, devid, seg_id; - int ret; - - iommu_fault = data; - tag = iommu_fault->tag & 0x1ff; - finish = (iommu_fault->tag >> 9) & 1; - - seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); - devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); - pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), - devid & 0xff); - if (!pdev) - return -ENODEV; - - ret = NOTIFY_DONE; - - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(&pdev->dev)) { - amd_iommu_complete_ppr(pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out; - } - - dev_state = get_device_state(iommu_fault->sbdf); - if (dev_state == NULL) - goto out; - - pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL || pasid_state->invalid) { - /* We know the device but not the PASID -> send INVALID */ - amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out_drop_state; - } - - spin_lock_irqsave(&pasid_state->lock, flags); - atomic_inc(&pasid_state->pri[tag].inflight); - if 
(finish) - pasid_state->pri[tag].finish = true; - spin_unlock_irqrestore(&pasid_state->lock, flags); - - fault = kzalloc(sizeof(*fault), GFP_ATOMIC); - if (fault == NULL) { - /* We are OOM - send success and let the device re-fault */ - finish_pri_tag(dev_state, pasid_state, tag); - goto out_drop_state; - } - - fault->dev_state = dev_state; - fault->address = iommu_fault->address; - fault->state = pasid_state; - fault->tag = tag; - fault->finish = finish; - fault->pasid = iommu_fault->pasid; - fault->flags = iommu_fault->flags; - INIT_WORK(&fault->work, do_fault); - - queue_work(iommu_wq, &fault->work); - - ret = NOTIFY_OK; - -out_drop_state: - - if (ret != NOTIFY_OK && pasid_state) - put_pasid_state(pasid_state); - - put_device_state(dev_state); - -out: - pci_dev_put(pdev); - return ret; -} - -static struct notifier_block ppr_nb = { - .notifier_call = ppr_notifier, -}; - -int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct mm_struct *mm; - u32 sbdf; - int ret; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - - if (dev_state == NULL) - return -EINVAL; - - ret = -EINVAL; - if (pasid >= dev_state->max_pasids) - goto out; - - ret = -ENOMEM; - pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); - if (pasid_state == NULL) - goto out; - - - refcount_set(&pasid_state->count, 1); - init_waitqueue_head(&pasid_state->wq); - spin_lock_init(&pasid_state->lock); - - mm = get_task_mm(task); - pasid_state->mm = mm; - pasid_state->device_state = dev_state; - pasid_state->pasid = pasid; - pasid_state->invalid = true; /* Mark as valid only if we are - done with setting up the pasid */ - pasid_state->mn.ops = &iommu_mn; - - if (pasid_state->mm == NULL) - goto out_free; - - ret = mmu_notifier_register(&pasid_state->mn, mm); - if (ret) - goto out_free; - - ret = set_pasid_state(dev_state, pasid_state, pasid); - if (ret) - goto out_unregister; - - ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, - __pa(pasid_state->mm->pgd)); - if (ret) - goto out_clear_state; - - /* Now we are ready to handle faults */ - pasid_state->invalid = false; - - /* - * Drop the reference to the mm_struct here. We rely on the - * mmu_notifier release call-back to inform us when the mm - * is going away. - */ - mmput(mm); - - return 0; - -out_clear_state: - clear_pasid_state(dev_state, pasid); - -out_unregister: - mmu_notifier_unregister(&pasid_state->mn, mm); - mmput(mm); - -out_free: - free_pasid_state(pasid_state); - -out: - put_device_state(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_bind_pasid); - -void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - u32 sbdf; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - if (dev_state == NULL) - return; - - if (pasid >= dev_state->max_pasids) - goto out; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - goto out; - /* - * Drop reference taken here. We are safe because we still hold - * the reference taken in the amd_iommu_bind_pasid function. 
- */ - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * Call mmu_notifier_unregister to drop our reference - * to pasid_state->mm - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ -out: - /* Drop reference taken in this function */ - put_device_state(dev_state); - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_unbind_pasid); - -int amd_iommu_init_device(struct pci_dev *pdev, int pasids) -{ - struct device_state *dev_state; - struct iommu_group *group; - unsigned long flags; - int ret, tmp; - u32 sbdf; - - might_sleep(); - - /* - * When memory encryption is active the device is likely not in a - * direct-mapped domain. Forbid using IOMMUv2 functionality for now. - */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -ENODEV; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - - sbdf = get_pci_sbdf_id(pdev); - - dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); - if (dev_state == NULL) - return -ENOMEM; - - spin_lock_init(&dev_state->lock); - init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; - dev_state->sbdf = sbdf; - - tmp = pasids; - for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) - dev_state->pasid_levels += 1; - - atomic_set(&dev_state->count, 1); - dev_state->max_pasids = pasids; - - ret = -ENOMEM; - dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); - if (dev_state->states == NULL) - goto out_free_dev_state; - - dev_state->domain = iommu_domain_alloc(&pci_bus_type); - if (dev_state->domain == NULL) - goto out_free_states; - - /* See iommu_is_default_domain() */ - dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; - amd_iommu_domain_direct_map(dev_state->domain); - - ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); - if (ret) - goto out_free_domain; - - group = iommu_group_get(&pdev->dev); - if (!group) { - ret = -EINVAL; - goto out_free_domain; - } - - ret = iommu_attach_group(dev_state->domain, group); - if (ret != 0) - goto out_drop_group; - - iommu_group_put(group); - - spin_lock_irqsave(&state_lock, flags); - - if (__get_device_state(sbdf) != NULL) { - spin_unlock_irqrestore(&state_lock, flags); - ret = -EBUSY; - goto out_free_domain; - } - - list_add_tail(&dev_state->list, &state_list); - - spin_unlock_irqrestore(&state_lock, flags); - - return 0; - -out_drop_group: - iommu_group_put(group); - -out_free_domain: - iommu_domain_free(dev_state->domain); - -out_free_states: - free_page((unsigned long)dev_state->states); - -out_free_dev_state: - kfree(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_init_device); - -void amd_iommu_free_device(struct pci_dev *pdev) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) { - spin_unlock_irqrestore(&state_lock, flags); - return; - } - - list_del(&dev_state->list); - - spin_unlock_irqrestore(&state_lock, flags); - - put_device_state(dev_state); - free_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_free_device); - -int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb) -{ - struct device_state 
*dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ppr_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); - -int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ctx_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); - -static int __init amd_iommu_v2_init(void) -{ - int ret; - - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); - /* - * Load anyway to provide the symbols to other modules - * which may use AMD IOMMUv2 optionally. - */ - return 0; - } - - ret = -ENOMEM; - iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); - if (iommu_wq == NULL) - goto out; - - amd_iommu_register_ppr_notifier(&ppr_nb); - - pr_info("AMD IOMMUv2 loaded and initialized\n"); - - return 0; - -out: - return ret; -} - -static void __exit amd_iommu_v2_exit(void) -{ - struct device_state *dev_state, *next; - unsigned long flags; - LIST_HEAD(freelist); - - if (!amd_iommu_v2_supported()) - return; - - amd_iommu_unregister_ppr_notifier(&ppr_nb); - - flush_workqueue(iommu_wq); - - /* - * The loop below might call flush_workqueue(), so call - * destroy_workqueue() after it - */ - spin_lock_irqsave(&state_lock, flags); - - list_for_each_entry_safe(dev_state, next, &state_list, list) { - WARN_ON_ONCE(1); - - put_device_state(dev_state); - list_del(&dev_state->list); - list_add_tail(&dev_state->list, &freelist); - } - - spin_unlock_irqrestore(&state_lock, flags); - - /* - * Since free_device_state waits on the count to be zero, - * we need to free dev_state outside the spinlock. - */ - list_for_each_entry_safe(dev_state, next, &freelist, list) { - list_del(&dev_state->list); - free_device_state(dev_state); - } - - destroy_workqueue(iommu_wq); -} - -module_init(amd_iommu_v2_init); -module_exit(amd_iommu_v2_exit); diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 0b8927508427..ee05f4824bfa 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -196,7 +196,6 @@ struct apple_dart_hw { * @lock: lock for hardware operations involving this dart * @pgsize: pagesize supported by this DART * @supports_bypass: indicates if this DART supports bypass mode - * @force_bypass: force bypass mode due to pagesize mismatch? 
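amd_iommu_v2_exit() above moves every remaining device_state onto a local freelist while holding state_lock and frees the entries only after dropping it, since free_device_state() can block waiting for the refcount to drain. The same unlink-under-lock, free-outside-lock shape in portable C (the pthread naming is illustrative):

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *state_list;

static void teardown(void)
{
	struct node *freelist, *n;

	/* Unlink everything while the lock is held... */
	pthread_mutex_lock(&state_lock);
	freelist = state_list;
	state_list = NULL;
	pthread_mutex_unlock(&state_lock);

	/* ...then free outside it, where blocking is allowed. */
	while ((n = freelist)) {
		freelist = n->next;
		free(n);	/* the driver's free path may sleep here */
	}
}

Nothing can race with module exit to repopulate the list, so walking the local freelist without the lock is safe.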
* @sid2group: maps stream ids to iommu_groups * @iommu: iommu core device */ @@ -217,7 +216,6 @@ struct apple_dart { u32 pgsize; u32 num_streams; u32 supports_bypass : 1; - u32 force_bypass : 1; struct iommu_group *sid2group[DART_MAX_STREAMS]; struct iommu_device iommu; @@ -506,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain, apple_dart_domain_flush_tlb(to_dart_domain(domain)); } -static void apple_dart_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int apple_dart_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { apple_dart_domain_flush_tlb(to_dart_domain(domain)); + return 0; } static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain, @@ -568,15 +567,17 @@ apple_dart_setup_translation(struct apple_dart_domain *domain, stream_map->dart->hw->invalidate_tlb(stream_map); } -static int apple_dart_finalize_domain(struct iommu_domain *domain, +static int apple_dart_finalize_domain(struct apple_dart_domain *dart_domain, struct apple_dart_master_cfg *cfg) { - struct apple_dart_domain *dart_domain = to_dart_domain(domain); struct apple_dart *dart = cfg->stream_maps[0].dart; struct io_pgtable_cfg pgtbl_cfg; int ret = 0; int i, j; + if (dart->pgsize > PAGE_SIZE) + return -EINVAL; + mutex_lock(&dart_domain->init_lock); if (dart_domain->finalized) @@ -597,17 +598,18 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, .iommu_dev = dart->dev, }; - dart_domain->pgtbl_ops = - alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, domain); + dart_domain->pgtbl_ops = alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, + &dart_domain->domain); if (!dart_domain->pgtbl_ops) { ret = -ENOMEM; goto done; } - domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; - domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias); - domain->geometry.force_aperture = true; + dart_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + dart_domain->domain.geometry.aperture_start = 0; + dart_domain->domain.geometry.aperture_end = + (dma_addr_t)DMA_BIT_MASK(dart->ias); + dart_domain->domain.geometry.force_aperture = true; dart_domain->finalized = true; @@ -651,47 +653,72 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain, true); } -static int apple_dart_attach_dev(struct iommu_domain *domain, - struct device *dev) +static int apple_dart_attach_dev_paging(struct iommu_domain *domain, + struct device *dev) { int ret, i; struct apple_dart_stream_map *stream_map; struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); struct apple_dart_domain *dart_domain = to_dart_domain(domain); - if (cfg->stream_maps[0].dart->force_bypass && - domain->type != IOMMU_DOMAIN_IDENTITY) - return -EINVAL; - if (!cfg->stream_maps[0].dart->supports_bypass && - domain->type == IOMMU_DOMAIN_IDENTITY) - return -EINVAL; + ret = apple_dart_finalize_domain(dart_domain, cfg); + if (ret) + return ret; - ret = apple_dart_finalize_domain(domain, cfg); + ret = apple_dart_domain_add_streams(dart_domain, cfg); if (ret) return ret; - switch (domain->type) { - default: - ret = apple_dart_domain_add_streams(dart_domain, cfg); - if (ret) - return ret; + for_each_stream_map(i, cfg, stream_map) + apple_dart_setup_translation(dart_domain, stream_map); + return 0; +} - for_each_stream_map(i, cfg, stream_map) - apple_dart_setup_translation(dart_domain, stream_map); - break; - case IOMMU_DOMAIN_BLOCKED: - for_each_stream_map(i, cfg, stream_map) - 
apple_dart_hw_disable_dma(stream_map); - break; - case IOMMU_DOMAIN_IDENTITY: - for_each_stream_map(i, cfg, stream_map) - apple_dart_hw_enable_bypass(stream_map); - break; - } +static int apple_dart_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + struct apple_dart_stream_map *stream_map; + int i; - return ret; + if (!cfg->stream_maps[0].dart->supports_bypass) + return -EINVAL; + + for_each_stream_map(i, cfg, stream_map) + apple_dart_hw_enable_bypass(stream_map); + return 0; } +static const struct iommu_domain_ops apple_dart_identity_ops = { + .attach_dev = apple_dart_attach_dev_identity, +}; + +static struct iommu_domain apple_dart_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &apple_dart_identity_ops, +}; + +static int apple_dart_attach_dev_blocked(struct iommu_domain *domain, + struct device *dev) +{ + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + struct apple_dart_stream_map *stream_map; + int i; + + for_each_stream_map(i, cfg, stream_map) + apple_dart_hw_disable_dma(stream_map); + return 0; +} + +static const struct iommu_domain_ops apple_dart_blocked_ops = { + .attach_dev = apple_dart_attach_dev_blocked, +}; + +static struct iommu_domain apple_dart_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &apple_dart_blocked_ops, +}; + static struct iommu_device *apple_dart_probe_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -717,24 +744,26 @@ static void apple_dart_release_device(struct device *dev) kfree(cfg); } -static struct iommu_domain *apple_dart_domain_alloc(unsigned int type) +static struct iommu_domain *apple_dart_domain_alloc_paging(struct device *dev) { struct apple_dart_domain *dart_domain; - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED && - type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED) - return NULL; - dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL); if (!dart_domain) return NULL; mutex_init(&dart_domain->init_lock); - /* no need to allocate pgtbl_ops or do any other finalization steps */ - if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED) - dart_domain->finalized = true; + if (dev) { + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + int ret; + ret = apple_dart_finalize_domain(dart_domain, cfg); + if (ret) { + kfree(dart_domain); + return ERR_PTR(ret); + } + } return &dart_domain->domain; } @@ -770,8 +799,6 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args) if (cfg_dart) { if (cfg_dart->supports_bypass != dart->supports_bypass) return -EINVAL; - if (cfg_dart->force_bypass != dart->force_bypass) - return -EINVAL; if (cfg_dart->pgsize != dart->pgsize) return -EINVAL; } @@ -913,7 +940,7 @@ static int apple_dart_def_domain_type(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); - if (cfg->stream_maps[0].dart->force_bypass) + if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE) return IOMMU_DOMAIN_IDENTITY; if (!cfg->stream_maps[0].dart->supports_bypass) return IOMMU_DOMAIN_DMA; @@ -947,7 +974,9 @@ static void apple_dart_get_resv_regions(struct device *dev, } static const struct iommu_ops apple_dart_iommu_ops = { - .domain_alloc = apple_dart_domain_alloc, + .identity_domain = &apple_dart_identity_domain, + .blocked_domain = &apple_dart_blocked_domain, + .domain_alloc_paging = apple_dart_domain_alloc_paging, .probe_device = apple_dart_probe_device, .release_device = 
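The new apple_dart_identity_domain and apple_dart_blocked_domain objects follow the pattern this series spreads across drivers: identity and blocked domains carry no per-instance state, so a single static instance with its own small ops table replaces the type checks that used to live in domain_alloc and attach_dev. Stripped to its skeleton (struct and field names here are invented for illustration):

struct domain;

struct domain_ops {
	int (*attach)(struct domain *d, void *dev);
};

struct domain {
	int type;
	const struct domain_ops *ops;
};

static int identity_attach(struct domain *d, void *dev)
{
	/* point the hardware at bypass for 'dev'; nothing to allocate */
	return 0;
}

static const struct domain_ops identity_ops = {
	.attach = identity_attach,
};

/* One immutable instance, shared by every device that attaches. */
static struct domain identity_domain = {
	.type = 1,	/* stands in for IOMMU_DOMAIN_IDENTITY */
	.ops = &identity_ops,
};

Because the singleton is immutable, attaching one device needs no coordination with others, and domain_alloc_paging() can assume it only ever builds paging domains.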
apple_dart_release_device, .device_group = apple_dart_device_group, @@ -957,7 +986,7 @@ static const struct iommu_ops apple_dart_iommu_ops = { .pgsize_bitmap = -1UL, /* Restricted during dart probe */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = apple_dart_attach_dev, + .attach_dev = apple_dart_attach_dev_paging, .map_pages = apple_dart_map_pages, .unmap_pages = apple_dart_unmap_pages, .flush_iotlb_all = apple_dart_flush_iotlb_all, @@ -1111,8 +1140,6 @@ static int apple_dart_probe(struct platform_device *pdev) goto err_clk_disable; } - dart->force_bypass = dart->pgsize > PAGE_SIZE; - ret = apple_dart_hw_reset(dart); if (ret) goto err_clk_disable; @@ -1136,7 +1163,8 @@ static int apple_dart_probe(struct platform_device *pdev) dev_info( &pdev->dev, "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n", - dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass); + dart->pgsize, dart->num_streams, dart->supports_bypass, + dart->pgsize > PAGE_SIZE); return 0; err_sysfs_remove: diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8a16cd3ef487..353248ab18e7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier { #define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) struct arm_smmu_bond { - struct iommu_sva sva; struct mm_struct *mm; struct arm_smmu_mmu_notifier *smmu_mn; struct list_head list; - refcount_t refs; }; #define sva_to_bond(handle) \ @@ -38,6 +36,25 @@ struct arm_smmu_bond { static DEFINE_MUTEX(sva_lock); /* + * Write the CD to the CD tables for all masters that this domain is attached + * to. Note that this is only used to update existing CD entries in the target + * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail. + */ +static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain, + int ssid, + struct arm_smmu_ctx_desc *cd) +{ + struct arm_smmu_master *master; + unsigned long flags; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + arm_smmu_write_ctx_desc(master, ssid, cd); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} + +/* * Check if the CPU ASID is available on the SMMU side. If a private context * descriptor is using it, try to replace it. */ @@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) return cd; } - smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd); + smmu_domain = container_of(cd, struct arm_smmu_domain, cd); smmu = smmu_domain->smmu; ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, @@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); @@ -247,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. 
*/ - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); @@ -273,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, struct mm_struct *mm) { int ret; + unsigned long flags; struct arm_smmu_ctx_desc *cd; struct arm_smmu_mmu_notifier *smmu_mn; + struct arm_smmu_master *master; list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { if (smmu_mn->mn.mm == mm) { @@ -304,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, goto err_free_cd; } - ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd); + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd); + if (ret) { + list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head) + arm_smmu_write_ctx_desc(master, mm->pasid, NULL); + break; + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); if (ret) goto err_put_notifier; @@ -329,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) return; list_del(&smmu_mn->list); - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL); + + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL); /* * If we went through clear(), we've already invalidated, and no @@ -345,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static struct iommu_sva * -__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) +static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) { int ret; struct arm_smmu_bond *bond; @@ -355,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); if (!master || !master->sva_enabled) - return ERR_PTR(-ENODEV); - - /* If bind() was already called for this {dev, mm} pair, reuse it. 
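arm_smmu_mmu_notifier_get() above now writes the CD for every master attached to the domain and, if one write fails, walks back with list_for_each_entry_from_reverse() to clear the entries it already wrote, so the device list is never left half-programmed. The generic all-or-nothing loop, sketched over an array with stand-in apply()/undo() steps:

static int apply(int *slot) { *slot = 1; return 0; }	/* stand-in */
static void undo(int *slot) { *slot = 0; }		/* stand-in */

/* Apply to all elements, or to none: unwind in reverse on failure. */
static int apply_all(int *v, int n)
{
	int i, ret = 0;

	for (i = 0; i < n; i++) {
		ret = apply(&v[i]);
		if (ret) {
			while (--i >= 0)
				undo(&v[i]);
			break;
		}
	}
	return ret;
}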
*/ - list_for_each_entry(bond, &master->bonds, list) { - if (bond->mm == mm) { - refcount_inc(&bond->refs); - return &bond->sva; - } - } + return -ENODEV; bond = kzalloc(sizeof(*bond), GFP_KERNEL); if (!bond) - return ERR_PTR(-ENOMEM); + return -ENOMEM; bond->mm = mm; - bond->sva.dev = dev; - refcount_set(&bond->refs, 1); bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { @@ -380,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) } list_add(&bond->list, &master->bonds); - return &bond->sva; + return 0; err_free_bond: kfree(bond); - return ERR_PTR(ret); + return ret; } bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) @@ -550,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, } } - if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + if (!WARN_ON(!bond)) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); kfree(bond); @@ -562,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { int ret = 0; - struct iommu_sva *handle; struct mm_struct *mm = domain->mm; mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - if (IS_ERR(handle)) - ret = PTR_ERR(handle); + ret = __arm_smmu_sva_bind(dev, mm); mutex_unlock(&sva_lock); return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bd0a596f9863..7445454c2af2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } -static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, +static void arm_smmu_sync_cd(struct arm_smmu_master *master, int ssid, bool leaf) { size_t i; - unsigned long flags; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, .cfgi = { @@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, }; cmds.num = 0; - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - for (i = 0; i < master->num_streams; i++) { - cmd.cfgi.sid = master->streams[i].id; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); - } + for (i = 0; i < master->num_streams; i++) { + cmd.cfgi.sid = master->streams[i].id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_cmdq_batch_submit(smmu, &cmds); } @@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, WRITE_ONCE(*dst, cpu_to_le64(val)); } -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, - u32 ssid) +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { __le64 *l1ptr; unsigned int idx; struct arm_smmu_l1_ctx_desc *l1_desc; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS; idx = ssid >> 
CTXDESC_SPLIT; - l1_desc = &cdcfg->l1_desc[idx]; + l1_desc = &cd_table->l1_desc[idx]; if (!l1_desc->l2ptr) { if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) return NULL; - l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); /* An invalid L1CD can be cached */ - arm_smmu_sync_cd(smmu_domain, ssid, false); + arm_smmu_sync_cd(master, ssid, false); } idx = ssid & (CTXDESC_L2_ENTRIES - 1); return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; } -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { /* @@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + cdptr = arm_smmu_get_cd_ptr(master, ssid); if (!cdptr) return -ENOMEM; @@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, cdptr[3] = cpu_to_le64(cd->mair); /* - * STE is live, and the SMMU might read dwords of this CD in any + * STE may be live, and the SMMU might read dwords of this CD in any * order. Ensure that it observes valid values before reading * V=1. */ - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); val = cd->tcr | #ifdef __BIG_ENDIAN @@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V; - if (smmu_domain->stall_enabled) + if (cd_table->stall_enabled) val |= CTXDESC_CD_0_S; } @@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, * without first making the structure invalid. 
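arm_smmu_get_cd_ptr() above resolves an SSID through a two-level table: the bits above CTXDESC_SPLIT pick an L1 descriptor whose leaf table is allocated on first use, and the low bits pick the CD within that leaf. A compact model of the lookup, assuming a split of 10 bits (so 1024-entry leaves); calloc() stands in for the DMA-coherent leaf allocation:

#include <stdint.h>
#include <stdlib.h>

#define SPLIT		10
#define L2_ENTRIES	(1u << SPLIT)

static uint64_t *cd_entry(uint64_t **l1, uint32_t ssid)
{
	uint32_t l1_idx = ssid >> SPLIT;		/* which leaf table */
	uint32_t l2_idx = ssid & (L2_ENTRIES - 1);	/* entry inside it */

	/* Leaves appear lazily; an empty L1 slot means "no CDs here yet". */
	if (!l1[l1_idx])
		l1[l1_idx] = calloc(L2_ENTRIES, sizeof(uint64_t));

	return l1[l1_idx] ? &l1[l1_idx][l2_idx] : NULL;
}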
*/ WRITE_ONCE(cdptr[0], cpu_to_le64(val)); - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); return 0; } -static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) { int ret; size_t l1size; size_t max_contexts; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - max_contexts = 1 << cfg->s1cdmax; + cd_table->stall_enabled = master->stall_enabled; + cd_table->s1cdmax = master->ssid_bits; + max_contexts = 1 << cd_table->s1cdmax; if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || max_contexts <= CTXDESC_L2_ENTRIES) { - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; - cdcfg->num_l1_ents = max_contexts; + cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cd_table->num_l1_ents = max_contexts; l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); } else { - cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; - cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, - sizeof(*cdcfg->l1_desc), + cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents, + sizeof(*cd_table->l1_desc), GFP_KERNEL); - if (!cdcfg->l1_desc) + if (!cd_table->l1_desc) return -ENOMEM; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } - cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma, GFP_KERNEL); - if (!cdcfg->cdtab) { + if (!cd_table->cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; goto err_free_l1; @@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) return 0; err_free_l1: - if (cdcfg->l1_desc) { - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + if (cd_table->l1_desc) { + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; } return ret; } -static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; size_t size, l1size; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (cdcfg->l1_desc) { + if (cd_table->l1_desc) { size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - for (i = 0; i < cdcfg->num_l1_ents; i++) { - if (!cdcfg->l1_desc[i].l2ptr) + for (i = 0; i < cd_table->num_l1_ents; i++) { + if (!cd_table->l1_desc[i].l2ptr) continue; dmam_free_coherent(smmu->dev, size, - cdcfg->l1_desc[i].l2ptr, - cdcfg->l1_desc[i].l2ptr_dma); + cd_table->l1_desc[i].l2ptr, + cd_table->l1_desc[i].l2ptr_dma); } - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } else { - l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + l1size = cd_table->num_l1_ents * 
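The WRITE_ONCE of word 0 above is the tail of a publish protocol: words 1-3 of the CD are written first, a sync makes them observable, and only then does word 0 gain the valid bit, so the SMMU can never see V=1 paired with a stale payload. On a CPU-only structure the same guarantee is a release store; here is a C11 analogue (a device walker instead needs the explicit CFGI_CD syncs, as the driver issues):

#include <stdatomic.h>
#include <stdint.h>

#define VALID (1ull << 0)

struct desc {
	_Atomic uint64_t word0;		/* carries the valid bit */
	uint64_t word1, word2, word3;
};

static void publish(struct desc *d, uint64_t w0, uint64_t w1,
		    uint64_t w2, uint64_t w3)
{
	/* Payload first... */
	d->word1 = w1;
	d->word2 = w2;
	d->word3 = w3;
	/* ...then the valid bit, with release ordering, so a reader
	 * that observes VALID also observes words 1-3. */
	atomic_store_explicit(&d->word0, w0 | VALID, memory_order_release);
}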
(CTXDESC_CD_DWORDS << 3); } - dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); - cdcfg->cdtab_dma = 0; - cdcfg->cdtab = NULL; + dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); + cd_table->cdtab_dma = 0; + cd_table->cdtab = NULL; } bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) @@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, u64 val = le64_to_cpu(dst[0]); bool ste_live = false; struct arm_smmu_device *smmu = NULL; - struct arm_smmu_s1_cfg *s1_cfg = NULL; + struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; struct arm_smmu_domain *smmu_domain = NULL; struct arm_smmu_cmdq_ent prefetch_cmd = { @@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu_domain) { switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: - s1_cfg = &smmu_domain->s1_cfg; + cd_table = &master->cd_table; break; case ARM_SMMU_DOMAIN_S2: case ARM_SMMU_DOMAIN_NESTED: @@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, val = STRTAB_STE_0_V; /* Bypass/fault */ - if (!smmu_domain || !(s1_cfg || s2_cfg)) { + if (!smmu_domain || !(cd_table || s2_cfg)) { if (!smmu_domain && disable_bypass) val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); else @@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, return; } - if (s1_cfg) { + if (cd_table) { u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; @@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !master->stall_enabled) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); + FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt); } if (s2_cfg) { @@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) * careful, 007. */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid); + arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; + cmd.tlbi.asid = smmu_domain->cd.asid; } else { cmd.opcode = CMDQ_OP_TLBI_S2_IPA; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); - /* Free the CD and ASID, if we allocated them */ + /* Free the ASID or VMID */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - if (cfg->cdcfg.cdtab) - arm_smmu_free_cd_tables(smmu_domain); - arm_smmu_free_asid(&cfg->cd); + arm_smmu_free_asid(&smmu_domain->cd); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; u32 asid; struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; - refcount_set(&cfg->cd.refs, 1); + refcount_set(&cd->refs, 1); /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd, + ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); if (ret) goto out_unlock; - cfg->s1cdmax = master->ssid_bits; - - smmu_domain->stall_enabled = master->stall_enabled; - - ret = arm_smmu_alloc_cd_tables(smmu_domain); - if (ret) - goto out_free_asid; - - cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + cd->asid = (u16)asid; + cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; - - /* - * Note that this will end up calling arm_smmu_sync_cd() before - * the master has been added to the devices list for this domain. - * This isn't an issue because the STE hasn't been installed yet. 
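The cd->tcr assembly above is plain FIELD_PREP() bitfield packing: shift each value to its field's position and mask it to the field's width. A freestanding approximation of the idiom (GENMASK/FIELD_PREP are modelled with a GCC/Clang builtin; the T0SZ [5:0] and TG0 [7:6] positions mirror TCR_EL1 and are shown only as examples):

#include <stdint.h>

#define GENMASK64(h, l)	(((~0ull) >> (63 - (h))) & ((~0ull) << (l)))
#define FIELD_PREP64(mask, val) \
	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

#define CD_TCR_T0SZ	GENMASK64(5, 0)
#define CD_TCR_TG0	GENMASK64(7, 6)

/* Pack translation-control fields into one register-format word. */
static uint64_t pack_tcr(unsigned int tsz, unsigned int tg)
{
	return FIELD_PREP64(CD_TCR_T0SZ, tsz) |
	       FIELD_PREP64(CD_TCR_TG0, tg);
}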
- */ - ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); - if (ret) - goto out_free_cd_tables; + cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; mutex_unlock(&arm_smmu_asid_lock); return 0; -out_free_cd_tables: - arm_smmu_free_cd_tables(smmu_domain); -out_free_asid: - arm_smmu_free_asid(&cfg->cd); out_unlock: mutex_unlock(&arm_smmu_asid_lock); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; @@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain, - struct arm_smmu_master *master) +static int arm_smmu_domain_finalise(struct iommu_domain *domain) { int ret; unsigned long ias, oas; @@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, - struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; arm_smmu_install_ste_for_dev(master); + /* + * Clearing the CD entry isn't strictly required to detach the domain + * since the table is uninstalled anyway, but it helps avoid confusion + * in the call to arm_smmu_write_ctx_desc on the next attach (which + * expects the entry to be empty). 
+ */ + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain, master); - if (ret) { + ret = arm_smmu_domain_finalise(domain); + if (ret) smmu_domain->smmu = NULL; - goto out_unlock; - } - } else if (smmu_domain->smmu != smmu) { - ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + } else if (smmu_domain->smmu != smmu) ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - smmu_domain->stall_enabled != master->stall_enabled) { - ret = -EINVAL; - goto out_unlock; - } + + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; master->domain = smmu_domain; @@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - arm_smmu_install_ste_for_dev(master); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_add(&master->domain_head, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (!master->cd_table.cdtab) { + ret = arm_smmu_alloc_cd_tables(master); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + /* + * Prevent SVA from concurrently modifying the CD or writing to + * the CD entry + */ + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); + mutex_unlock(&arm_smmu_asid_lock); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + arm_smmu_install_ste_for_dev(master); + arm_smmu_enable_ats(master); + return 0; + +out_list_del: + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_del(&master->domain_head); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); -out_unlock: - mutex_unlock(&smmu_domain->init_mutex); return ret; } @@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); + if (master->cd_table.cdtab) + arm_smmu_free_cd_tables(master); kfree(master); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9915850dd4db..961205ba86d2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; -}; - -struct arm_smmu_s1_cfg { - struct arm_smmu_ctx_desc_cfg cdcfg; - struct arm_smmu_ctx_desc cd; u8 s1fmt; + /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; + /* Whether CD entries in this table have the stall bit set. 
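The reworked attach path above gains an out_list_del label: if CD-table allocation or the CD write fails, the master is removed from the domain's device list again before returning. That is the usual kernel goto-unwind ladder, in miniature (step/undo functions are stand-ins for list_add, arm_smmu_alloc_cd_tables and friends):

static int step_a(void) { return 0; }	/* e.g. add to the device list */
static void undo_a(void) { }		/* e.g. remove from it again */
static int step_b(void) { return 0; }	/* e.g. write the CD entry */

static int attach(void)
{
	int ret;

	ret = step_a();
	if (ret)
		return ret;

	ret = step_b();
	if (ret)
		goto out_undo_a;	/* unwind exactly what succeeded */

	return 0;

out_undo_a:
	undo_a();
	return ret;
}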
*/ + u8 stall_enabled:1; }; struct arm_smmu_s2_cfg { @@ -697,6 +695,8 @@ struct arm_smmu_master { struct arm_smmu_domain *domain; struct list_head domain_head; struct arm_smmu_stream *streams; + /* Locked by the iommu core using the group mutex */ + struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; bool ats_enabled; bool stall_enabled; @@ -719,13 +719,12 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - bool stall_enabled; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; union { - struct arm_smmu_s1_cfg s1_cfg; - struct arm_smmu_s2_cfg s2_cfg; + struct arm_smmu_ctx_desc cd; + struct arm_smmu_s2_cfg s2_cfg; }; struct iommu_domain domain; @@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; extern struct arm_smmu_ctx_desc quiet_cd; -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 7f52ac67495f..549ae4dba3a6 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sc8280xp-mdss" }, + { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { .compatible = "qcom,sm6350-mdss" }, @@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 775a3cbaff4e..97b2122032b2 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -332,12 +332,10 @@ out_unlock: return ret; } -static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) +static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev) { struct qcom_iommu_domain *qcom_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; /* * Allocate the domain and initialise some of its data structures. 
* We can't really do anything meaningful until we've added a @@ -400,6 +398,44 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev return 0; } +static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct qcom_iommu_domain *qcom_domain; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); + unsigned int i; + + if (domain == identity_domain || !domain) + return 0; + + qcom_domain = to_qcom_iommu_domain(domain); + if (WARN_ON(!qcom_domain->iommu)) + return -EINVAL; + + pm_runtime_get_sync(qcom_iommu->dev); + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]); + + /* Disable the context bank: */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + ctx->domain = NULL; + } + pm_runtime_put_sync(qcom_iommu->dev); + return 0; +} + +static struct iommu_domain_ops qcom_iommu_identity_ops = { + .attach_dev = qcom_iommu_identity_attach, +}; + +static struct iommu_domain qcom_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &qcom_iommu_identity_ops, +}; + static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -565,8 +601,9 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) } static const struct iommu_ops qcom_iommu_ops = { + .identity_domain = &qcom_iommu_identity_domain, .capable = qcom_iommu_capable, - .domain_alloc = qcom_iommu_domain_alloc, + .domain_alloc_paging = qcom_iommu_domain_alloc_paging, .probe_device = qcom_iommu_probe_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b1a88f514c9..85163a83df2f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -43,14 +43,28 @@ enum iommu_dma_cookie_type { IOMMU_DMA_MSI_COOKIE, }; +enum iommu_dma_queue_type { + IOMMU_DMA_OPTS_PER_CPU_QUEUE, + IOMMU_DMA_OPTS_SINGLE_QUEUE, +}; + +struct iommu_dma_options { + enum iommu_dma_queue_type qt; + size_t fq_size; + unsigned int fq_timeout; +}; + struct iommu_dma_cookie { enum iommu_dma_cookie_type type; union { /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ struct { struct iova_domain iovad; - - struct iova_fq __percpu *fq; /* Flush queue */ + /* Flush queue */ + union { + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; + }; /* Number of TLB flushes that have been started */ atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that have been finished */ @@ -67,6 +81,8 @@ struct iommu_dma_cookie { /* Domain for flush queue callback; NULL if flush queue not in use */ struct iommu_domain *fq_domain; + /* Options for dma-iommu use */ + struct iommu_dma_options options; struct mutex mutex; }; @@ -84,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str) early_param("iommu.forcedac", iommu_dma_forcedac_setup); /* Number of entries per flush queue */ -#define IOVA_FQ_SIZE 256 +#define IOVA_DEFAULT_FQ_SIZE 256 +#define IOVA_SINGLE_FQ_SIZE 32768 /* Timeout (in ms) after which entries are flushed from the queue */ -#define IOVA_FQ_TIMEOUT 10 +#define IOVA_DEFAULT_FQ_TIMEOUT 10 +#define IOVA_SINGLE_FQ_TIMEOUT 1000 /* Flush queue entry for deferred flushing */ struct iova_fq_entry { @@ -99,18 +117,19 @@ struct iova_fq_entry { /* Per-CPU flush queue structure */ 
struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned int head, tail; spinlock_t lock; + unsigned int head, tail; + unsigned int mod_mask; + struct iova_fq_entry entries[]; }; #define fq_ring_for_each(i, fq) \ - for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask) static inline bool fq_full(struct iova_fq *fq) { assert_spin_locked(&fq->lock); - return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); + return (((fq->tail + 1) & fq->mod_mask) == fq->head); } static inline unsigned int fq_ring_add(struct iova_fq *fq) @@ -119,12 +138,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq) assert_spin_locked(&fq->lock); - fq->tail = (idx + 1) % IOVA_FQ_SIZE; + fq->tail = (idx + 1) & fq->mod_mask; return idx; } -static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq) { u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); unsigned int idx; @@ -141,10 +160,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) fq->entries[idx].iova_pfn, fq->entries[idx].pages); - fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + fq->head = (fq->head + 1) & fq->mod_mask; } } +static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + unsigned long flags; + + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free_locked(cookie, fq); + spin_unlock_irqrestore(&fq->lock, flags); +} + static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) { atomic64_inc(&cookie->fq_flush_start_cnt); @@ -160,14 +188,11 @@ static void fq_flush_timeout(struct timer_list *t) atomic_set(&cookie->fq_timer_on, 0); fq_flush_iotlb(cookie); - for_each_possible_cpu(cpu) { - unsigned long flags; - struct iova_fq *fq; - - fq = per_cpu_ptr(cookie->fq, cpu); - spin_lock_irqsave(&fq->lock, flags); - fq_ring_free(cookie, fq); - spin_unlock_irqrestore(&fq->lock, flags); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) { + fq_ring_free(cookie, cookie->single_fq); + } else { + for_each_possible_cpu(cpu) + fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu)); } } @@ -188,7 +213,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, */ smp_mb(); - fq = raw_cpu_ptr(cookie->fq); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + fq = cookie->single_fq; + else + fq = raw_cpu_ptr(cookie->percpu_fq); + spin_lock_irqsave(&fq->lock, flags); /* @@ -196,11 +225,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, * flushed out on another CPU. This makes the fq_full() check below less * likely to be true. 
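The fq_ring_free() split above follows the kernel's *_locked convention: fq_ring_free_locked() expects fq->lock to be held (queue_iova() already owns it), while fq_ring_free() is the self-locking wrapper used from the timer path. The shape of such a pair, with a pthread mutex standing in for the spinlock:

#include <pthread.h>

static pthread_mutex_t fq_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending;

/* Caller must hold fq_lock. */
static void reap_locked(void)
{
	pending = 0;
}

/* Wrapper for callers that do not hold the lock. */
static void reap(void)
{
	pthread_mutex_lock(&fq_lock);
	reap_locked();
	pthread_mutex_unlock(&fq_lock);
}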
*/ - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); if (fq_full(fq)) { fq_flush_iotlb(cookie); - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); } idx = fq_ring_add(fq); @@ -216,34 +245,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie, if (!atomic_read(&cookie->fq_timer_on) && !atomic_xchg(&cookie->fq_timer_on, 1)) mod_timer(&cookie->fq_timer, - jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + jiffies + msecs_to_jiffies(cookie->options.fq_timeout)); } -static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +static void iommu_dma_free_fq_single(struct iova_fq *fq) { - int cpu, idx; + int idx; - if (!cookie->fq) - return; + fq_ring_for_each(idx, fq) + put_pages_list(&fq->entries[idx].freelist); + vfree(fq); +} + +static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) +{ + int cpu, idx; - del_timer_sync(&cookie->fq_timer); /* The IOVAs will be torn down separately, so just free our queued pages */ for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); + struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); fq_ring_for_each(idx, fq) put_pages_list(&fq->entries[idx].freelist); } - free_percpu(cookie->fq); + free_percpu(percpu_fq); +} + +static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +{ + if (!cookie->fq_domain) + return; + + del_timer_sync(&cookie->fq_timer); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + iommu_dma_free_fq_single(cookie->single_fq); + else + iommu_dma_free_fq_percpu(cookie->percpu_fq); +} + +static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) +{ + int i; + + fq->head = 0; + fq->tail = 0; + fq->mod_mask = fq_size - 1; + + spin_lock_init(&fq->lock); + + for (i = 0; i < fq_size; i++) + INIT_LIST_HEAD(&fq->entries[i].freelist); +} + +static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq *queue; + + queue = vmalloc(struct_size(queue, entries, fq_size)); + if (!queue) + return -ENOMEM; + iommu_dma_init_one_fq(queue, fq_size); + cookie->single_fq = queue; + + return 0; +} + +static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq __percpu *queue; + int cpu; + + queue = __alloc_percpu(struct_size(queue, entries, fq_size), + __alignof__(*queue)); + if (!queue) + return -ENOMEM; + + for_each_possible_cpu(cpu) + iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size); + cookie->percpu_fq = queue; + return 0; } /* sysfs updates are serialised by the mutex of the group owning @domain */ int iommu_dma_init_fq(struct iommu_domain *domain) { struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_fq __percpu *queue; - int i, cpu; + int rc; if (cookie->fq_domain) return 0; @@ -251,26 +341,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain) atomic64_set(&cookie->fq_flush_start_cnt, 0); atomic64_set(&cookie->fq_flush_finish_cnt, 0); - queue = alloc_percpu(struct iova_fq); - if (!queue) { + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + rc = iommu_dma_init_fq_single(cookie); + else + rc = iommu_dma_init_fq_percpu(cookie); + + if (rc) { pr_warn("iova flush queue initialization failed\n"); return -ENOMEM; } - for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(queue, cpu); - - fq->head = 0; - fq->tail = 0; - - spin_lock_init(&fq->lock); - - for (i = 0; i < IOVA_FQ_SIZE; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); - } - - cookie->fq = queue; - 
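Because the flush-queue size is now variable (IOVA_DEFAULT_FQ_SIZE or IOVA_SINGLE_FQ_SIZE), the ring arithmetic above switched from '% IOVA_FQ_SIZE' to '& fq->mod_mask', which is only valid because both sizes are powers of two. A self-contained model of such a ring (an int payload replaces iova_fq_entry):

#include <stdlib.h>

struct ring {
	unsigned int head, tail, mod_mask;	/* mod_mask = size - 1 */
	int entries[];				/* flexible array, as in iova_fq */
};

static struct ring *ring_alloc(unsigned int size)	/* size: power of two */
{
	struct ring *r = calloc(1, sizeof(*r) + size * sizeof(int));

	if (r)
		r->mod_mask = size - 1;
	return r;
}

static int ring_full(const struct ring *r)
{
	return ((r->tail + 1) & r->mod_mask) == r->head;
}

static int ring_push(struct ring *r, int v)
{
	if (ring_full(r))
		return -1;	/* caller flushes, then retries */
	r->entries[r->tail] = v;
	r->tail = (r->tail + 1) & r->mod_mask;
	return 0;
}

One slot is deliberately left unused (full means tail + 1 == head), so an empty ring (head == tail) stays distinguishable from a full one.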
timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); atomic_set(&cookie->fq_timer_on, 0); /* @@ -555,6 +635,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg, } /** + * iommu_dma_init_options - Initialize dma-iommu options + * @options: The options to be initialized + * @dev: Device the options are set for + * + * This allows tuning dma-iommu specific to device properties + */ +static void iommu_dma_init_options(struct iommu_dma_options *options, + struct device *dev) +{ + /* Shadowing IOTLB flushes do better with a single large queue */ + if (dev->iommu->shadow_on_flush) { + options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE; + options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT; + options->fq_size = IOVA_SINGLE_FQ_SIZE; + } else { + options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE; + options->fq_size = IOVA_DEFAULT_FQ_SIZE; + options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT; + } +} + +/** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts @@ -614,6 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (ret) goto done_unlock; + iommu_dma_init_options(&cookie->options, dev); + /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c275fe71c4db..2c6e9094f1e9 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -24,6 +24,7 @@ typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; +static struct iommu_domain exynos_identity_domain; /* We do not consider super section mapping (16MB) */ #define SECT_ORDER 20 @@ -829,7 +830,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -847,7 +848,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } @@ -886,7 +887,7 @@ static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val) DMA_TO_DEVICE); } -static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) +static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) { struct exynos_iommu_domain *domain; dma_addr_t handle; @@ -895,9 +896,6 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) /* Check if correct PTE offsets are initialized */ BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev); - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -980,17 +978,20 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain) kfree(domain); } -static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, - struct device *dev) +static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); struct 
exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - phys_addr_t pagetable = virt_to_phys(domain->pgtable); + struct exynos_iommu_domain *domain; + phys_addr_t pagetable; struct sysmmu_drvdata *data, *next; unsigned long flags; - if (!has_sysmmu(dev) || owner->domain != iommu_domain) - return; + if (owner->domain == identity_domain) + return 0; + + domain = to_exynos_domain(owner->domain); + pagetable = virt_to_phys(domain->pgtable); mutex_lock(&owner->rpm_lock); @@ -1009,15 +1010,25 @@ static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, list_del_init(&data->domain_node); spin_unlock(&data->lock); } - owner->domain = NULL; + owner->domain = identity_domain; spin_unlock_irqrestore(&domain->lock, flags); mutex_unlock(&owner->rpm_lock); - dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", __func__, - &pagetable); + dev_dbg(dev, "%s: Restored IOMMU to IDENTITY from pgtable %pa\n", + __func__, &pagetable); + return 0; } +static struct iommu_domain_ops exynos_identity_ops = { + .attach_dev = exynos_iommu_identity_attach, +}; + +static struct iommu_domain exynos_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &exynos_identity_ops, +}; + static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct device *dev) { @@ -1026,12 +1037,11 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct sysmmu_drvdata *data; phys_addr_t pagetable = virt_to_phys(domain->pgtable); unsigned long flags; + int err; - if (!has_sysmmu(dev)) - return -ENODEV; - - if (owner->domain) - exynos_iommu_detach_device(owner->domain, dev); + err = exynos_iommu_identity_attach(&exynos_identity_domain, dev); + if (err) + return err; mutex_lock(&owner->rpm_lock); @@ -1219,7 +1229,7 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, */ static int exynos_iommu_map(struct iommu_domain *iommu_domain, unsigned long l_iova, phys_addr_t paddr, size_t size, - int prot, gfp_t gfp) + size_t count, int prot, gfp_t gfp, size_t *mapped) { struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); sysmmu_pte_t *entry; @@ -1253,6 +1263,8 @@ static int exynos_iommu_map(struct iommu_domain *iommu_domain, if (ret) pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", __func__, ret, size, iova); + else + *mapped = size; spin_unlock_irqrestore(&domain->pgtablelock, flags); @@ -1274,7 +1286,7 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain } static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain, - unsigned long l_iova, size_t size, + unsigned long l_iova, size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); @@ -1407,26 +1419,12 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev) return &data->iommu; } -static void exynos_iommu_set_platform_dma(struct device *dev) -{ - struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - - if (owner->domain) { - struct iommu_group *group = iommu_group_get(dev); - - if (group) { - exynos_iommu_detach_device(owner->domain, dev); - iommu_group_put(group); - } - } -} - static void exynos_iommu_release_device(struct device *dev) { struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); struct sysmmu_drvdata *data; - exynos_iommu_set_platform_dma(dev); + WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev)); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1457,6 +1455,7 @@ static int 
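The exynos conversion above is the template most of this series follows: the old NULL-domain/"detach" convention is replaced by a statically defined IOMMU_DOMAIN_IDENTITY domain whose only op is attach_dev, and per-device state points at it instead of NULL. A condensed sketch of the shape, using a hypothetical "mydrv" driver (error handling elided):

    #include <linux/iommu.h>

    static int mydrv_identity_attach(struct iommu_domain *identity_domain,
                                     struct device *dev)
    {
            /* tear down any paging-domain state; must always succeed */
            return 0;
    }

    static struct iommu_domain_ops mydrv_identity_ops = {
            .attach_dev = mydrv_identity_attach,
    };

    static struct iommu_domain mydrv_identity_domain = {
            .type = IOMMU_DOMAIN_IDENTITY,
            .ops = &mydrv_identity_ops,
    };

Wiring '.identity_domain = &mydrv_identity_domain' into the driver's iommu_ops then lets the core treat identity as a first-class default domain rather than a driver-private NULL state.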
exynos_iommu_of_xlate(struct device *dev, INIT_LIST_HEAD(&owner->controllers); mutex_init(&owner->rpm_lock); + owner->domain = &exynos_identity_domain; dev_iommu_priv_set(dev, owner); } @@ -1471,19 +1470,17 @@ static int exynos_iommu_of_xlate(struct device *dev, } static const struct iommu_ops exynos_iommu_ops = { - .domain_alloc = exynos_iommu_domain_alloc, + .identity_domain = &exynos_identity_domain, + .domain_alloc_paging = exynos_iommu_domain_alloc_paging, .device_group = generic_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = exynos_iommu_set_platform_dma, -#endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = exynos_iommu_attach_device, - .map = exynos_iommu_map, - .unmap = exynos_iommu_unmap, + .map_pages = exynos_iommu_map, + .unmap_pages = exynos_iommu_unmap, .iova_to_phys = exynos_iommu_iova_to_phys, .free = exynos_iommu_domain_free, } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 4ac0e247ec2b..e9d2bff4659b 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -196,6 +196,13 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) { struct fsl_dma_domain *dma_domain; + /* + * FIXME: This isn't creating an unmanaged domain: since the + * default_domain_ops do not have any map/unmap functions, it doesn't meet + * the requirements for __IOMMU_DOMAIN_PAGING. Its only purpose seems to be + * to allow drivers/soc/fsl/qbman/qman_portal.c to do + * fsl_pamu_configure_l1_stash(). + */ if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -283,16 +290,34 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, return ret; } -static void fsl_pamu_set_platform_dma(struct device *dev) +/* + * FIXME: fsl/pamu is completely broken in terms of how it works with the iommu + * API. Immediately after probe the HW is left in an IDENTITY translation, and + * the driver provides a non-working UNMANAGED domain that it can switch over + * to. However, it cannot switch back to an IDENTITY translation; instead it + * switches to what looks like BLOCKING. + */ +static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + struct fsl_dma_domain *dma_domain; const u32 *prop; int len; struct pci_dev *pdev = NULL; struct pci_controller *pci_ctl; /* + * Hack to keep things working as they always have: only leaving an + * UNMANAGED domain makes it BLOCKING. + */ + if (domain == platform_domain || !domain || + domain->type != IOMMU_DOMAIN_UNMANAGED) + return 0; + + dma_domain = to_fsl_dma_domain(domain); + + /* * Use LIODN of the PCI controller while detaching a * PCI device.
*/ @@ -312,8 +337,18 @@ static void fsl_pamu_set_platform_dma(struct device *dev) detach_device(dev, dma_domain); else pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); + return 0; } +static struct iommu_domain_ops fsl_pamu_platform_ops = { + .attach_dev = fsl_pamu_platform_attach, +}; + +static struct iommu_domain fsl_pamu_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &fsl_pamu_platform_ops, +}; + /* Set the domain stash attribute */ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu) { @@ -395,11 +430,11 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev) } static const struct iommu_ops fsl_pamu_ops = { + .default_domain = &fsl_pamu_platform_domain, .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, - .set_platform_dma_ops = fsl_pamu_set_platform_dma, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, .iova_to_phys = fsl_pamu_iova_to_phys, diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 1f925285104e..dee61e513be6 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(VCRSP), }; +static struct dentry *intel_iommu_debug; + static int iommu_regset_show(struct seq_file *m, void *unused) { struct dmar_drhd_unit *drhd; @@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level) static inline void dump_page_info(struct seq_file *m, unsigned long iova, u64 *path) { - seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n", - iova >> VTD_PAGE_SHIFT, path[5], path[4], - path[3], path[2], path[1]); + seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx", + iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]); + if (path[2]) { + seq_printf(m, "\t0x%016llx", path[2]); + if (path[1]) + seq_printf(m, "\t0x%016llx", path[1]); + } + seq_putc(m, '\n'); } static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, @@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, } } -static int __show_device_domain_translation(struct device *dev, void *data) +static int domain_translation_struct_show(struct seq_file *m, + struct device_domain_info *info, + ioasid_t pasid) { - struct dmar_domain *domain; - struct seq_file *m = data; - u64 path[6] = { 0 }; - - domain = to_dmar_domain(iommu_get_domain_for_dev(dev)); - if (!domain) - return 0; + bool scalable, found = false; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + u16 devfn, bus, seg; - seq_printf(m, "Device %s @0x%llx\n", dev_name(dev), - (u64)virt_to_phys(domain->pgd)); - seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); + bus = info->bus; + devfn = info->devfn; + seg = info->segment; - pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path); - seq_putc(m, '\n'); + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + struct context_entry *context; + u64 pgd, path[6] = { 0 }; + u32 sts, agaw; - /* Don't iterate */ - return 1; -} + if (seg != iommu->segment) + continue; -static int show_device_domain_translation(struct device *dev, void *data) -{ - struct iommu_group *group; + sts = dmar_readl(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_TES)) { + seq_printf(m, "DMA Remapping is not enabled on %s\n", + iommu->name); + continue; + } + if 
(dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) + scalable = true; + else + scalable = false; - group = iommu_group_get(dev); - if (group) { /* - * The group->mutex is held across the callback, which will - * block calls to iommu_attach/detach_group/device. Hence, + * The iommu->lock is held across the callback, which will + * block calls to domain_attach/domain_detach. Hence, * the domain of the device will not change during traversal. * - * All devices in an iommu group share a single domain, hence - * we only dump the domain of the first device. Even though, - * this code still possibly races with the iommu_unmap() + * Traversing page table possibly races with the iommu_unmap() * interface. This could be solved by RCU-freeing the page * table pages in the iommu_unmap() path. */ - iommu_group_for_each_dev(group, data, - __show_device_domain_translation); - iommu_group_put(group); + spin_lock(&iommu->lock); + + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context || !context_present(context)) + goto iommu_unlock; + + if (scalable) { /* scalable mode */ + struct pasid_entry *pasid_tbl, *pasid_tbl_entry; + struct pasid_dir_entry *dir_tbl, *dir_entry; + u16 dir_idx, tbl_idx, pgtt; + u64 pasid_dir_ptr; + + pasid_dir_ptr = context->lo & VTD_PAGE_MASK; + + /* Dump specified device domain mappings with PASID. */ + dir_idx = pasid >> PASID_PDE_SHIFT; + tbl_idx = pasid & PASID_PTE_MASK; + + dir_tbl = phys_to_virt(pasid_dir_ptr); + dir_entry = &dir_tbl[dir_idx]; + + pasid_tbl = get_pasid_table_from_pde(dir_entry); + if (!pasid_tbl) + goto iommu_unlock; + + pasid_tbl_entry = &pasid_tbl[tbl_idx]; + if (!pasid_pte_is_present(pasid_tbl_entry)) + goto iommu_unlock; + + /* + * According to PASID Granular Translation Type(PGTT), + * get the page table pointer. 
+ */ + pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6; + agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2; + + switch (pgtt) { + case PASID_ENTRY_PGTT_FL_ONLY: + pgd = pasid_tbl_entry->val[2]; + break; + case PASID_ENTRY_PGTT_SL_ONLY: + case PASID_ENTRY_PGTT_NESTED: + pgd = pasid_tbl_entry->val[0]; + break; + default: + goto iommu_unlock; + } + pgd &= VTD_PAGE_MASK; + } else { /* legacy mode */ + pgd = context->lo & VTD_PAGE_MASK; + agaw = context->hi & 7; + } + + seq_printf(m, "Device %04x:%02x:%02x.%x ", + iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + if (scalable) + seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd); + else + seq_printf(m, "@0x%llx\n", pgd); + + seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n", + "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE"); + pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path); + + found = true; +iommu_unlock: + spin_unlock(&iommu->lock); + if (found) + break; } + rcu_read_unlock(); return 0; } -static int domain_translation_struct_show(struct seq_file *m, void *unused) +static int dev_domain_translation_struct_show(struct seq_file *m, void *unused) +{ + struct device_domain_info *info = (struct device_domain_info *)m->private; + + return domain_translation_struct_show(m, info, IOMMU_NO_PASID); +} +DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct); + +static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused) { - return bus_for_each_dev(&pci_bus_type, NULL, m, - show_device_domain_translation); + struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private; + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + + return domain_translation_struct_show(m, info, dev_pasid->pasid); } -DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); +DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct); static void invalidation_queue_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = { void __init intel_iommu_debugfs_init(void) { - struct dentry *intel_iommu_debug = debugfs_create_dir("intel", - iommu_debugfs_dir); + intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir); debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, &iommu_regset_fops); debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, NULL, &dmar_translation_struct_fops); - debugfs_create_file("domain_translation_struct", 0444, - intel_iommu_debug, NULL, - &domain_translation_struct_fops); debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug, NULL, &invalidation_queue_fops); #ifdef CONFIG_IRQ_REMAP @@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug, NULL, &dmar_perf_latency_fops); } + +/* + * Create a debugfs directory for each device, and then create a + * debugfs file in this directory for users to dump the page table + * of the default domain. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct + */ +void intel_iommu_debugfs_create_dev(struct device_domain_info *info) +{ + info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug); + + debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry, + info, &dev_domain_translation_struct_fops); +} + +/* Remove the device debugfs directory. 
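The debugfs side of this change is a straightforward dir-per-device layout: debugfs_create_dir() under the driver root, one file whose private data carries the per-device info, and debugfs_remove_recursive() on teardown. A self-contained sketch of the same plumbing, with hypothetical "drv" names:

    #include <linux/debugfs.h>
    #include <linux/device.h>
    #include <linux/seq_file.h>

    static struct dentry *drv_debug_root;    /* created once at module init */

    static int drv_translation_show(struct seq_file *m, void *unused)
    {
            /* m->private is the data pointer given to debugfs_create_file() */
            seq_printf(m, "per-device info at %p\n", m->private);
            return 0;
    }
    DEFINE_SHOW_ATTRIBUTE(drv_translation);

    static struct dentry *drv_debugfs_add_dev(struct device *dev, void *info)
    {
            struct dentry *dir = debugfs_create_dir(dev_name(dev), drv_debug_root);

            debugfs_create_file("domain_translation_struct", 0444, dir, info,
                                &drv_translation_fops);
            return dir;    /* pair with debugfs_remove_recursive(dir) */
    }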
*/ +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) +{ + debugfs_remove_recursive(info->debugfs_dentry); +} + +/* + * Create a debugfs directory per pair of {device, pasid}, then create the + * corresponding debugfs file in this directory for users to dump its page + * table. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct + * + * The debugfs only dumps the page tables whose mappings are created and + * destroyed by the iommu_map/unmap() interfaces. Check the mapping type + * of the domain before creating debugfs directory. + */ +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + char dir_name[10]; + + sprintf(dir_name, "%x", dev_pasid->pasid); + dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry); + + debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry, + dev_pasid, &pasid_domain_translation_struct_fops); +} + +/* Remove the device pasid debugfs directory. */ +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + debugfs_remove_recursive(dev_pasid->debugfs_dentry); +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d1037280abf7..3531b956556c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4016,9 +4016,9 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain, } static struct iommu_domain blocking_domain = { + .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocking_domain_attach_dev, - .free = intel_iommu_domain_free } }; @@ -4028,8 +4028,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) struct iommu_domain *domain; switch (type) { - case IOMMU_DOMAIN_BLOCKED: - return &blocking_domain; case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_UNMANAGED: dmar_domain = alloc_domain(type); @@ -4111,7 +4109,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, static void intel_iommu_domain_free(struct iommu_domain *domain) { - if (domain != &si_domain->domain && domain != &blocking_domain) + if (domain != &si_domain->domain) domain_exit(to_dmar_domain(domain)); } @@ -4465,6 +4463,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } } + intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; } @@ -4474,6 +4474,7 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); + intel_iommu_debugfs_remove_dev(info); dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); @@ -4718,8 +4719,8 @@ static bool risky_device(struct pci_dev *pdev) return false; } -static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); @@ -4729,6 +4730,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, xa_for_each(&dmar_domain->iommu_array, i, info) __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); + return 0; } static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) @@ -4766,6 +4768,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) spin_unlock_irqrestore(&dmar_domain->lock, flags); domain_detach_iommu(dmar_domain, 
iommu); + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); kfree(dev_pasid); out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); @@ -4821,6 +4824,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); spin_unlock_irqrestore(&dmar_domain->lock, flags); + if (domain->type & __IOMMU_DOMAIN_PAGING) + intel_iommu_debugfs_create_dev_pasid(dev_pasid); + return 0; out_detach_iommu: domain_detach_iommu(dmar_domain, iommu); @@ -4925,6 +4931,7 @@ const struct iommu_dirty_ops intel_dirty_ops = { }; const struct iommu_ops intel_iommu_ops = { + .blocked_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d796d0d9b114..65d37a138c75 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -749,12 +749,18 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to device directory dentry */ +#endif }; struct dev_pasid_info { struct list_head link_domain; /* link to domain siblings */ struct device *dev; ioasid_t pasid; +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */ +#endif }; static inline void __iommu_flush_cache( @@ -935,8 +941,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid #ifdef CONFIG_INTEL_IOMMU_DEBUGFS void intel_iommu_debugfs_init(void); +void intel_iommu_debugfs_create_dev(struct device_domain_info *info); +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info); +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid); +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid); #else static inline void intel_iommu_debugfs_init(void) {} +static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {} +static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {} #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ extern const struct attribute_group *intel_iommu_groups[]; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c146378c7d03..f17a1113f3d6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -37,7 +37,6 @@ #include "iommu-priv.h" #include "iommu-sva.h" -#include "iommu-priv.h" static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); @@ -96,8 +95,8 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); static void iommu_release_device(struct device *dev); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type); +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type); static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev); static int __iommu_attach_group(struct iommu_domain *domain, @@ -184,6 +183,8 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_DMA: case 
IOMMU_DOMAIN_DMA_FQ: return "Translated"; + case IOMMU_DOMAIN_PLATFORM: + return "Platform"; default: return "Unknown"; } @@ -290,6 +291,10 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_lock(&iommu_device_lock); list_del(&iommu->list); spin_unlock(&iommu_device_lock); + + /* Pairs with the alloc in generic_single_device_group() */ + iommu_group_put(iommu->singleton_group); + iommu->singleton_group = NULL; } EXPORT_SYMBOL_GPL(iommu_device_unregister); @@ -404,6 +409,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) ret = PTR_ERR(iommu_dev); goto err_module_put; } + dev->iommu->iommu_dev = iommu_dev; ret = iommu_device_link(iommu_dev, dev); if (ret) @@ -418,7 +424,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) } dev->iommu_group = group; - dev->iommu->iommu_dev = iommu_dev; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); @@ -432,6 +437,7 @@ err_release: err_module_put: module_put(ops->owner); err_free: + dev->iommu->iommu_dev = NULL; dev_iommu_free(dev); return ret; } @@ -1637,6 +1643,27 @@ struct iommu_group *generic_device_group(struct device *dev) EXPORT_SYMBOL_GPL(generic_device_group); /* + * Generic device_group call-back function. It just allocates one + * iommu-group per iommu driver instance shared by every device + * probed by that iommu driver. + */ +struct iommu_group *generic_single_device_group(struct device *dev) +{ + struct iommu_device *iommu = dev->iommu->iommu_dev; + + if (!iommu->singleton_group) { + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return group; + iommu->singleton_group = group; + } + return iommu_group_ref_get(iommu->singleton_group); +} +EXPORT_SYMBOL_GPL(generic_single_device_group); + +/* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ @@ -1717,26 +1744,29 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_group); -static int iommu_get_def_domain_type(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) - return IOMMU_DOMAIN_DMA; - - if (ops->def_domain_type) - return ops->def_domain_type(dev); - - return 0; -} - static struct iommu_domain * -__iommu_group_alloc_default_domain(const struct bus_type *bus, - struct iommu_group *group, int req_type) +__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { if (group->default_domain && group->default_domain->type == req_type) return group->default_domain; - return __iommu_domain_alloc(bus, req_type); + return __iommu_group_domain_alloc(group, req_type); +} + +/* + * Returns the iommu_ops for the devices in an iommu group. + * + * It is assumed that all devices in an iommu group are managed by a single + * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device + * in the group. 
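generic_single_device_group() generalises a pattern several drivers (ipmmu, later in this series) used to open-code: allocate one group the first time any device behind the IOMMU instance probes, then hand out an extra reference to each later caller, with iommu_device_unregister() dropping the cached reference. A condensed sketch using a hypothetical per-instance struct (the real helper caches the group in struct iommu_device, and the core's group->mutex serialises callers):

    #include <linux/err.h>
    #include <linux/iommu.h>

    struct my_iommu {
            struct iommu_device iommu;
            struct iommu_group *singleton_group;    /* put at unregister time */
    };

    static struct iommu_group *my_single_device_group(struct my_iommu *mi)
    {
            if (!mi->singleton_group) {
                    struct iommu_group *group = iommu_group_alloc();

                    if (IS_ERR(group))
                            return group;
                    mi->singleton_group = group;    /* first caller allocates */
            }
            /* every caller, including the first, gets its own reference */
            return iommu_group_ref_get(mi->singleton_group);
    }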
+ */ +static const struct iommu_ops *group_iommu_ops(struct iommu_group *group) +{ + struct group_device *device = + list_first_entry(&group->devices, struct group_device, list); + + lockdep_assert_held(&group->mutex); + + return dev_iommu_ops(device->dev); } /* @@ -1746,25 +1776,34 @@ __iommu_group_alloc_default_domain(const struct bus_type *bus, static struct iommu_domain * iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { - const struct bus_type *bus = - list_first_entry(&group->devices, struct group_device, list) - ->dev->bus; + const struct iommu_ops *ops = group_iommu_ops(group); struct iommu_domain *dom; lockdep_assert_held(&group->mutex); + /* + * Allow legacy drivers to specify the domain that will be the default + * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM + * domain. Do not use in new drivers. + */ + if (ops->default_domain) { + if (req_type) + return NULL; + return ops->default_domain; + } + if (req_type) - return __iommu_group_alloc_default_domain(bus, group, req_type); + return __iommu_group_alloc_default_domain(group, req_type); /* The driver gave no guidance on what type to use, try the default */ - dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type); + dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); if (dom) return dom; /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) return NULL; - dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA); + dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); if (!dom) return NULL; @@ -1808,40 +1847,109 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -/* A target_type of 0 will select the best domain type and cannot fail */ +/* + * Combine the driver's chosen def_domain_type across all the devices in a + * group. Drivers must give a consistent result. + */ +static int iommu_get_def_domain_type(struct iommu_group *group, + struct device *dev, int cur_type) +{ + const struct iommu_ops *ops = group_iommu_ops(group); + int type; + + if (!ops->def_domain_type) + return cur_type; + + type = ops->def_domain_type(dev); + if (!type || cur_type == type) + return cur_type; + if (!cur_type) + return type; + + dev_err_ratelimited( + dev, + "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", + iommu_domain_type_str(cur_type), iommu_domain_type_str(type), + group->id); + + /* + * Try to recover: drivers are allowed to force IDENTITY or DMA, and + * IDENTITY takes precedence. + */ + if (type == IOMMU_DOMAIN_IDENTITY) + return type; + return cur_type; +} + +/* + * A target_type of 0 will select the best domain type. 0 can be returned in + * this case, meaning the global default should be used. + */ static int iommu_get_default_domain_type(struct iommu_group *group, int target_type) { - int best_type = target_type; + struct device *untrusted = NULL; struct group_device *gdev; - struct device *last_dev; + int driver_type = 0; lockdep_assert_held(&group->mutex); + /* + * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an + * identity_domain and it will automatically become their default + * domain. Later on, ARM_DMA_USE_IOMMU will install its UNMANAGED domain. + * Override the selection to IDENTITY.
+ */ + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { + static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && + IS_ENABLED(CONFIG_IOMMU_DMA))); + driver_type = IOMMU_DOMAIN_IDENTITY; + } + for_each_group_device(group, gdev) { - unsigned int type = iommu_get_def_domain_type(gdev->dev); - - if (best_type && type && best_type != type) { - if (target_type) { - dev_err_ratelimited( - gdev->dev, - "Device cannot be in %s domain\n", - iommu_domain_type_str(target_type)); + driver_type = iommu_get_def_domain_type(group, gdev->dev, + driver_type); + + if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { + /* + * No ARM32 using systems will set untrusted, it cannot + * work. + */ + if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) return -1; - } + untrusted = gdev->dev; + } + } - dev_warn( - gdev->dev, - "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", - iommu_domain_type_str(type), dev_name(last_dev), - iommu_domain_type_str(best_type)); - return 0; + /* + * If the common dma ops are not selected in kconfig then we cannot use + * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been + * selected. + */ + if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { + if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) + return -1; + if (!driver_type) + driver_type = IOMMU_DOMAIN_IDENTITY; + } + + if (untrusted) { + if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { + dev_err_ratelimited( + untrusted, + "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", + group->id, iommu_domain_type_str(driver_type)); + return -1; } - if (!best_type) - best_type = type; - last_dev = gdev->dev; + driver_type = IOMMU_DOMAIN_DMA; } - return best_type; + + if (target_type) { + if (driver_type && target_type != driver_type) + return -1; + return target_type; + } + return driver_type; } static void iommu_group_do_probe_finalize(struct device *dev) @@ -1970,16 +2078,24 @@ void iommu_set_fault_handler(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type) +static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, + struct device *dev, + unsigned int type) { struct iommu_domain *domain; unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; - if (bus == NULL || bus->iommu_ops == NULL) + if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain) + return ops->identity_domain; + else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain) + return ops->blocked_domain; + else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging) + domain = ops->domain_alloc_paging(dev); + else if (ops->domain_alloc) + domain = ops->domain_alloc(alloc_type); + else return NULL; - domain = bus->iommu_ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -1989,10 +2105,10 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, * may override this later */ if (!domain->pgsize_bitmap) - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + domain->pgsize_bitmap = ops->pgsize_bitmap; if (!domain->ops) - domain->ops = bus->iommu_ops->default_domain_ops; + domain->ops = ops->default_domain_ops; if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); @@ -2001,9 +2117,22 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, return domain; } +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group 
*group, unsigned int type) +{ + struct device *dev = + list_first_entry(&group->devices, struct group_device, list) + ->dev; + + return __iommu_domain_alloc(group_iommu_ops(group), dev, type); +} + struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { - return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); + if (bus == NULL || bus->iommu_ops == NULL) + return NULL; + return __iommu_domain_alloc(bus->iommu_ops, NULL, + IOMMU_DOMAIN_UNMANAGED); } EXPORT_SYMBOL_GPL(iommu_domain_alloc); @@ -2012,7 +2141,8 @@ void iommu_domain_free(struct iommu_domain *domain) if (domain->type == IOMMU_DOMAIN_SVA) mmdrop(domain->mm); iommu_put_dma_cookie(domain); - domain->ops->free(domain); + if (domain->ops->free) + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); @@ -2062,10 +2192,10 @@ static int __iommu_attach_device(struct iommu_domain *domain, */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; int ret; - group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -2082,8 +2212,6 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device); @@ -2098,9 +2226,9 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; - group = iommu_group_get(dev); if (!group) return; @@ -2112,24 +2240,18 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_detach_device); struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) { - struct iommu_domain *domain; - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; - group = iommu_group_get(dev); if (!group) return NULL; - domain = group->domain; - - iommu_group_put(group); - - return domain; + return group->domain; } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); @@ -2275,21 +2397,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, if (group->domain == new_domain) return 0; - /* - * New drivers should support default domains, so set_platform_dma() - * op will never be called. Otherwise the NULL domain represents some - * platform specific behavior. - */ - if (!new_domain) { - for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - - if (!WARN_ON(!ops->set_platform_dma_ops)) - ops->set_platform_dma_ops(gdev->dev); - } - group->domain = NULL; - return 0; - } + if (WARN_ON(!new_domain)) + return -EINVAL; /* * Changing the domain is done by calling attach_dev() on the new @@ -2325,19 +2434,15 @@ err_revert: */ last_gdev = gdev; for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - /* - * If set_platform_dma_ops is not present a NULL domain can - * happen only for first probe, in which case we leave - * group->domain as NULL and let release clean everything up. + * A NULL domain can happen only for first probe, in which case + * we leave group->domain as NULL and let release clean + * everything up. 
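The allocation order in __iommu_domain_alloc() is worth spelling out, since it is what lets converted drivers drop the type checks from their domain_alloc() hooks: statically declared special domains win, then the paging-only hook, then the legacy hook. A condensed restatement of that precedence (flag masking and the pgsize_bitmap/ops fix-ups that follow it are omitted):

    #include <linux/iommu.h>

    static struct iommu_domain *alloc_domain_sketch(const struct iommu_ops *ops,
                                                    struct device *dev,
                                                    unsigned int type)
    {
            if (type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
                    return ops->identity_domain;           /* static, never freed */
            if (type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
                    return ops->blocked_domain;            /* static, never freed */
            if ((type & __IOMMU_DOMAIN_PAGING) && ops->domain_alloc_paging)
                    return ops->domain_alloc_paging(dev);  /* new-style hook */
            if (ops->domain_alloc)
                    return ops->domain_alloc(type);        /* legacy fallback */
            return NULL;
    }

This is also why iommu_domain_free() above now tolerates a missing ->free op: the static identity/blocked domains have no release function.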
*/ if (group->domain) WARN_ON(__iommu_device_set_domain( group, gdev->dev, group->domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); - else if (ops->set_platform_dma_ops) - ops->set_platform_dma_ops(gdev->dev); if (gdev == last_gdev) break; } @@ -2418,30 +2523,6 @@ out_set_count: return pgsize; } -static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, - gfp_t gfp, size_t *mapped) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; - int ret; - - pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", - iova, &paddr, pgsize, count); - - if (ops->map_pages) { - ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, - gfp, mapped); - } else { - ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); - *mapped = ret ? 0 : pgsize; - } - - return ret; -} - static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { @@ -2452,13 +2533,12 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(!(ops->map || ops->map_pages) || - domain->pgsize_bitmap == 0UL)) - return -ENODEV; - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; + if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) + return -ENODEV; + /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2476,10 +2556,14 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t mapped = 0; + size_t pgsize, count, mapped = 0; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, - &mapped); + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, &mapped); /* * Some pages may have been mapped, even if an error occurred, * so we should account for those so they can be unmapped. @@ -2516,25 +2600,21 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, return -EINVAL; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, size); + if (ret == 0 && ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, size); + if (ret) + goto out_err; + } return ret; -} -EXPORT_SYMBOL_GPL(iommu_map); -static size_t __iommu_unmap_pages(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *iotlb_gather) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; +out_err: + /* undo mappings already done */ + iommu_unmap(domain, iova, size); - pgsize = iommu_pgsize(domain, iova, iova, size, &count); - return ops->unmap_pages ? 
- ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : - ops->unmap(domain, iova, pgsize, iotlb_gather); + return ret; } +EXPORT_SYMBOL_GPL(iommu_map); static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, @@ -2545,11 +2625,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long orig_iova = iova; unsigned int min_pagesz; - if (unlikely(!(ops->unmap || ops->unmap_pages) || - domain->pgsize_bitmap == 0UL)) + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return 0; - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) + if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) return 0; /* find out the minimum page size supported */ @@ -2573,9 +2652,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain, * or we hit an area that isn't mapped. */ while (unmapped < size) { - unmapped_page = __iommu_unmap_pages(domain, iova, - size - unmapped, - iotlb_gather); + size_t pgsize, count; + + pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); + unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); if (!unmapped_page) break; @@ -2658,8 +2738,11 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, mapped); + if (ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + } return mapped; out_err: @@ -2957,21 +3040,9 @@ static int iommu_setup_default_domain(struct iommu_group *group, if (req_type < 0) return -EINVAL; - /* - * There are still some drivers which don't support default domains, so - * we ignore the failure and leave group->default_domain NULL. - * - * We assume that the iommu driver starts up the device in - * 'set_platform_dma_ops' mode if it does not support default domains. - */ dom = iommu_group_alloc_default_domain(group, req_type); - if (!dom) { - /* Once in default_domain mode we never leave */ - if (group->default_domain) - return -ENODEV; - group->default_domain = NULL; - return 0; - } + if (!dom) + return -ENODEV; if (group->default_domain == dom) return 0; @@ -3114,24 +3185,6 @@ out_unlock: return ret ?: count; } -static bool iommu_is_default_domain(struct iommu_group *group) -{ - if (group->domain == group->default_domain) - return true; - - /* - * If the default domain was set to identity and it is still an identity - * domain then we consider this a pass. This happens because of - * amd_iommu_init_device() replacing the default idenytity domain with an - * identity domain that has a different configuration for AMDGPU. - */ - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY && - group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) - return true; - return false; -} - /** * iommu_device_use_default_domain() - Device driver wants to handle device * DMA through the kernel DMA API. 
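With iotlb_sync_map() now returning int, iommu_map() has to unwind on failure so callers never see a half-synced range; the same pattern repeats in the scatterlist path below. A condensed restatement of the new control flow (a sketch assuming the internal helpers visible above, not a new API):

    static int map_and_sync_sketch(struct iommu_domain *domain,
                                   unsigned long iova, phys_addr_t paddr,
                                   size_t size, int prot, gfp_t gfp)
    {
            int ret = __iommu_map(domain, iova, paddr, size, prot, gfp);

            if (ret)
                    return ret;
            if (domain->ops->iotlb_sync_map) {
                    ret = domain->ops->iotlb_sync_map(domain, iova, size);
                    if (ret) {
                            /* undo the mappings already done on sync failure */
                            iommu_unmap(domain, iova, size);
                            return ret;
                    }
            }
            return 0;
    }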
@@ -3142,7 +3195,8 @@ static bool iommu_is_default_domain(struct iommu_group *group) */ int iommu_device_use_default_domain(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller is the driver core during the pre-probe path */ + struct iommu_group *group = dev->iommu_group; int ret = 0; if (!group) @@ -3150,7 +3204,7 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group) || + if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; @@ -3161,8 +3215,6 @@ int iommu_device_use_default_domain(struct device *dev) unlock_out: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } @@ -3176,7 +3228,8 @@ unlock_out: */ void iommu_device_unuse_default_domain(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller is the driver core during the post-probe path */ + struct iommu_group *group = dev->iommu_group; if (!group) return; @@ -3186,26 +3239,22 @@ void iommu_device_unuse_default_domain(struct device *dev) group->owner_cnt--; mutex_unlock(&group->mutex); - iommu_group_put(group); } static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) { - struct group_device *dev = - list_first_entry(&group->devices, struct group_device, list); - if (group->blocking_domain) return 0; group->blocking_domain = - __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); + __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); if (!group->blocking_domain) { /* * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED * create an empty domain instead. */ - group->blocking_domain = __iommu_domain_alloc( - dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); + group->blocking_domain = __iommu_group_domain_alloc( + group, IOMMU_DOMAIN_UNMANAGED); if (!group->blocking_domain) return -EINVAL; } @@ -3273,13 +3322,13 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); */ int iommu_device_claim_dma_owner(struct device *dev, void *owner) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; int ret = 0; if (WARN_ON(!owner)) return -EINVAL; - group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -3296,8 +3345,6 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner) ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); @@ -3335,7 +3382,8 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); */ void iommu_device_release_dma_owner(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); if (group->owner_cnt > 1) @@ -3343,7 +3391,6 @@ void iommu_device_release_dma_owner(struct device *dev) else __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); @@ -3404,14 +3451,14 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; void *curr; int ret; if (!domain->ops->set_dev_pasid) return -EOPNOTSUPP; - 
group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -3429,8 +3476,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); @@ -3447,14 +3492,13 @@ EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); __iommu_remove_group_pasid(group, pasid); WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); mutex_unlock(&group->mutex); - - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); @@ -3476,10 +3520,10 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type) { + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; struct iommu_domain *domain; - struct iommu_group *group; - group = iommu_group_get(dev); if (!group) return NULL; @@ -3488,7 +3532,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, if (type && domain && domain->type != type) domain = ERR_PTR(-EBUSY); xa_unlock(&group->pasid_array); - iommu_group_put(group); return domain; } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d43a87737c1e..5d93434003d8 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -123,10 +123,6 @@ struct selftest_obj { }; }; -static void mock_domain_blocking_free(struct iommu_domain *domain) -{ -} - static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { @@ -139,7 +135,6 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, } static const struct iommu_domain_ops mock_blocking_ops = { - .free = mock_domain_blocking_free, .attach_dev = mock_domain_nop_attach, }; @@ -258,15 +253,6 @@ __mock_domain_alloc_nested(struct mock_iommu_domain *mock_parent, return &mock_nested->domain; } -static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) -{ - if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) - return &mock_blocking_domain; - if (iommu_domain_type == IOMMU_DOMAIN_UNMANAGED) - return mock_domain_alloc_paging(NULL); - return NULL; -} - static struct iommu_domain * mock_domain_alloc_user(struct device *dev, u32 flags, struct iommu_domain *parent, @@ -446,14 +432,6 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return false; } -static void mock_domain_set_plaform_dma_ops(struct device *dev) -{ - /* - * mock doesn't setup default domains because we can't hook into the - * normal probe path - */ -} - static struct iommu_device mock_iommu_device = { }; @@ -463,13 +441,18 @@ static struct iommu_device *mock_probe_device(struct device *dev) } static const struct iommu_ops mock_ops = { + /* + * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() + * because it is zero. 
+ */ + .default_domain = &mock_blocking_domain, + .blocked_domain = &mock_blocking_domain, .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, - .domain_alloc = mock_domain_alloc, + .domain_alloc_paging = mock_domain_alloc_paging, .domain_alloc_user = mock_domain_alloc_user, .capable = mock_domain_capable, - .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, .probe_device = mock_probe_device, .default_domain_ops = diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 10b964600948..d30e453d0fb4 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/bitops.h> #include <linux/cpu.h> +#include <linux/workqueue.h> /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL @@ -622,15 +623,21 @@ EXPORT_SYMBOL_GPL(reserve_iova); /* * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to * assure size of 'iova_magazine' to be 1024 bytes, so that no memory - * will be wasted. + * will be wasted. Since only full magazines are inserted into the depot, + * we don't need to waste PFN capacity on a separate list head either. */ #define IOVA_MAG_SIZE 127 -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ + +#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) struct iova_magazine { - unsigned long size; + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; +static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1))); struct iova_cpu_rcache { spinlock_t lock; @@ -640,9 +647,11 @@ struct iova_cpu_rcache { struct iova_rcache { spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + unsigned int depot_size; + struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; }; static struct iova_magazine *iova_magazine_alloc(gfp_t flags) @@ -717,6 +726,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; } +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag = rcache->depot; + + rcache->depot = mag->next; + mag->size = IOVA_MAG_SIZE; + rcache->depot_size--; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) +{ + mag->next = rcache->depot; + rcache->depot = mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work); + struct iova_magazine *mag = NULL; + unsigned long flags; + + spin_lock_irqsave(&rcache->lock, flags); + if (rcache->depot_size > num_online_cpus()) + mag = iova_depot_pop(rcache); + spin_unlock_irqrestore(&rcache->lock, flags); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } +} + int iova_domain_init_rcaches(struct iova_domain *iovad) { unsigned int cpu; @@ -734,7 +778,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - rcache->depot_size = 0; + rcache->iovad = iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -776,7 +821,6 @@ static bool 
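The depot rework above stores full magazines on an intrusive singly-linked list instead of a fixed array, reusing the magazine's own 'size' word for the link: a magazine parked in the depot is always full, so 'size' carries no information there and the union costs nothing. Push/pop sketch (locking elided; the real code holds rcache->lock, and a delayed work item trims the depot back towards num_online_cpus() entries):

    #define MAG_SIZE 127    /* IOVA_MAG_SIZE in the patch */

    struct mag {
            union {
                    unsigned long size;    /* meaningful while cached per-CPU */
                    struct mag *next;      /* meaningful while in the depot */
            };
            unsigned long pfns[MAG_SIZE];
    };

    static struct mag *depot;    /* rcache->depot in the patch */

    static void depot_push(struct mag *m)
    {
            m->next = depot;    /* clobbers m->size, which is known to be full */
            depot = m;
    }

    static struct mag *depot_pop(void)    /* caller checks depot != NULL */
    {
            struct mag *m = depot;

            depot = m->next;
            m->size = MAG_SIZE;    /* the depot only ever holds full magazines */
            return m;
    }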
__iova_rcache_insert(struct iova_domain *iovad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free = NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert = false; unsigned long flags; @@ -794,13 +838,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] = - cpu_rcache->loaded; - } else { - mag_to_free = cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); cpu_rcache->loaded = new_mag; can_insert = true; @@ -812,11 +852,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, spin_unlock_irqrestore(&cpu_rcache->lock, flags); - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; } @@ -854,9 +889,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; } spin_unlock(&rcache->lock); @@ -895,9 +930,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) struct iova_rcache *rcache; struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; - int i, j; - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; if (!rcache->cpu_rcaches) break; @@ -907,8 +941,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) - iova_magazine_free(rcache->depot[j]); + cancel_delayed_work_sync(&rcache->work); + while (rcache->depot) + iova_magazine_free(iova_depot_pop(rcache)); } kfree(iovad->rcaches); @@ -942,16 +977,16 @@ static void free_global_cached_iovas(struct iova_domain *iovad) { struct iova_rcache *rcache; unsigned long flags; - int i, j; - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; spin_lock_irqsave(&rcache->lock, flags); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + while (rcache->depot) { + struct iova_magazine *mag = iova_depot_pop(rcache); + + iova_magazine_free_pfns(mag, iovad); + iova_magazine_free(mag); } - rcache->depot_size = 0; spin_unlock_irqrestore(&rcache->lock, flags); } } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 65ff69477c43..ace1fc4bd34b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -64,7 +64,6 @@ struct ipmmu_vmsa_device { struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; s8 utlb_ctx[IPMMU_UTLB_MAX]; - struct iommu_group *group; struct dma_iommu_mapping *mapping; }; @@ -295,6 +294,18 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, mmu->utlb_ctx[utlb] = domain->context_id; } +/* + * Disable MMU translation for the microTLB. 
+ */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + ipmmu_imuctr_write(mmu, utlb, 0); + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; +} + static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; @@ -551,13 +562,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -627,6 +635,36 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return 0; } +static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct ipmmu_vmsa_domain *domain; + unsigned int i; + + if (io_domain == identity_domain || !io_domain) + return 0; + + domain = to_vmsa_domain(io_domain); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_disable(domain, fwspec->ids[i]); + + /* + * TODO: Optimize by disabling the context when no device is attached. + */ + return 0; +} + +static struct iommu_domain_ops ipmmu_iommu_identity_ops = { + .attach_dev = ipmmu_iommu_identity_attach, +}; + +static struct iommu_domain ipmmu_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &ipmmu_iommu_identity_ops, +}; + static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -833,28 +871,18 @@ static void ipmmu_release_device(struct device *dev) arm_iommu_release_mapping(mmu->mapping); } -static struct iommu_group *ipmmu_find_group(struct device *dev) -{ - struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); - struct iommu_group *group; - - if (mmu->group) - return iommu_group_ref_get(mmu->group); - - group = iommu_group_alloc(); - if (!IS_ERR(group)) - mmu->group = group; - - return group; -} - static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc, + .identity_domain = &ipmmu_iommu_identity_domain, + .domain_alloc_paging = ipmmu_domain_alloc_paging, .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, + /* + * FIXME: The device grouping is a fixed property of the hardware's + * ability to isolate and control DMA, it should not depend on kconfig. + */ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA) - ? generic_device_group : ipmmu_find_group, + ? 
generic_device_group : generic_single_device_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 79d89bad5132..f86af9815d6f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -302,13 +302,10 @@ static void __program_context(void __iomem *base, int ctx, SET_M(base, ctx, 1); } -static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) +static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev) { struct msm_priv *priv; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto fail_nomem; @@ -443,15 +440,20 @@ fail: return ret; } -static void msm_iommu_set_platform_dma(struct device *dev) +static int msm_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct msm_priv *priv = to_msm_priv(domain); + struct msm_priv *priv; unsigned long flags; struct msm_iommu_dev *iommu; struct msm_iommu_ctx_dev *master; - int ret; + int ret = 0; + + if (domain == identity_domain || !domain) + return 0; + priv = to_msm_priv(domain); free_io_pgtable_ops(priv->iop); spin_lock_irqsave(&msm_iommu_lock, flags); @@ -468,8 +470,18 @@ static void msm_iommu_set_platform_dma(struct device *dev) } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; } +static struct iommu_domain_ops msm_iommu_identity_ops = { + .attach_dev = msm_iommu_identity_attach, +}; + +static struct iommu_domain msm_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &msm_iommu_identity_ops, +}; + static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -486,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct msm_priv *priv = to_msm_priv(domain); __flush_iotlb_range(iova, size, SZ_4K, false, priv); + return 0; } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -675,10 +688,10 @@ fail: } static struct iommu_ops msm_iommu_ops = { - .domain_alloc = msm_iommu_domain_alloc, + .identity_domain = &msm_iommu_identity_domain, + .domain_alloc_paging = msm_iommu_domain_alloc_paging, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, - .set_platform_dma_ops = msm_iommu_set_platform_dma, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index fab6c347ce57..75279500a4a8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -688,13 +688,10 @@ update_iova_region: return 0; } -static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev) { struct mtk_iommu_domain *dom; - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -776,6 +773,28 @@ err_unlock: return ret; } +static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain, + 
struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + + if (domain == identity_domain || !domain) + return 0; + + mtk_iommu_config(data, dev, false, 0); + return 0; +} + +static struct iommu_domain_ops mtk_iommu_identity_ops = { + .attach_dev = mtk_iommu_identity_attach, +}; + +static struct iommu_domain mtk_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_identity_ops, +}; + static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -817,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank); } -static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank); + return 0; } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, @@ -995,7 +1015,8 @@ static void mtk_iommu_get_resv_regions(struct device *dev, } static const struct iommu_ops mtk_iommu_ops = { - .domain_alloc = mtk_iommu_domain_alloc, + .identity_domain = &mtk_iommu_identity_domain, + .domain_alloc_paging = mtk_iommu_domain_alloc_paging, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 8a0a5e5d049f..67e044c1a7d9 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -270,13 +270,10 @@ static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data) return 0; } -static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev) { struct mtk_iommu_v1_domain *dom; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -319,13 +316,24 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device return 0; } -static void mtk_iommu_v1_set_platform_dma(struct device *dev) +static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); mtk_iommu_v1_config(data, dev, false); + return 0; } +static struct iommu_domain_ops mtk_iommu_v1_identity_ops = { + .attach_dev = mtk_iommu_v1_identity_attach, +}; + +static struct iommu_domain mtk_iommu_v1_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_v1_identity_ops, +}; + static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -441,11 +449,6 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_arg return 0; } -static int mtk_iommu_v1_def_domain_type(struct device *dev) -{ - return IOMMU_DOMAIN_UNMANAGED; -} - static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -578,14 +581,13 @@ static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data) } static const struct iommu_ops mtk_iommu_v1_ops = { - .domain_alloc = mtk_iommu_v1_domain_alloc, + 
.identity_domain = &mtk_iommu_v1_identity_domain, + .domain_alloc_paging = mtk_iommu_v1_domain_alloc_paging, .probe_device = mtk_iommu_v1_probe_device, .probe_finalize = mtk_iommu_v1_probe_finalize, .release_device = mtk_iommu_v1_release_device, - .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, - .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 537e402f9bba..c66b070841dd 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1225,18 +1225,15 @@ static int omap_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, obj); if (omap_iommu_can_register(pdev)) { - obj->group = iommu_group_alloc(); - if (IS_ERR(obj->group)) - return PTR_ERR(obj->group); - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); if (err) - goto out_group; + return err; err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev); if (err) goto out_sysfs; + obj->has_iommu_driver = true; } pm_runtime_enable(obj->dev); @@ -1252,8 +1249,6 @@ static int omap_iommu_probe(struct platform_device *pdev) out_sysfs: iommu_device_sysfs_remove(&obj->iommu); -out_group: - iommu_group_put(obj->group); return err; } @@ -1261,10 +1256,7 @@ static void omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - if (obj->group) { - iommu_group_put(obj->group); - obj->group = NULL; - + if (obj->has_iommu_driver) { iommu_device_sysfs_remove(&obj->iommu); iommu_device_unregister(&obj->iommu); } @@ -1318,7 +1310,8 @@ static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) } static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, - phys_addr_t pa, size_t bytes, int prot, gfp_t gfp) + phys_addr_t pa, size_t bytes, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct device *dev = omap_domain->dev; @@ -1356,13 +1349,15 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, oiommu = iommu->iommu_dev; iopgtable_clear_entry(oiommu, da); } + } else { + *mapped = bytes; } return ret; } static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct device *dev = omap_domain->dev; @@ -1555,23 +1550,35 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_domain->dev = NULL; } -static void omap_iommu_set_platform_dma(struct device *dev) +static int omap_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu_domain *omap_domain; + + if (domain == identity_domain || !domain) + return 0; + omap_domain = to_omap_domain(domain); spin_lock(&omap_domain->lock); _omap_iommu_detach_dev(omap_domain, dev); spin_unlock(&omap_domain->lock); + return 0; } -static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) +static struct iommu_domain_ops omap_iommu_identity_ops = { + .attach_dev = omap_iommu_identity_attach, +}; + +static struct 
iommu_domain omap_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &omap_iommu_identity_ops, +}; + +static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev) { struct omap_iommu_domain *omap_domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) return NULL; @@ -1717,31 +1724,17 @@ static void omap_iommu_release_device(struct device *dev) } -static struct iommu_group *omap_iommu_device_group(struct device *dev) -{ - struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev); - struct iommu_group *group = ERR_PTR(-EINVAL); - - if (!arch_data) - return ERR_PTR(-ENODEV); - - if (arch_data->iommu_dev) - group = iommu_group_ref_get(arch_data->iommu_dev->group); - - return group; -} - static const struct iommu_ops omap_iommu_ops = { - .domain_alloc = omap_iommu_domain_alloc, + .identity_domain = &omap_iommu_identity_domain, + .domain_alloc_paging = omap_iommu_domain_alloc_paging, .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, - .device_group = omap_iommu_device_group, - .set_platform_dma_ops = omap_iommu_set_platform_dma, + .device_group = generic_single_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = omap_iommu_attach_dev, - .map = omap_iommu_map, - .unmap = omap_iommu_unmap, + .map_pages = omap_iommu_map, + .unmap_pages = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, .free = omap_iommu_domain_free, } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 18ee713ede78..27697109ec79 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -80,7 +80,7 @@ struct omap_iommu { u32 id; struct iommu_device iommu; - struct iommu_group *group; + bool has_iommu_driver; u8 pwrst; }; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 8ff69fbf9f65..2685861c0a12 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -113,7 +113,6 @@ struct rk_iommu { struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ - struct iommu_group *group; }; struct rk_iommudata { @@ -817,7 +816,8 @@ unwind: } static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; @@ -850,12 +850,14 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, paddr, size, prot); spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + if (!ret) + *mapped = size; return ret; } static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; @@ -989,13 +991,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, return 0; } -static void rk_iommu_identity_free(struct iommu_domain *domain) -{ -} - static struct iommu_domain_ops rk_identity_ops = { .attach_dev = rk_iommu_identity_attach, - .free = rk_iommu_identity_free, }; static struct iommu_domain rk_identity_domain = { @@ -1003,13 +1000,6 @@ static struct iommu_domain 
rk_identity_domain = { .ops = &rk_identity_ops, }; -#ifdef CONFIG_ARM -static void rk_iommu_set_platform_dma(struct device *dev) -{ - WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); -} -#endif - static int rk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1055,16 +1045,10 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, return ret; } -static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) { struct rk_iommu_domain *rk_domain; - if (type == IOMMU_DOMAIN_IDENTITY) - return &rk_identity_domain; - - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - if (!dma_dev) return NULL; @@ -1155,15 +1139,6 @@ static void rk_iommu_release_device(struct device *dev) device_link_del(data->link); } -static struct iommu_group *rk_iommu_device_group(struct device *dev) -{ - struct rk_iommu *iommu; - - iommu = rk_iommu_from_dev(dev); - - return iommu_group_ref_get(iommu->group); -} - static int rk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -1186,19 +1161,17 @@ static int rk_iommu_of_xlate(struct device *dev, } static const struct iommu_ops rk_iommu_ops = { - .domain_alloc = rk_iommu_domain_alloc, + .identity_domain = &rk_identity_domain, + .domain_alloc_paging = rk_iommu_domain_alloc_paging, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, - .device_group = rk_iommu_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = rk_iommu_set_platform_dma, -#endif + .device_group = generic_single_device_group, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = rk_iommu_attach_device, - .map = rk_iommu_map, - .unmap = rk_iommu_unmap, + .map_pages = rk_iommu_map, + .unmap_pages = rk_iommu_unmap, .iova_to_phys = rk_iommu_iova_to_phys, .free = rk_iommu_domain_free, } @@ -1280,15 +1253,9 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) return err; - iommu->group = iommu_group_alloc(); - if (IS_ERR(iommu->group)) { - err = PTR_ERR(iommu->group); - goto err_unprepare_clocks; - } - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); if (err) - goto err_put_group; + goto err_unprepare_clocks; err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); if (err) @@ -1325,8 +1292,6 @@ err_pm_disable: pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_put_group: - iommu_group_put(iommu->group); err_unprepare_clocks: clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); return err; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index fbf59a8db29b..9a5196f523de 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -14,16 +14,300 @@ #include <linux/rcupdate.h> #include <asm/pci_dma.h> +#include "dma-iommu.h" + static const struct iommu_ops s390_iommu_ops; +static struct kmem_cache *dma_region_table_cache; +static struct kmem_cache *dma_page_table_cache; + +static u64 s390_iommu_aperture; +static u32 s390_iommu_aperture_factor = 1; + struct s390_domain { struct iommu_domain domain; struct list_head devices; + struct zpci_iommu_ctrs ctrs; unsigned long *dma_table; spinlock_t list_lock; struct rcu_head rcu; }; +static inline unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_sx(dma_addr_t ptr) +{ + 
return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) +{ + *entry &= ZPCI_PTE_FLAG_MASK; + *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); +} + +static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= (sto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RTX; +} + +static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) +{ + *entry &= ZPCI_STE_FLAG_MASK; + *entry |= (pto & ZPCI_STE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_SX; +} + +static inline void validate_rt_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RTX; +} + +static inline void validate_st_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_VALID; +} + +static inline void invalidate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_INVALID; +} + +static inline void validate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_VALID; +} + +static inline void entry_set_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_PROTECTED; +} + +static inline void entry_clr_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_UNPROTECTED; +} + +static inline int reg_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; +} + +static inline int pt_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; +} + +static inline unsigned long *get_rt_sto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); + else + return NULL; +} + +static inline unsigned long *get_st_pto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); + else + return NULL; +} + +static int __init dma_alloc_cpu_table_caches(void) +{ + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", + ZPCI_TABLE_SIZE, + ZPCI_TABLE_ALIGN, + 0, NULL); + if (!dma_region_table_cache) + return -ENOMEM; + + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", + ZPCI_PT_SIZE, + ZPCI_PT_ALIGN, + 0, NULL); + if (!dma_page_table_cache) { + kmem_cache_destroy(dma_region_table_cache); + return -ENOMEM; + } + return 0; +} + +static unsigned long *dma_alloc_cpu_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_region_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) + *entry = ZPCI_TABLE_INVALID; + return table; +} + +static void dma_free_cpu_table(void *table) +{ + kmem_cache_free(dma_region_table_cache, table); +} + +static void dma_free_page_table(void *table) +{ + kmem_cache_free(dma_page_table_cache, table); +} + +static void dma_free_seg_table(unsigned long entry) +{ + unsigned long *sto = get_rt_sto(entry); + int sx; + + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) + if (reg_entry_isvalid(sto[sx])) + 
dma_free_page_table(get_st_pto(sto[sx])); + + dma_free_cpu_table(sto); +} + +static void dma_cleanup_tables(unsigned long *table) +{ + int rtx; + + if (!table) + return; + + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(table[rtx])) + dma_free_seg_table(table[rtx]); + + dma_free_cpu_table(table); +} + +static unsigned long *dma_alloc_page_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_page_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) + *entry = ZPCI_PTE_INVALID; + return table; +} + +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) +{ + unsigned long old_rte, rte; + unsigned long *sto; + + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { + sto = dma_alloc_cpu_table(gfp); + if (!sto) + return NULL; + + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Someone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } + } + return sto; +} + +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) +{ + unsigned long old_ste, ste; + unsigned long *pto; + + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { + pto = dma_alloc_page_table(gfp); + if (!pto) + return NULL; + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Someone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } + } + return pto; +} + +static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned long *sto, *pto; + unsigned int rtx, sx, px; + + rtx = calc_rtx(dma_addr); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); + if (!sto) + return NULL; + + sx = calc_sx(dma_addr); + pto = dma_get_page_table_origin(&sto[sx], gfp); + if (!pto) + return NULL; + + px = calc_px(dma_addr); + return &pto[px]; +} + +static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) +{ + unsigned long pte; + + pte = READ_ONCE(*ptep); + if (flags & ZPCI_PTE_INVALID) { + invalidate_pt_entry(&pte); + } else { + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); + } + + if (flags & ZPCI_TABLE_PROTECTED) + entry_set_protected(&pte); + else + entry_clr_protected(&pte); + + xchg(ptep, pte); +} + static struct s390_domain *to_s390_domain(struct iommu_domain *dom) { return container_of(dom, struct s390_domain, domain); @@ -31,21 +315,22 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom) static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) { + struct zpci_dev *zdev = to_zpci_dev(dev); + switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; + case IOMMU_CAP_DEFERRED_FLUSH: + return zdev->pft != PCI_FUNC_TYPE_ISM; default: return false; } } -static struct iommu_domain *s390_domain_alloc(unsigned domain_type) +static struct iommu_domain *s390_domain_alloc_paging(struct device *dev) { struct s390_domain *s390_domain; - if (domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); if (!s390_domain) return NULL; @@ -84,14 +369,13 @@ static void s390_domain_free(struct iommu_domain
*domain) call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); } -static void __s390_iommu_detach_device(struct zpci_dev *zdev) +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) { - struct s390_domain *s390_domain = zdev->s390_domain; + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; - if (!s390_domain) - return; - spin_lock_irqsave(&s390_domain->list_lock, flags); list_del_rcu(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); @@ -118,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return -EINVAL; if (zdev->s390_domain) - __s390_iommu_detach_device(zdev); - else if (zdev->dma_table) - zpci_dma_exit_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(s390_domain->dma_table), &status); @@ -130,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, */ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; - zdev->dma_table = s390_domain->dma_table; zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; @@ -142,14 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void s390_iommu_set_platform_dma(struct device *dev) -{ - struct zpci_dev *zdev = to_zpci_dev(dev); - - __s390_iommu_detach_device(zdev); - zpci_dma_init_device(zdev); -} - static void s390_iommu_get_resv_regions(struct device *dev, struct list_head *list) { @@ -190,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + if (zdev->tlb_refresh) + dev->iommu->shadow_on_flush = 1; + return &zdev->iommu_dev; } @@ -202,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev) * to the device, but keep it attached to other devices in the group. 
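The dma_get_seg_table_origin() and dma_get_page_table_origin() helpers above grow missing table levels without taking a lock: allocate speculatively, publish with cmpxchg(), and throw the allocation away if another CPU won the race. The pattern in isolation, using hypothetical encode/decode helpers rather than the real ZPCI entry format:

static unsigned long *install_level(unsigned long *slot, gfp_t gfp)
{
	unsigned long old, new_entry;
	unsigned long *tbl;

	old = READ_ONCE(*slot);
	if (entry_is_valid(old))
		return entry_to_table(old);

	tbl = alloc_level(gfp);
	if (!tbl)
		return NULL;
	new_entry = table_to_entry(tbl);

	old = cmpxchg(slot, ENTRY_INVALID, new_entry);
	if (old != ENTRY_INVALID) {
		/* someone else was faster, use theirs */
		free_level(tbl);
		return entry_to_table(old);
	}
	return tbl;
}

Losing the race costs one allocation and free, but the map path never serializes on a page-table lock.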
*/ if (zdev) - __s390_iommu_detach_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); +} + +static int zpci_refresh_all(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); } static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -212,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { - zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); + atomic64_inc(&s390_domain->ctrs.global_rpcits); + zpci_refresh_all(zdev); } rcu_read_unlock(); } @@ -231,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + atomic64_inc(&s390_domain->ctrs.sync_rpcits); zpci_refresh_trans((u64)zdev->fh << 32, gather->start, size); } rcu_read_unlock(); } -static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; + int ret = 0; rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; - zpci_refresh_trans((u64)zdev->fh << 32, - iova, size); + atomic64_inc(&s390_domain->ctrs.sync_map_rpcits); + ret = zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + /* + * let the hypervisor discover invalidated entries + * allowing it to free IOVAs and unpin pages + */ + if (ret == -ENOMEM) { + ret = zpci_refresh_all(zdev); + if (ret) + break; + } } rcu_read_unlock(); + + return ret; } static int s390_iommu_validate_trans(struct s390_domain *s390_domain, @@ -330,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, if (!IS_ALIGNED(iova | paddr, pgsize)) return -EINVAL; - if (!(prot & IOMMU_READ)) - return -EINVAL; - if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags, gfp); - if (!rc) + pgcount, flags, gfp); + if (!rc) { *mapped = size; + atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages); + } return rc; } @@ -395,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, return 0; iommu_iotlb_gather_add_range(gather, iova, size); + atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages); return size; } +static void s390_iommu_probe_finalize(struct device *dev) +{ + iommu_setup_dma_ops(dev, 0, U64_MAX); +} + +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) +{ + if (!zdev || !zdev->s390_domain) + return NULL; + return &zdev->s390_domain->ctrs; +} + int zpci_init_iommu(struct zpci_dev *zdev) { + u64 aperture_size; int rc = 0; rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, @@ -412,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev) if (rc) goto out_sysfs; + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); + aperture_size = min3(s390_iommu_aperture, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + aperture_size - 1; + return 0; out_sysfs: @@ -427,13 +742,52 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +static int __init s390_iommu_setup(char *str) +{ + if (!strcmp(str, "strict")) { + 
pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n"); + iommu_set_dma_strict(); + } + return 1; +} + +__setup("s390_iommu=", s390_iommu_setup); + +static int __init s390_iommu_aperture_setup(char *str) +{ + if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) + s390_iommu_aperture_factor = 1; + return 1; +} + +__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); + +static int __init s390_iommu_init(void) +{ + int rc; + + iommu_dma_forcedac = true; + s390_iommu_aperture = (u64)virt_to_phys(high_memory); + if (!s390_iommu_aperture_factor) + s390_iommu_aperture = ULONG_MAX; + else + s390_iommu_aperture *= s390_iommu_aperture_factor; + + rc = dma_alloc_cpu_table_caches(); + if (rc) + return rc; + + return rc; +} +subsys_initcall(s390_iommu_init); + static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, - .domain_alloc = s390_domain_alloc, + .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, + .probe_finalize = s390_iommu_probe_finalize, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .set_platform_dma_ops = s390_iommu_set_platform_dma, .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 2fa9afebd4f5..2eb9fb46703b 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -70,7 +70,6 @@ struct sprd_iommu_device { void __iomem *base; struct device *dev; struct iommu_device iommu; - struct iommu_group *group; struct clk *eb; }; @@ -134,13 +133,10 @@ sprd_iommu_pgt_size(struct iommu_domain *domain) SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32); } -static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) +static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev) { struct sprd_iommu_domain *dom; - if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -345,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return size; } -static void sprd_iommu_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned int reg; @@ -358,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain, /* clear IOMMU TLB buffer after page table updated */ sprd_iommu_write(dom->sdev, reg, 0xffffffff); + return 0; } static void sprd_iommu_sync(struct iommu_domain *domain, @@ -399,13 +396,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev) return &sdev->iommu; } -static struct iommu_group *sprd_iommu_device_group(struct device *dev) -{ - struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); - - return iommu_group_ref_get(sdev->group); -} - static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { struct platform_device *pdev; @@ -421,9 +411,9 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { - .domain_alloc = sprd_iommu_domain_alloc, + .domain_alloc_paging = sprd_iommu_domain_alloc_paging, .probe_device = sprd_iommu_probe_device, - .device_group = sprd_iommu_device_group, + .device_group = generic_single_device_group, .of_xlate = 
sprd_iommu_of_xlate, .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, @@ -496,16 +486,9 @@ static int sprd_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdev); sdev->dev = dev; - /* All the client devices are in the same iommu-group */ - sdev->group = iommu_group_alloc(); - if (IS_ERR(sdev->group)) { - ret = PTR_ERR(sdev->group); - goto free_page; - } - ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev)); if (ret) - goto put_group; + goto free_page; ret = iommu_device_register(&sdev->iommu, &sprd_iommu_ops, dev); if (ret) @@ -530,8 +513,6 @@ unregister_iommu: iommu_device_unregister(&sdev->iommu); remove_sysfs: iommu_device_sysfs_remove(&sdev->iommu); -put_group: - iommu_group_put(sdev->group); free_page: dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); return ret; @@ -543,9 +524,6 @@ static void sprd_iommu_remove(struct platform_device *pdev) dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); - iommu_group_put(sdev->group); - sdev->group = NULL; - platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&sdev->iommu); iommu_device_unregister(&sdev->iommu); diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 74c5cb93e900..41484a5a399b 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -107,7 +107,6 @@ struct sun50i_iommu { struct clk *clk; struct iommu_domain *domain; - struct iommu_group *group; struct kmem_cache *pt_pool; }; @@ -402,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } -static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -412,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, spin_lock_irqsave(&iommu->iommu_lock, flags); sun50i_iommu_zap_range(iommu, iova, size); spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; } static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, @@ -589,7 +590,8 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, } static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -616,13 +618,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, *pte_addr = sun50i_mk_pte(paddr, prot); sun50i_table_flush(sun50i_domain, pte_addr, 1); + *mapped = size; out: return ret; } static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); phys_addr_t pt_phys; @@ -667,14 +670,11 @@ static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain, sun50i_iova_get_page_offset(iova); } -static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) +static struct iommu_domain * 
+sun50i_iommu_domain_alloc_paging(struct device *dev) { struct sun50i_iommu_domain *sun50i_domain; - if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL); if (!sun50i_domain) return NULL; @@ -757,21 +757,32 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu, iommu->domain = NULL; } -static void sun50i_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = dev_iommu_priv_get(dev); + struct sun50i_iommu_domain *sun50i_domain; dev_dbg(dev, "Detaching from IOMMU domain\n"); - if (iommu->domain != domain) - return; + if (iommu->domain == identity_domain) + return 0; + sun50i_domain = to_sun50i_domain(iommu->domain); if (refcount_dec_and_test(&sun50i_domain->refcnt)) sun50i_iommu_detach_domain(iommu, sun50i_domain); + return 0; } +static struct iommu_domain_ops sun50i_iommu_identity_ops = { + .attach_dev = sun50i_iommu_identity_attach, +}; + +static struct iommu_domain sun50i_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &sun50i_iommu_identity_ops, +}; + static int sun50i_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -789,8 +800,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain, if (iommu->domain == domain) return 0; - if (iommu->domain) - sun50i_iommu_detach_device(iommu->domain, dev); + sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev); sun50i_iommu_attach_domain(iommu, sun50i_domain); @@ -808,13 +818,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) return &iommu->iommu; } -static struct iommu_group *sun50i_iommu_device_group(struct device *dev) -{ - struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev); - - return iommu_group_ref_get(iommu->group); -} - static int sun50i_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -827,9 +830,10 @@ static int sun50i_iommu_of_xlate(struct device *dev, } static const struct iommu_ops sun50i_iommu_ops = { + .identity_domain = &sun50i_iommu_identity_domain, .pgsize_bitmap = SZ_4K, - .device_group = sun50i_iommu_device_group, - .domain_alloc = sun50i_iommu_domain_alloc, + .device_group = generic_single_device_group, + .domain_alloc_paging = sun50i_iommu_domain_alloc_paging, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, .default_domain_ops = &(const struct iommu_domain_ops) { @@ -838,8 +842,8 @@ static const struct iommu_ops sun50i_iommu_ops = { .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, .iova_to_phys = sun50i_iommu_iova_to_phys, - .map = sun50i_iommu_map, - .unmap = sun50i_iommu_unmap, + .map_pages = sun50i_iommu_map, + .unmap_pages = sun50i_iommu_unmap, .free = sun50i_iommu_domain_free, } }; @@ -985,6 +989,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; spin_lock_init(&iommu->iommu_lock); + iommu->domain = &sun50i_iommu_identity_domain; platform_set_drvdata(pdev, iommu); iommu->dev = &pdev->dev; @@ -995,42 +1000,36 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (!iommu->pt_pool) return -ENOMEM; - iommu->group = iommu_group_alloc(); - if (IS_ERR(iommu->group)) { - ret = PTR_ERR(iommu->group); - goto err_free_cache; - } - iommu->base = 
devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(iommu->base)) { ret = PTR_ERR(iommu->base); - goto err_free_group; + goto err_free_cache; } irq = platform_get_irq(pdev, 0); if (irq < 0) { ret = irq; - goto err_free_group; + goto err_free_cache; } iommu->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(iommu->clk)) { dev_err(&pdev->dev, "Couldn't get our clock.\n"); ret = PTR_ERR(iommu->clk); - goto err_free_group; + goto err_free_cache; } iommu->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(iommu->reset)) { dev_err(&pdev->dev, "Couldn't get our reset line.\n"); ret = PTR_ERR(iommu->reset); - goto err_free_group; + goto err_free_cache; } ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - goto err_free_group; + goto err_free_cache; ret = iommu_device_register(&iommu->iommu, &sun50i_iommu_ops, &pdev->dev); if (ret) @@ -1049,9 +1048,6 @@ err_unregister: err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_free_group: - iommu_group_put(iommu->group); - err_free_cache: kmem_cache_destroy(iommu->pt_pool); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c deleted file mode 100644 index a482ff838b53..000000000000 --- a/drivers/iommu/tegra-gart.c +++ /dev/null @@ -1,371 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * IOMMU API for Graphics Address Relocation Table on Tegra20 - * - * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. - * - * Author: Hiroshi DOYU <hdoyu@nvidia.com> - */ - -#define dev_fmt(fmt) "gart: " fmt - -#include <linux/io.h> -#include <linux/iommu.h> -#include <linux/moduleparam.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/vmalloc.h> - -#include <soc/tegra/mc.h> - -#define GART_REG_BASE 0x24 -#define GART_CONFIG (0x24 - GART_REG_BASE) -#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) -#define GART_ENTRY_DATA (0x2c - GART_REG_BASE) - -#define GART_ENTRY_PHYS_ADDR_VALID BIT(31) - -#define GART_PAGE_SHIFT 12 -#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) -#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT) - -/* bitmap of the page sizes currently supported */ -#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE) - -struct gart_device { - void __iomem *regs; - u32 *savedata; - unsigned long iovmm_base; /* offset to vmm_area start */ - unsigned long iovmm_end; /* offset to vmm_area end */ - spinlock_t pte_lock; /* for pagetable */ - spinlock_t dom_lock; /* for active domain */ - unsigned int active_devices; /* number of active devices */ - struct iommu_domain *active_domain; /* current active domain */ - struct iommu_device iommu; /* IOMMU Core handle */ - struct device *dev; -}; - -static struct gart_device *gart_handle; /* unique for a system */ - -static bool gart_debug; - -/* - * Any interaction between any block on PPSB and a block on APB or AHB - * must have these read-back to ensure the APB/AHB bus transaction is - * complete before initiating activity on the PPSB block. 
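The FLUSH_GART_REGS() macro defined just below is the standard "post the write" idiom: relaxed MMIO writes may still sit in a write buffer, so a readl_relaxed() from the same block forces them to complete before any other bus activity begins. Schematically, with made-up register offsets:

static void dev_write_entry(void __iomem *regs, u32 addr, u32 data)
{
	writel_relaxed(addr, regs + 0x04);	/* hypothetical ADDR register */
	writel_relaxed(data, regs + 0x08);	/* hypothetical DATA register */
	(void)readl_relaxed(regs + 0x00);	/* read-back drains posted writes */
}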
- */ -#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG) - -#define for_each_gart_pte(gart, iova) \ - for (iova = gart->iovmm_base; \ - iova < gart->iovmm_end; \ - iova += GART_PAGE_SIZE) - -static inline void gart_set_pte(struct gart_device *gart, - unsigned long iova, unsigned long pte) -{ - writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); - writel_relaxed(pte, gart->regs + GART_ENTRY_DATA); -} - -static inline unsigned long gart_read_pte(struct gart_device *gart, - unsigned long iova) -{ - unsigned long pte; - - writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); - pte = readl_relaxed(gart->regs + GART_ENTRY_DATA); - - return pte; -} - -static void do_gart_setup(struct gart_device *gart, const u32 *data) -{ - unsigned long iova; - - for_each_gart_pte(gart, iova) - gart_set_pte(gart, iova, data ? *(data++) : 0); - - writel_relaxed(1, gart->regs + GART_CONFIG); - FLUSH_GART_REGS(gart); -} - -static inline bool gart_iova_range_invalid(struct gart_device *gart, - unsigned long iova, size_t bytes) -{ - return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE || - iova + bytes > gart->iovmm_end); -} - -static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova) -{ - return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID); -} - -static int gart_iommu_attach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct gart_device *gart = gart_handle; - int ret = 0; - - spin_lock(&gart->dom_lock); - - if (gart->active_domain && gart->active_domain != domain) { - ret = -EINVAL; - } else if (dev_iommu_priv_get(dev) != domain) { - dev_iommu_priv_set(dev, domain); - gart->active_domain = domain; - gart->active_devices++; - } - - spin_unlock(&gart->dom_lock); - - return ret; -} - -static void gart_iommu_set_platform_dma(struct device *dev) -{ - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct gart_device *gart = gart_handle; - - spin_lock(&gart->dom_lock); - - if (dev_iommu_priv_get(dev) == domain) { - dev_iommu_priv_set(dev, NULL); - - if (--gart->active_devices == 0) - gart->active_domain = NULL; - } - - spin_unlock(&gart->dom_lock); -} - -static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) -{ - struct iommu_domain *domain; - - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (domain) { - domain->geometry.aperture_start = gart_handle->iovmm_base; - domain->geometry.aperture_end = gart_handle->iovmm_end - 1; - domain->geometry.force_aperture = true; - } - - return domain; -} - -static void gart_iommu_domain_free(struct iommu_domain *domain) -{ - WARN_ON(gart_handle->active_domain == domain); - kfree(domain); -} - -static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova, - unsigned long pa) -{ - if (unlikely(gart_debug && gart_pte_valid(gart, iova))) { - dev_err(gart->dev, "Page entry is in-use\n"); - return -EINVAL; - } - - gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa); - - return 0; -} - -static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t bytes, int prot, gfp_t gfp) -{ - struct gart_device *gart = gart_handle; - int ret; - - if (gart_iova_range_invalid(gart, iova, bytes)) - return -EINVAL; - - spin_lock(&gart->pte_lock); - ret = __gart_iommu_map(gart, iova, (unsigned long)pa); - spin_unlock(&gart->pte_lock); - - return ret; -} - -static inline int __gart_iommu_unmap(struct gart_device *gart, - unsigned long iova) -{ - if (unlikely(gart_debug && 
!gart_pte_valid(gart, iova))) { - dev_err(gart->dev, "Page entry is invalid\n"); - return -EINVAL; - } - - gart_set_pte(gart, iova, 0); - - return 0; -} - -static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t bytes, struct iommu_iotlb_gather *gather) -{ - struct gart_device *gart = gart_handle; - int err; - - if (gart_iova_range_invalid(gart, iova, bytes)) - return 0; - - spin_lock(&gart->pte_lock); - err = __gart_iommu_unmap(gart, iova); - spin_unlock(&gart->pte_lock); - - return err ? 0 : bytes; -} - -static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) -{ - struct gart_device *gart = gart_handle; - unsigned long pte; - - if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE)) - return -EINVAL; - - spin_lock(&gart->pte_lock); - pte = gart_read_pte(gart, iova); - spin_unlock(&gart->pte_lock); - - return pte & GART_PAGE_MASK; -} - -static struct iommu_device *gart_iommu_probe_device(struct device *dev) -{ - if (!dev_iommu_fwspec_get(dev)) - return ERR_PTR(-ENODEV); - - return &gart_handle->iommu; -} - -static int gart_iommu_of_xlate(struct device *dev, - struct of_phandle_args *args) -{ - return 0; -} - -static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) -{ - FLUSH_GART_REGS(gart_handle); -} - -static void gart_iommu_sync(struct iommu_domain *domain, - struct iommu_iotlb_gather *gather) -{ - size_t length = gather->end - gather->start + 1; - - gart_iommu_sync_map(domain, gather->start, length); -} - -static const struct iommu_ops gart_iommu_ops = { - .domain_alloc = gart_iommu_domain_alloc, - .probe_device = gart_iommu_probe_device, - .device_group = generic_device_group, - .set_platform_dma_ops = gart_iommu_set_platform_dma, - .pgsize_bitmap = GART_IOMMU_PGSIZES, - .of_xlate = gart_iommu_of_xlate, - .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = gart_iommu_attach_dev, - .map = gart_iommu_map, - .unmap = gart_iommu_unmap, - .iova_to_phys = gart_iommu_iova_to_phys, - .iotlb_sync_map = gart_iommu_sync_map, - .iotlb_sync = gart_iommu_sync, - .free = gart_iommu_domain_free, - } -}; - -int tegra_gart_suspend(struct gart_device *gart) -{ - u32 *data = gart->savedata; - unsigned long iova; - - /* - * All GART users shall be suspended at this point. Disable - * address translation to trap all GART accesses as invalid - * memory accesses. 
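Several drivers earlier in this section (msm, mtk, sprd, sun50i, s390) also changed .iotlb_sync_map from void to int, so a failed flush or shadow-table update can now propagate to the caller and unwind the mapping instead of being silently ignored. The converted callback shape, sketched with hypothetical helpers:

static int foo_iotlb_sync_map(struct iommu_domain *domain,
			      unsigned long iova, size_t size)
{
	struct foo_domain *dom = to_foo_domain(domain);

	/* return the hardware's verdict instead of swallowing it */
	return foo_flush_range(dom, iova, size);
}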
- */ - writel_relaxed(0, gart->regs + GART_CONFIG); - FLUSH_GART_REGS(gart); - - for_each_gart_pte(gart, iova) - *(data++) = gart_read_pte(gart, iova); - - return 0; -} - -int tegra_gart_resume(struct gart_device *gart) -{ - do_gart_setup(gart, gart->savedata); - - return 0; -} - -struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) -{ - struct gart_device *gart; - struct resource *res; - int err; - - BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); - - /* the GART memory aperture is required */ - res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1); - if (!res) { - dev_err(dev, "Memory aperture resource unavailable\n"); - return ERR_PTR(-ENXIO); - } - - gart = kzalloc(sizeof(*gart), GFP_KERNEL); - if (!gart) - return ERR_PTR(-ENOMEM); - - gart_handle = gart; - - gart->dev = dev; - gart->regs = mc->regs + GART_REG_BASE; - gart->iovmm_base = res->start; - gart->iovmm_end = res->end + 1; - spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->dom_lock); - - do_gart_setup(gart, NULL); - - err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); - if (err) - goto free_gart; - - err = iommu_device_register(&gart->iommu, &gart_iommu_ops, dev); - if (err) - goto remove_sysfs; - - gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE * - sizeof(u32)); - if (!gart->savedata) { - err = -ENOMEM; - goto unregister_iommu; - } - - return gart; - -unregister_iommu: - iommu_device_unregister(&gart->iommu); -remove_sysfs: - iommu_device_sysfs_remove(&gart->iommu); -free_gart: - kfree(gart); - - return ERR_PTR(err); -} - -module_param(gart_debug, bool, 0644); -MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e445f80d0226..310871728ab4 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -272,13 +272,10 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) clear_bit(id, smmu->asids); } -static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) +static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) { struct tegra_smmu_as *as; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - as = kzalloc(sizeof(*as), GFP_KERNEL); if (!as) return NULL; @@ -511,23 +508,39 @@ disable: return err; } -static void tegra_smmu_set_platform_dma(struct device *dev) +static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct tegra_smmu_as *as = to_smmu_as(domain); - struct tegra_smmu *smmu = as->smmu; + struct tegra_smmu_as *as; + struct tegra_smmu *smmu; unsigned int index; if (!fwspec) - return; + return -ENODEV; + if (domain == identity_domain || !domain) + return 0; + + as = to_smmu_as(domain); + smmu = as->smmu; for (index = 0; index < fwspec->num_ids; index++) { tegra_smmu_disable(smmu, fwspec->ids[index], as->id); tegra_smmu_as_unprepare(smmu, as); } + return 0; } +static struct iommu_domain_ops tegra_smmu_identity_ops = { + .attach_dev = tegra_smmu_identity_attach, +}; + +static struct iommu_domain tegra_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &tegra_smmu_identity_ops, +}; + static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, u32 value) { @@ -751,7 +764,8 @@ __tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, } static int tegra_smmu_map(struct iommu_domain *domain, unsigned long 
iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct tegra_smmu_as *as = to_smmu_as(domain); unsigned long flags; @@ -761,11 +775,14 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags); spin_unlock_irqrestore(&as->lock, flags); + if (!ret) + *mapped = size; + return ret; } static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct tegra_smmu_as *as = to_smmu_as(domain); unsigned long flags; @@ -962,17 +979,28 @@ static int tegra_smmu_of_xlate(struct device *dev, return iommu_fwspec_add_ids(dev, &id, 1); } +static int tegra_smmu_def_domain_type(struct device *dev) +{ + /* + * FIXME: For now we want to run all translation in IDENTITY mode, due + * to some device quirks. Better would be to just quirk the troubled + * devices. + */ + return IOMMU_DOMAIN_IDENTITY; +} + static const struct iommu_ops tegra_smmu_ops = { - .domain_alloc = tegra_smmu_domain_alloc, + .identity_domain = &tegra_smmu_identity_domain, + .def_domain_type = &tegra_smmu_def_domain_type, + .domain_alloc_paging = tegra_smmu_domain_alloc_paging, .probe_device = tegra_smmu_probe_device, .device_group = tegra_smmu_device_group, - .set_platform_dma_ops = tegra_smmu_set_platform_dma, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = tegra_smmu_attach_dev, - .map = tegra_smmu_map, - .unmap = tegra_smmu_unmap, + .map_pages = tegra_smmu_map, + .unmap_pages = tegra_smmu_unmap, .iova_to_phys = tegra_smmu_iova_to_phys, .free = tegra_smmu_domain_free, } @@ -1056,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients); static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) { smmu->debugfs = debugfs_create_dir("smmu", NULL); - if (!smmu->debugfs) - return; debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu, &tegra_smmu_swgroups_fops); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 17dcd826f5c2..379ebe03efb6 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -85,7 +85,7 @@ struct viommu_request { void *writeback; unsigned int write_offset; unsigned int len; - char buf[]; + char buf[] __counted_by(len); }; #define VIOMMU_FAULT_RESV_MASK 0xffffff00 @@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len, if (write_offset <= 0) return -EINVAL; - req = kzalloc(sizeof(*req) + len, GFP_ATOMIC); + req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC); if (!req) return -ENOMEM; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 68d11ccee441..98b0329b7154 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -39,8 +39,7 @@ #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) -#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2) -#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 3) +#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) @@ -106,7 +105,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 * interrupt. 
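The virtio-iommu hunk above pairs the new __counted_by(len) annotation with struct_size(): the annotation ties the flexible array's bounds to the 'len' member for compiler and FORTIFY/KASAN checking, while struct_size() computes the allocation size with overflow saturation. A minimal usage sketch, not taken from the driver:

#include <linux/overflow.h>
#include <linux/slab.h>

struct msg {
	unsigned int len;
	char buf[] __counted_by(len);
};

static struct msg *msg_alloc(unsigned int len, gfp_t gfp)
{
	/* struct_size() saturates on overflow, so the allocation fails safely */
	struct msg *m = kzalloc(struct_size(m, buf, len), gfp);

	if (m)
		m->len = len;	/* set the counter before touching buf */
	return m;
}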
*/ -DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); @@ -1779,15 +1778,6 @@ static bool gic_enable_quirk_msm8996(void *data) return true; } -static bool gic_enable_quirk_mtk_gicr(void *data) -{ - struct gic_chip_data *d = data; - - d->flags |= FLAGS_WORKAROUND_MTK_GICR_SAVE; - - return true; -} - static bool gic_enable_quirk_cavium_38539(void *data) { struct gic_chip_data *d = data; @@ -1889,11 +1879,6 @@ static const struct gic_quirk gic_quirks[] = { .init = gic_enable_quirk_asr8601, }, { - .desc = "GICv3: Mediatek Chromebook GICR save problem", - .property = "mediatek,broken-save-restore-fw", - .init = gic_enable_quirk_mtk_gicr, - }, - { .desc = "GICv3: HIP06 erratum 161010803", .iidr = 0x0204043b, .mask = 0xffffffff, @@ -1959,11 +1944,6 @@ static void gic_enable_nmi_support(void) if (!gic_prio_masking_enabled()) return; - if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { - pr_warn("Skipping NMI enable due to firmware issues\n"); - return; - } - rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, sizeof(*rdist_nmi_refs), GFP_KERNEL); if (!rdist_nmi_refs) diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index bf03abb94e68..68d82a682bf6 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -1085,7 +1085,6 @@ static const struct pwm_ops lpg_pwm_ops = { .request = lpg_pwm_request, .apply = lpg_pwm_apply, .get_state = lpg_pwm_get_state, - .owner = THIS_MODULE, }; static int lpg_add_pwm(struct lpg *lpg) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 67d6e70b4eab..a083921a8968 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -979,35 +979,6 @@ static int tegra_mc_probe(struct platform_device *pdev) } } - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) { - mc->gart = tegra_gart_probe(&pdev->dev, mc); - if (IS_ERR(mc->gart)) { - dev_err(&pdev->dev, "failed to probe GART: %ld\n", - PTR_ERR(mc->gart)); - mc->gart = NULL; - } - } - - return 0; -} - -static int __maybe_unused tegra_mc_suspend(struct device *dev) -{ - struct tegra_mc *mc = dev_get_drvdata(dev); - - if (mc->soc->ops && mc->soc->ops->suspend) - return mc->soc->ops->suspend(mc); - - return 0; -} - -static int __maybe_unused tegra_mc_resume(struct device *dev) -{ - struct tegra_mc *mc = dev_get_drvdata(dev); - - if (mc->soc->ops && mc->soc->ops->resume) - return mc->soc->ops->resume(mc); - return 0; } @@ -1020,15 +991,10 @@ static void tegra_mc_sync_state(struct device *dev) icc_sync_state(dev); } -static const struct dev_pm_ops tegra_mc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tegra_mc_suspend, tegra_mc_resume) -}; - static struct platform_driver tegra_mc_driver = { .driver = { .name = "tegra-mc", .of_match_table = tegra_mc_of_match, - .pm = &tegra_mc_pm_ops, .suppress_bind_attrs = true, .sync_state = tegra_mc_sync_state, }, diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c index 544bfd216a22..aa4b97d5e732 100644 --- a/drivers/memory/tegra/tegra20.c +++ b/drivers/memory/tegra/tegra20.c @@ -688,32 +688,6 @@ static int tegra20_mc_probe(struct tegra_mc *mc) return 0; } -static int tegra20_mc_suspend(struct tegra_mc *mc) -{ - int err; - - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { - err = tegra_gart_suspend(mc->gart); - if (err < 0) - return err; - } - - return 0; -} - -static int tegra20_mc_resume(struct tegra_mc *mc) -{ - int err; - - 
if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { - err = tegra_gart_resume(mc->gart); - if (err < 0) - return err; - } - - return 0; -} - static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) { struct tegra_mc *mc = data; @@ -789,8 +763,6 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) static const struct tegra_mc_ops tegra20_mc_ops = { .probe = tegra20_mc_probe, - .suspend = tegra20_mc_suspend, - .resume = tegra20_mc_resume, .handle_irq = tegra20_mc_handle_irq, }; diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c index fc28714ae3a6..6a33889d0902 100644 --- a/drivers/misc/lkdtm/cfi.c +++ b/drivers/misc/lkdtm/cfi.c @@ -68,12 +68,20 @@ static void lkdtm_CFI_FORWARD_PROTO(void) #define no_pac_addr(addr) \ ((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET)) +#ifdef CONFIG_RISCV +/* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#frame-pointer-convention */ +#define FRAME_RA_OFFSET (-1) +#else +#define FRAME_RA_OFFSET 1 +#endif + /* The ultimate ROP gadget. */ static noinline __no_ret_protection void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr) { /* Use of volatile is to make sure final write isn't seen as a dead store. */ - unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + unsigned long * volatile *ret_addr = + (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET; /* Make sure we've found the right place on the stack before writing it. */ if (no_pac_addr(*ret_addr) == expected) @@ -88,7 +96,8 @@ static noinline void set_return_addr(unsigned long *expected, unsigned long *addr) { /* Use of volatile is to make sure final write isn't seen as a dead store. */ - unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + unsigned long * volatile *ret_addr = + (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET; /* Make sure we've found the right place on the stack before writing it. 
*/ if (no_pac_addr(*ret_addr) == expected) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 3a8f27c3e310..152dfe593c43 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2381,8 +2381,10 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) } ret = mmc_blk_cqe_issue_flush(mq, req); break; - case REQ_OP_READ: case REQ_OP_WRITE: + card->written_flag = true; + fallthrough; + case REQ_OP_READ: if (host->cqe_enabled) ret = mmc_blk_cqe_issue_rw_rq(mq, req); else diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 4edf9057fa79..b7754a1b8d97 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -280,4 +280,8 @@ static inline int mmc_card_broken_sd_cache(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_SD_CACHE; } +static inline int mmc_card_broken_cache_flush(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_CACHE_FLUSH; +} #endif diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8180983bd402..705942edacc6 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); @@ -2086,13 +2086,17 @@ static int _mmc_flush_cache(struct mmc_host *host) { int err = 0; + if (mmc_card_broken_cache_flush(host->card) && !host->card->written_flag) + return 0; + if (_mmc_cache_enabled(host)) { err = mmc_switch(host->card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_FLUSH_CACHE, 1, CACHE_FLUSH_TIMEOUT_MS); if (err) - pr_err("%s: cache flush error %d\n", - mmc_hostname(host), err); + pr_err("%s: cache flush error %d\n", mmc_hostname(host), err); + else + host->card->written_flag = false; } return err; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 32b64b564fb1..cca71867bc4a 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -110,11 +110,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_QUIRK_TRIM_BROKEN), /* - * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seems to - * support being used to offload WRITE_ZEROES. + * Micron MTFC4GACAJCN-1M supports TRIM but does not appear to support + * WRITE_ZEROES offloading. It also supports caching, but the cache can + * only be flushed after a write has occurred. 
*/ MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc, - MMC_QUIRK_TRIM_BROKEN), + MMC_QUIRK_TRIM_BROKEN | MMC_QUIRK_BROKEN_CACHE_FLUSH), /* * Kingston EMMC04G-M627 advertises TRIM but it does not seems to diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index d83261e857a5..d8a991b349a8 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -28,6 +28,9 @@ #define PCI_GLI_9750_PM_CTRL 0xFC #define PCI_GLI_9750_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9750_CORRERR_MASK 0x214 +#define PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9750_CFG2 0x848 #define SDHCI_GLI_9750_CFG2_L1DLY GENMASK(28, 24) #define GLI_9750_CFG2_L1DLY_VALUE 0x1F @@ -152,6 +155,9 @@ #define PCI_GLI_9755_PM_CTRL 0xFC #define PCI_GLI_9755_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9755_CORRERR_MASK 0x214 +#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510 #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8) @@ -561,6 +567,11 @@ static void gl9750_hw_setting(struct sdhci_host *host) value &= ~PCI_GLI_9750_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, &value); + value |= PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, value); + gl9750_wt_off(host); } @@ -770,6 +781,11 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) value &= ~PCI_GLI_9755_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value); + value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value); + gl9755_wt_off(pdev); } diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index c125485ba80e..967bd2dfcda1 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -598,7 +598,7 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, return 0; } - for (i = MMC_TIMING_MMC_HS; i <= MMC_TIMING_MMC_HS400; i++) { + for (i = MMC_TIMING_LEGACY; i <= MMC_TIMING_MMC_HS400; i++) { ret = device_property_read_u32(dev, td[i].otap_binding, &sdhci_am654->otap_del_sel[i]); diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index de3f443f5fdc..fd67c0682b38 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -2309,6 +2309,7 @@ static int vub300_probe(struct usb_interface *interface, vub300->read_only = (0x0010 & vub300->system_port_status.port_flags) ? 
1 : 0; } else { + retval = -EINVAL; goto error5; } usb_set_intfdata(interface, vub300); diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig index 60738edcd5d5..da03ab6efe04 100644 --- a/drivers/mtd/parsers/Kconfig +++ b/drivers/mtd/parsers/Kconfig @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config MTD_AR7_PARTS - tristate "TI AR7 partitioning parser" - help - TI AR7 partitioning parser support - config MTD_BCM47XX_PARTS tristate "BCM47XX partitioning parser" depends on BCM47XX || ARCH_BCM_5301X diff --git a/drivers/mtd/parsers/Makefile b/drivers/mtd/parsers/Makefile index 0e70b621a1d8..9b00c62b837a 100644 --- a/drivers/mtd/parsers/Makefile +++ b/drivers/mtd/parsers/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_MTD_AR7_PARTS) += ar7part.o obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm47xxpart.o obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o obj-$(CONFIG_MTD_BRCM_U_BOOT) += brcm_u-boot.o diff --git a/drivers/mtd/parsers/ar7part.c b/drivers/mtd/parsers/ar7part.c deleted file mode 100644 index 8cd683711ac6..000000000000 --- a/drivers/mtd/parsers/ar7part.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright © 2007 Eugene Konev <ejka@openwrt.org> - * - * TI AR7 flash partition table. - * Based on ar7 map by Felix Fietkau <nbd@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> - -#include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> -#include <linux/memblock.h> -#include <linux/module.h> - -#include <uapi/linux/magic.h> - -#define AR7_PARTS 4 -#define ROOT_OFFSET 0xe0000 - -#define LOADER_MAGIC1 le32_to_cpu(0xfeedfa42) -#define LOADER_MAGIC2 le32_to_cpu(0xfeed1281) - -struct ar7_bin_rec { - unsigned int checksum; - unsigned int length; - unsigned int address; -}; - -static int create_mtd_partitions(struct mtd_info *master, - const struct mtd_partition **pparts, - struct mtd_part_parser_data *data) -{ - struct ar7_bin_rec header; - unsigned int offset; - size_t len; - unsigned int pre_size = master->erasesize, post_size = 0; - unsigned int root_offset = ROOT_OFFSET; - - int retries = 10; - struct mtd_partition *ar7_parts; - - ar7_parts = kcalloc(AR7_PARTS, sizeof(*ar7_parts), GFP_KERNEL); - if (!ar7_parts) - return -ENOMEM; - ar7_parts[0].name = "loader"; - ar7_parts[0].offset = 0; - ar7_parts[0].size = master->erasesize; - ar7_parts[0].mask_flags = MTD_WRITEABLE; - - ar7_parts[1].name = "config"; - ar7_parts[1].offset = 0; - ar7_parts[1].size = master->erasesize; - ar7_parts[1].mask_flags = 0; - - do { /* Try 10 blocks starting from master->erasesize */ - offset = pre_size; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - if (!strncmp((char *)&header, "TIENV0.8", 8)) - ar7_parts[1].offset = pre_size; - if (header.checksum == LOADER_MAGIC1) - break; - if (header.checksum == LOADER_MAGIC2) - break; - pre_size += master->erasesize; - } while (retries--); - - pre_size = offset; - - if (!ar7_parts[1].offset) { - ar7_parts[1].offset = master->size - master->erasesize; - post_size = master->erasesize; - } - - switch (header.checksum) { - case LOADER_MAGIC1: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + sizeof(header) + 4; - break; - case LOADER_MAGIC2: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + 
sizeof(header) + 4 + 0xff; - root_offset &= ~(uint32_t)0xff; - break; - default: - printk(KERN_WARNING "Unknown magic: %08x\n", header.checksum); - break; - } - - mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header); - if (header.checksum != SQUASHFS_MAGIC) { - root_offset += master->erasesize - 1; - root_offset &= ~(master->erasesize - 1); - } - - ar7_parts[2].name = "linux"; - ar7_parts[2].offset = pre_size; - ar7_parts[2].size = master->size - pre_size - post_size; - ar7_parts[2].mask_flags = 0; - - ar7_parts[3].name = "rootfs"; - ar7_parts[3].offset = root_offset; - ar7_parts[3].size = master->size - root_offset - post_size; - ar7_parts[3].mask_flags = 0; - - *pparts = ar7_parts; - return AR7_PARTS; -} - -static struct mtd_part_parser ar7_parser = { - .parse_fn = create_mtd_partitions, - .name = "ar7part", -}; -module_mtd_part_parser(ar7_parser); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR( "Felix Fietkau <nbd@openwrt.org>, " - "Eugene Konev <ejka@openwrt.org>"); -MODULE_DESCRIPTION("MTD partitioning for TI AR7"); diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index d8ab2b77d201..167a86f39f27 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -32,7 +32,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff); lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff); mutex_unlock(&sw_dev->device->bus->mdio_lock); @@ -50,7 +50,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); *val = lan9303_mdio_real_read(sw_dev->device, reg); *val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16); mutex_unlock(&sw_dev->device->bus->mdio_lock); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5665d0c3668f..1dee27349367 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6647,9 +6647,9 @@ static void tg3_tx(struct tg3_napi *tnapi) tnapi->tx_cons = sw_idx; - /* Need to make the tx_cons update visible to tg3_start_xmit() + /* Need to make the tx_cons update visible to __tg3_start_xmit() * before checking for netif_queue_stopped(). Without the - * memory barrier, there is a small possibility that tg3_start_xmit() + * memory barrier, there is a small possibility that __tg3_start_xmit() * will miss it and cause the queue to be stopped forever. 
*/ smp_mb(); @@ -7889,7 +7889,7 @@ static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb) return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3; } -static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *); /* Use GSO to workaround all TSO packets that meet HW bug conditions * indicated in tg3_tx_frag_set() @@ -7923,7 +7923,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg); - tg3_start_xmit(seg, tp->dev); + __tg3_start_xmit(seg, tp->dev); } tg3_tso_bug_end: @@ -7933,7 +7933,7 @@ tg3_tso_bug_end: } /* hard_start_xmit for all devices */ -static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss, vlan = 0; @@ -8182,11 +8182,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_wake_queue(txq); } - if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { - /* Packets are ready, update Tx producer idx on card. */ - tw32_tx_mbox(tnapi->prodmbox, entry); - } - return NETDEV_TX_OK; dma_error: @@ -8199,6 +8194,42 @@ drop_nofree: return NETDEV_TX_OK; } +static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_queue *txq; + u16 skb_queue_mapping; + netdev_tx_t ret; + + skb_queue_mapping = skb_get_queue_mapping(skb); + txq = netdev_get_tx_queue(dev, skb_queue_mapping); + + ret = __tg3_start_xmit(skb, dev); + + /* Notify the hardware that packets are ready by updating the TX ring + * tail pointer. We respect netdev_xmit_more() thus avoiding poking + * the hardware for every packet. To guarantee forward progress the TX + * ring must be drained when it is full as indicated by + * netif_xmit_stopped(). This needs to happen even when the current + * skb was dropped or rejected with NETDEV_TX_BUSY. Otherwise packets + * queued by previous __tg3_start_xmit() calls might get stuck in + * the queue forever. + */ + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { + struct tg3_napi *tnapi; + struct tg3 *tp; + + tp = netdev_priv(dev); + tnapi = &tp->napi[skb_queue_mapping]; + + if (tg3_flag(tp, ENABLE_TSS)) + tnapi++; + + tw32_tx_mbox(tnapi->prodmbox, tnapi->tx_prod); + } + + return ret; +} + static void tg3_mac_loopback(struct tg3 *tp, bool enable) { if (enable) { @@ -17729,7 +17760,7 @@ static int tg3_init_one(struct pci_dev *pdev, * device behind the EPB cannot support DMA addresses > 40-bit. * On 64-bit systems with IOMMU, use 40-bit dma_mask. * On 64-bit systems without IOMMU, use 64-bit dma_mask and - * do DMA address check in tg3_start_xmit(). + * do DMA address check in __tg3_start_xmit(). 
*/ if (tg3_flag(tp, IS_5788)) persist_dma_mask = dma_mask = DMA_BIT_MASK(32); @@ -18127,7 +18158,8 @@ static void tg3_shutdown(struct pci_dev *pdev) if (netif_running(dev)) dev_close(dev); - tg3_power_down(tp); + if (system_state == SYSTEM_POWER_OFF) + tg3_power_down(tp); rtnl_unlock(); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 30bec47bc665..cffbf27c4656 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2769,7 +2769,7 @@ static int enetc_setup_xdp_prog(struct net_device *ndev, struct bpf_prog *prog, if (priv->min_num_stack_tx_queues + num_xdp_tx_queues > priv->num_tx_rings) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Reserving %d XDP TXQs does not leave a minimum of %d TXQs for network stack (total %d available)", + "Reserving %d XDP TXQs does not leave a minimum of %d for stack (total %d)", num_xdp_tx_queues, priv->min_num_stack_tx_queues, priv->num_tx_rings); diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c index 74bc111b4849..cc4e9e2addb7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c @@ -231,6 +231,5 @@ int i40e_devlink_create_port(struct i40e_pf *pf) **/ void i40e_devlink_destroy_port(struct i40e_pf *pf) { - devlink_port_type_clear(&pf->devlink_port); devlink_port_unregister(&pf->devlink_port); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3157d14d9b12..f7a332e51524 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -14213,8 +14213,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) } set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; - if (vsi->type == I40E_VSI_MAIN) - i40e_devlink_destroy_port(pf); + if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { vsi->netdev_registered = false; @@ -14228,6 +14227,9 @@ int i40e_vsi_release(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* clear the sync flag on all filters */ @@ -14402,14 +14404,14 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) err_rings: i40e_vsi_free_q_vectors(vsi); - if (vsi->type == I40E_VSI_MAIN) - i40e_devlink_destroy_port(pf); if (vsi->netdev_registered) { vsi->netdev_registered = false; unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: i40e_vsi_clear(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index b980f89dc892..cd065ec48c87 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -628,7 +628,7 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) INIT_LIST_HEAD(&ndlist.node); rcu_read_lock(); for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_KERNEL); + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); if (!nl) break; @@ -1555,18 +1555,12 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_lag_netdev_list *entry; struct ice_netdev_priv *np; - struct net_device *netdev; struct ice_pf *pf; - 
list_for_each_entry(entry, lag->netdev_head, node) { - netdev = entry->netdev; - np = netdev_priv(netdev); - pf = np->vsi->back; - - ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); - } + np = netdev_priv(lag->netdev); + pf = np->vsi->back; + ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); } /** @@ -1698,7 +1692,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper_netdev, tmp_nd) { - nd_list = kzalloc(sizeof(*nd_list), GFP_KERNEL); + nd_list = kzalloc(sizeof(*nd_list), GFP_ATOMIC); if (!nd_list) break; @@ -2075,7 +2069,7 @@ void ice_lag_rebuild(struct ice_pf *pf) INIT_LIST_HEAD(&ndlist.node); rcu_read_lock(); for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_KERNEL); + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); if (!nl) break; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 37b54db91df2..dd03cb69ad26 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -630,32 +630,83 @@ bool ice_is_tunnel_supported(struct net_device *dev) return ice_tc_tun_get_type(dev) != TNL_LAST; } -static int -ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr, - struct flow_action_entry *act) +static bool ice_tc_is_dev_uplink(struct net_device *dev) +{ + return netif_is_ice(dev) || ice_is_tunnel_supported(dev); +} + +static int ice_tc_setup_redirect_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct net_device *target_dev) { struct ice_repr *repr; + fltr->action.fltr_act = ICE_FWD_TO_VSI; + + if (ice_is_port_repr_netdev(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_is_port_repr_netdev(filter_dev) && + ice_tc_is_dev_uplink(target_dev)) { + repr = ice_netdev_to_repr(filter_dev); + + fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_tc_is_dev_uplink(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int +ice_tc_setup_drop_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr) +{ + fltr->action.fltr_act = ICE_DROP_PACKET; + + if (ice_is_port_repr_netdev(filter_dev)) { + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_tc_is_dev_uplink(filter_dev)) { + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + int err; + switch (act->id) { case FLOW_ACTION_DROP: - fltr->action.fltr_act = ICE_DROP_PACKET; + err = ice_tc_setup_drop_action(filter_dev, fltr); + if (err) + return err; + break; case FLOW_ACTION_REDIRECT: - fltr->action.fltr_act = ICE_FWD_TO_VSI; - - if (ice_is_port_repr_netdev(act->dev)) { - repr = ice_netdev_to_repr(act->dev); - - fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_INGRESS; - } else if (netif_is_ice(act->dev) || - 
ice_is_tunnel_supported(act->dev)) { - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; - } else { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode"); - return -EINVAL; - } + err = ice_tc_setup_redirect_action(filter_dev, fltr, act->dev); + if (err) + return err; break; @@ -696,10 +747,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) goto exit; } - /* egress traffic is always redirect to uplink */ - if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) - fltr->dest_vsi = vsi->back->switchdev.uplink_vsi; - rule_info.sw_act.fltr_act = fltr->action.fltr_act; if (fltr->action.fltr_act != ICE_DROP_PACKET) rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx; @@ -713,13 +760,21 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.flags_info.act_valid = true; if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { + /* Uplink to VF */ rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; - } else { + } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && + fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) { + /* VF to Uplink */ rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + } else { + /* VF to VF */ + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } /* specify the cookie as filter_rule_id */ @@ -1745,16 +1800,17 @@ ice_tc_parse_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, /** * ice_parse_tc_flower_actions - Parse the actions for a TC filter + * @filter_dev: Pointer to device on which filter is being added * @vsi: Pointer to VSI * @cls_flower: Pointer to TC flower offload structure * @fltr: Pointer to TC flower filter structure * * Parse the actions for a TC filter */ -static int -ice_parse_tc_flower_actions(struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower, - struct ice_tc_flower_fltr *fltr) +static int ice_parse_tc_flower_actions(struct net_device *filter_dev, + struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) { struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); struct flow_action *flow_action = &rule->action; @@ -1769,7 +1825,7 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, flow_action_for_each(i, act, flow_action) { if (ice_is_eswitch_mode_switchdev(vsi->back)) - err = ice_eswitch_tc_parse_action(fltr, act); + err = ice_eswitch_tc_parse_action(filter_dev, fltr, act); else err = ice_tc_parse_action(vsi, fltr, act); if (err) @@ -1856,7 +1912,7 @@ ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, if (err < 0) goto err; - err = ice_parse_tc_flower_actions(vsi, f, fltr); + err = ice_parse_tc_flower_actions(netdev, vsi, f, fltr); if (err < 0) goto err; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 5e1ef70d54fe..1f728a9004d9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2365,7 +2365,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, */ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct skb_shared_info *shinfo; union { struct iphdr *v4; struct ipv6hdr *v6; @@ -2379,13 +2379,15 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) u32 paylen, 
l4_start; int err; - if (!shinfo->gso_size) + if (!skb_is_gso(skb)) return 0; err = skb_cow_head(skb, 0); if (err < 0) return err; + shinfo = skb_shinfo(skb); + ip.hdr = skb_network_header(skb); l4.hdr = skb_transport_header(skb); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 1a42bfded872..7ca6941ea0b9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -818,7 +818,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) int qidx, sqe_tail, sqe_head; struct otx2_snd_queue *sq; u64 incr, *ptr, val; - int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { @@ -827,15 +826,11 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) continue; incr = (u64)qidx << 32; - while (timeout) { - val = otx2_atomic64_add(incr, ptr); - sqe_head = (val >> 20) & 0x3F; - sqe_tail = (val >> 28) & 0x3F; - if (sqe_head == sqe_tail) - break; - usleep_range(1, 3); - timeout--; - } + val = otx2_atomic64_add(incr, ptr); + sqe_head = (val >> 20) & 0x3F; + sqe_tail = (val >> 28) & 0x3F; + if (sqe_head != sqe_tail) + usleep_range(50, 60); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index c04a8ee53a82..e7c69b57147e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -977,6 +977,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); int otx2_txsch_alloc(struct otx2_nic *pfvf); void otx2_txschq_stop(struct otx2_nic *pfvf); void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); +void otx2_free_pending_sqe(struct otx2_nic *pfvf); void otx2_sqb_flush(struct otx2_nic *pfvf); int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, dma_addr_t *dma); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6daf4d58c25d..91b99fd70361 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1193,31 +1193,32 @@ static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = { }; static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { - "NIX_SND_STATUS_GOOD", - "NIX_SND_STATUS_SQ_CTX_FAULT", - "NIX_SND_STATUS_SQ_CTX_POISON", - "NIX_SND_STATUS_SQB_FAULT", - "NIX_SND_STATUS_SQB_POISON", - "NIX_SND_STATUS_HDR_ERR", - "NIX_SND_STATUS_EXT_ERR", - "NIX_SND_STATUS_JUMP_FAULT", - "NIX_SND_STATUS_JUMP_POISON", - "NIX_SND_STATUS_CRC_ERR", - "NIX_SND_STATUS_IMM_ERR", - "NIX_SND_STATUS_SG_ERR", - "NIX_SND_STATUS_MEM_ERR", - "NIX_SND_STATUS_INVALID_SUBDC", - "NIX_SND_STATUS_SUBDC_ORDER_ERR", - "NIX_SND_STATUS_DATA_FAULT", - "NIX_SND_STATUS_DATA_POISON", - "NIX_SND_STATUS_NPC_DROP_ACTION", - "NIX_SND_STATUS_LOCK_VIOL", - "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR", - "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR", - "NIX_SND_STATUS_NPC_MCAST_ABORT", - "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", - "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", - "NIX_SND_STATUS_SEND_STATS_ERR", + [NIX_SND_STATUS_GOOD] = "NIX_SND_STATUS_GOOD", + [NIX_SND_STATUS_SQ_CTX_FAULT] = "NIX_SND_STATUS_SQ_CTX_FAULT", + [NIX_SND_STATUS_SQ_CTX_POISON] = "NIX_SND_STATUS_SQ_CTX_POISON", + [NIX_SND_STATUS_SQB_FAULT] = "NIX_SND_STATUS_SQB_FAULT", + [NIX_SND_STATUS_SQB_POISON] = "NIX_SND_STATUS_SQB_POISON", + [NIX_SND_STATUS_HDR_ERR] = 
"NIX_SND_STATUS_HDR_ERR", + [NIX_SND_STATUS_EXT_ERR] = "NIX_SND_STATUS_EXT_ERR", + [NIX_SND_STATUS_JUMP_FAULT] = "NIX_SND_STATUS_JUMP_FAULT", + [NIX_SND_STATUS_JUMP_POISON] = "NIX_SND_STATUS_JUMP_POISON", + [NIX_SND_STATUS_CRC_ERR] = "NIX_SND_STATUS_CRC_ERR", + [NIX_SND_STATUS_IMM_ERR] = "NIX_SND_STATUS_IMM_ERR", + [NIX_SND_STATUS_SG_ERR] = "NIX_SND_STATUS_SG_ERR", + [NIX_SND_STATUS_MEM_ERR] = "NIX_SND_STATUS_MEM_ERR", + [NIX_SND_STATUS_INVALID_SUBDC] = "NIX_SND_STATUS_INVALID_SUBDC", + [NIX_SND_STATUS_SUBDC_ORDER_ERR] = "NIX_SND_STATUS_SUBDC_ORDER_ERR", + [NIX_SND_STATUS_DATA_FAULT] = "NIX_SND_STATUS_DATA_FAULT", + [NIX_SND_STATUS_DATA_POISON] = "NIX_SND_STATUS_DATA_POISON", + [NIX_SND_STATUS_NPC_DROP_ACTION] = "NIX_SND_STATUS_NPC_DROP_ACTION", + [NIX_SND_STATUS_LOCK_VIOL] = "NIX_SND_STATUS_LOCK_VIOL", + [NIX_SND_STATUS_NPC_UCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_UCAST_CHAN_ERR", + [NIX_SND_STATUS_NPC_MCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_MCAST_CHAN_ERR", + [NIX_SND_STATUS_NPC_MCAST_ABORT] = "NIX_SND_STATUS_NPC_MCAST_ABORT", + [NIX_SND_STATUS_NPC_VTAG_PTR_ERR] = "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", + [NIX_SND_STATUS_NPC_VTAG_SIZE_ERR] = "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", + [NIX_SND_STATUS_SEND_MEM_FAULT] = "NIX_SND_STATUS_SEND_MEM_FAULT", + [NIX_SND_STATUS_SEND_STATS_ERR] = "NIX_SND_STATUS_SEND_STATS_ERR", }; static irqreturn_t otx2_q_intr_handler(int irq, void *data) @@ -1238,14 +1239,16 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) continue; if (val & BIT_ULL(42)) { - netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + netdev_err(pf->netdev, + "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", qidx, otx2_read64(pf, NIX_LF_ERR_INT)); } else { if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR)) netdev_err(pf->netdev, "CQ%lld: Doorbell error", qidx); if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT)) - netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM", + netdev_err(pf->netdev, + "CQ%lld: Memory fault on CQE write to LLC/DRAM", qidx); } @@ -1272,7 +1275,8 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) (val & NIX_SQINT_BITS)); if (val & BIT_ULL(42)) { - netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + netdev_err(pf->netdev, + "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", qidx, otx2_read64(pf, NIX_LF_ERR_INT)); goto done; } @@ -1282,8 +1286,11 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) goto chk_mnq_err_dbg; sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg); - netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n", - qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(0x%llx) err=%s(%#x)\n", + qidx, sq_op_err_dbg, + nix_sqoperr_e_str[sq_op_err_code], + sq_op_err_code); otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44)); @@ -1300,16 +1307,21 @@ chk_mnq_err_dbg: goto chk_snd_err_dbg; mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg); - netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n", - qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_MNQ_ERR_DBG(0x%llx) err=%s(%#x)\n", + qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code], + mnq_err_code); otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44)); chk_snd_err_dbg: snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG); if (snd_err_dbg & BIT(44)) { snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg); - netdev_err(pf->netdev, "SQ%lld: 
NIX_LF_SND_ERR_DBG:0x%llx err=%s\n", - qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s(%#x)\n", + qidx, snd_err_dbg, + nix_snd_status_e_str[snd_err_code], + snd_err_code); otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44)); } @@ -1589,6 +1601,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) else otx2_cleanup_tx_cqes(pf, cq); } + otx2_free_pending_sqe(pf); otx2_free_sq_res(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h index fa37b9f312ca..4e5899d8fa2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h @@ -318,23 +318,23 @@ enum nix_snd_status_e { NIX_SND_STATUS_EXT_ERR = 0x6, NIX_SND_STATUS_JUMP_FAULT = 0x7, NIX_SND_STATUS_JUMP_POISON = 0x8, - NIX_SND_STATUS_CRC_ERR = 0x9, - NIX_SND_STATUS_IMM_ERR = 0x10, - NIX_SND_STATUS_SG_ERR = 0x11, - NIX_SND_STATUS_MEM_ERR = 0x12, - NIX_SND_STATUS_INVALID_SUBDC = 0x13, - NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14, - NIX_SND_STATUS_DATA_FAULT = 0x15, - NIX_SND_STATUS_DATA_POISON = 0x16, - NIX_SND_STATUS_NPC_DROP_ACTION = 0x17, - NIX_SND_STATUS_LOCK_VIOL = 0x18, - NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19, - NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20, - NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21, - NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22, - NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23, - NIX_SND_STATUS_SEND_MEM_FAULT = 0x24, - NIX_SND_STATUS_SEND_STATS_ERR = 0x25, + NIX_SND_STATUS_CRC_ERR = 0x10, + NIX_SND_STATUS_IMM_ERR = 0x11, + NIX_SND_STATUS_SG_ERR = 0x12, + NIX_SND_STATUS_MEM_ERR = 0x13, + NIX_SND_STATUS_INVALID_SUBDC = 0x14, + NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x15, + NIX_SND_STATUS_DATA_FAULT = 0x16, + NIX_SND_STATUS_DATA_POISON = 0x17, + NIX_SND_STATUS_NPC_DROP_ACTION = 0x20, + NIX_SND_STATUS_LOCK_VIOL = 0x21, + NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x22, + NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x23, + NIX_SND_STATUS_NPC_MCAST_ABORT = 0x24, + NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x25, + NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x26, + NIX_SND_STATUS_SEND_MEM_FAULT = 0x27, + NIX_SND_STATUS_SEND_STATS_ERR = 0x28, NIX_SND_STATUS_MAX, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 53b2a4ef5298..6ee15f3c25ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1247,9 +1247,11 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int q void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) { + int tx_pkts = 0, tx_bytes = 0; struct sk_buff *skb = NULL; struct otx2_snd_queue *sq; struct nix_cqe_tx_s *cqe; + struct netdev_queue *txq; int processed_cqe = 0; struct sg_list *sg; int qidx; @@ -1270,12 +1272,20 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) sg = &sq->sg[cqe->comp.sqe_id]; skb = (struct sk_buff *)sg->skb; if (skb) { + tx_bytes += skb->len; + tx_pkts++; otx2_dma_unmap_skb_frags(pfvf, sg); dev_kfree_skb_any(skb); sg->skb = (u64)NULL; } } + if (likely(tx_pkts)) { + if (qidx >= pfvf->hw.tx_queues) + qidx -= pfvf->hw.xdp_queues; + txq = netdev_get_tx_queue(pfvf->netdev, qidx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + } /* Free CQEs to HW */ otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, ((u64)cq->cq_idx << 32) | processed_cqe); @@ -1302,6 +1312,38 @@ int otx2_rxtx_enable(struct otx2_nic 
*pfvf, bool enable) return err; } +void otx2_free_pending_sqe(struct otx2_nic *pfvf) +{ + int tx_pkts = 0, tx_bytes = 0; + struct sk_buff *skb = NULL; + struct otx2_snd_queue *sq; + struct netdev_queue *txq; + struct sg_list *sg; + int sq_idx, sqe; + + for (sq_idx = 0; sq_idx < pfvf->hw.tx_queues; sq_idx++) { + sq = &pfvf->qset.sq[sq_idx]; + for (sqe = 0; sqe < sq->sqe_cnt; sqe++) { + sg = &sq->sg[sqe]; + skb = (struct sk_buff *)sg->skb; + if (skb) { + tx_bytes += skb->len; + tx_pkts++; + otx2_dma_unmap_skb_frags(pfvf, sg); + dev_kfree_skb_any(skb); + sg->skb = (u64)NULL; + } + } + + if (!tx_pkts) + continue; + txq = netdev_get_tx_queue(pfvf->netdev, sq_idx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + tx_pkts = 0; + tx_bytes = 0; + } +} + static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr, int len, int *offset) { diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a987defb575c..0c76c162b8a9 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2582,9 +2582,13 @@ static void rtl_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_PROMISC) { rx_mode |= AcceptAllPhys; + } else if (!(dev->flags & IFF_MULTICAST)) { + rx_mode &= ~AcceptMulticast; } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || dev->flags & IFF_ALLMULTI || - tp->mac_version == RTL_GIGA_MAC_VER_35) { + tp->mac_version == RTL_GIGA_MAC_VER_35 || + tp->mac_version == RTL_GIGA_MAC_VER_46 || + tp->mac_version == RTL_GIGA_MAC_VER_48) { /* accept all multicasts */ } else if (netdev_mc_empty(dev)) { rx_mode &= ~AcceptMulticast; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 7a8f47e7b728..a4e8b498dea9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -259,7 +259,7 @@ ((val) << XGMAC_PPS_MINIDX(x)) #define XGMAC_PPSCMD_START 0x2 #define XGMAC_PPSCMD_STOP 0x5 -#define XGMAC_PPSEN0 BIT(4) +#define XGMAC_PPSENx(x) BIT(4 + (x) * 8) #define XGMAC_PPSx_TARGET_TIME_SEC(x) (0x00000d80 + (x) * 0x10) #define XGMAC_PPSx_TARGET_TIME_NSEC(x) (0x00000d84 + (x) * 0x10) #define XGMAC_TRGTBUSY0 BIT(31) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index f352be269deb..453e88b75be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1178,7 +1178,19 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index, val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START); val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START); - val |= XGMAC_PPSEN0; + + /* The XGMAC core has at most 4 PPS outputs. + * + * Prior to XGMAC Core 3.20, Fixed mode or Flexible mode is selectable + * for PPS0 only, via PPSEN0. PPS{1,2,3} are in Flexible mode by + * default and cannot be switched to Fixed mode, since PPSEN{1,2,3} are + * read-only, reserved to 0. Always setting PPSEN{1,2,3} anyway does + * not make things worse ;-) + * + * From XGMAC Core 3.20 onward, PPSEN{0,1,2,3} are writable and must + * be set, or the PPS outputs stay in Fixed PPS mode by default.
+ */ + val |= XGMAC_PPSENx(index); writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index)); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 24120605502f..ece9f8df98ae 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1588,10 +1588,10 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy /* rx_pause/tx_pause */ if (rx_pause) - mac_control |= CPSW_SL_CTL_RX_FLOW_EN; + mac_control |= CPSW_SL_CTL_TX_FLOW_EN; if (tx_pause) - mac_control |= CPSW_SL_CTL_TX_FLOW_EN; + mac_control |= CPSW_SL_CTL_RX_FLOW_EN; cpsw_sl_ctl_set(port->slave.mac_sl, mac_control); diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 4cf2a52e4378..3025e9c18970 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -177,7 +177,7 @@ static void icss_iep_set_counter(struct icss_iep *iep, u64 ns) if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) writel(upper_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG1]); - writel(upper_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]); + writel(lower_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]); } static void icss_iep_update_to_next_boundary(struct icss_iep *iep, u64 start_ns); diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 531bf919aef5..e0d26148dfd9 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -163,7 +163,6 @@ typedef void buffer_t; /* Information about built-in Ethernet MAC interfaces */ struct eth_plat_info { - u8 phy; /* MII PHY ID, 0 - 31 */ u8 rxq; /* configurable, currently 0 - 31 only */ u8 txreadyq; u8 hwaddr[ETH_ALEN]; @@ -1583,7 +1582,7 @@ static int ixp4xx_eth_probe(struct platform_device *pdev) if ((err = register_netdev(ndev))) goto err_phy_dis; - netdev_info(ndev, "%s: MII PHY %i on %s\n", ndev->name, plat->phy, + netdev_info(ndev, "%s: MII PHY %s on %s\n", ndev->name, phydev_name(phydev), npe_name(port->npe)); return 0; diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c index 4630dde01974..5d0f11f280cf 100644 --- a/drivers/net/mdio/acpi_mdio.c +++ b/drivers/net/mdio/acpi_mdio.c @@ -16,6 +16,7 @@ MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ACPI MDIO bus (Ethernet PHY) accessors"); /** * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. 
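The icss_iep change above fixes a classic split-register bug: the driver wrote upper_32_bits(ns) into both the high and the low half of the 64-bit IEP counter, so the low word was lost. Below is a minimal stand-alone sketch of the corrected pattern; the reg_hi/reg_lo variables and the macro definitions are illustrative stand-ins for the driver's ICSS_IEP_COUNT_REG1/REG0 writel() calls, not code taken from the kernel.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the kernel's upper_32_bits()/lower_32_bits() helpers. */
    #define upper_32_bits(n) ((uint32_t)((n) >> 32))
    #define lower_32_bits(n) ((uint32_t)((n) & 0xffffffffULL))

    /* Fake 32-bit "registers" in place of COUNT_REG1 (high) / COUNT_REG0 (low). */
    static uint32_t reg_hi, reg_lo;

    static void set_counter(uint64_t ns)
    {
            reg_hi = upper_32_bits(ns); /* high word to the high register ... */
            reg_lo = lower_32_bits(ns); /* ... low word to the low register,
                                         * not upper_32_bits() a second time */
    }

    int main(void)
    {
            set_counter(0x123456789abcdef0ULL);
            /* prints hi=0x12345678 lo=0x9abcdef0 */
            printf("hi=0x%08x lo=0x%08x\n", (unsigned)reg_hi, (unsigned)reg_lo);
            return 0;
    }

The same shape applies to any 64-bit value pushed through a pair of 32-bit MMIO registers, which is why the one-character copy-paste slip (upper vs. lower) is easy to make and easy to miss.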
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1183ef5e203e..fd02f5cbc853 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -14,6 +14,7 @@ MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FWNODE MDIO bus (Ethernet PHY) accessors"); static struct pse_control * fwnode_find_pse_control(struct fwnode_handle *fwnode) diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index 70edeeb7771e..c2170650415c 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -205,3 +205,4 @@ module_platform_driver(aspeed_mdio_driver); MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ASPEED MDIO bus controller"); diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c index 81b7748c10ce..f88639297ff2 100644 --- a/drivers/net/mdio/mdio-bitbang.c +++ b/drivers/net/mdio/mdio-bitbang.c @@ -263,3 +263,4 @@ void free_mdio_bitbang(struct mii_bus *bus) EXPORT_SYMBOL(free_mdio_bitbang); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Bitbanged MDIO buses"); diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 7eb32ebb846d..64ebcb6d235c 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -25,6 +25,7 @@ MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("OpenFirmware MDIO bus (Ethernet PHY) accessors"); /* Extract the clause 22 phy ID from the compatible string of the form * ethernet-phy-idAAAA.BBBB */ diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index ef00d6163061..cb4b91af5e17 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -942,3 +942,4 @@ struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) EXPORT_SYMBOL_GPL(bcm_ptp_probe); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcom PHY PTP driver"); diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index cc2858107668..e81404bf8994 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -223,3 +223,4 @@ static struct phy_driver bcm87xx_driver[] = { module_phy_driver(bcm87xx_driver); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Broadcom BCM87xx PHY driver"); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6712883498bb..25c19496a336 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1616,6 +1616,7 @@ struct phylink *phylink_create(struct phylink_config *config, pl->config = config; if (config->type == PHYLINK_NETDEV) { pl->netdev = to_net_dev(config->dev); + netif_carrier_off(pl->netdev); } else if (config->type == PHYLINK_DEV) { pl->dev = config->dev; } else { @@ -3726,3 +3727,4 @@ static int __init phylink_init(void) module_init(phylink_init); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("phylink models the MAC to optional PHY connection"); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index b8c0961daf53..5468bd209fab 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -3153,3 +3153,4 @@ module_exit(sfp_exit); MODULE_ALIAS("platform:sfp"); MODULE_AUTHOR("Russell King"); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SFP cage support"); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a9beacd552cf..0193af2d31c9 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -570,8 +570,8 @@ static struct 
bpf_prog *get_filter(struct sock_fprog *uprog) /* uprog->len is unsigned short, so no overflow here */ fprog.len = uprog->len; - fprog.filter = memdup_user(uprog->filter, - uprog->len * sizeof(struct sock_filter)); + fprog.filter = memdup_array_user(uprog->filter, + uprog->len, sizeof(struct sock_filter)); if (IS_ERR(fprog.filter)) return ERR_CAST(fprog.filter); diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile index eedca8c72098..74f59ceed3d5 100644 --- a/drivers/nvme/Makefile +++ b/drivers/nvme/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_NVME_COMMON) += common/ +obj-y += common/ obj-y += host/ obj-y += target/ diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 06c8df00d1e2..244432e0b73d 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -1,14 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config NVME_COMMON - tristate - config NVME_KEYRING - bool + tristate select KEYS config NVME_AUTH - bool + tristate select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA256 diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile index 0cbd0b0b8d49..681514cf2e2f 100644 --- a/drivers/nvme/common/Makefile +++ b/drivers/nvme/common/Makefile @@ -2,7 +2,8 @@ ccflags-y += -I$(src) -obj-$(CONFIG_NVME_COMMON) += nvme-common.o +obj-$(CONFIG_NVME_AUTH) += nvme-auth.o +obj-$(CONFIG_NVME_KEYRING) += nvme-keyring.o -nvme-common-$(CONFIG_NVME_AUTH) += auth.o -nvme-common-$(CONFIG_NVME_KEYRING) += keyring.o +nvme-auth-y += auth.o +nvme-keyring-y += keyring.o diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a8e87dfbeab2..a23ab5c968b9 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -341,7 +341,6 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen) { struct crypto_shash *tfm; - struct shash_desc *desc; u8 *hashed_key; const char *hmac_name; int ret; @@ -369,29 +368,11 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, goto out_free_key; } - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out_free_hash; - } - desc->tfm = tfm; - ret = crypto_shash_setkey(tfm, hashed_key, hlen); if (ret) - goto out_free_desc; - - ret = crypto_shash_init(desc); - if (ret) - goto out_free_desc; - - ret = crypto_shash_update(desc, challenge, hlen); - if (ret) - goto out_free_desc; + goto out_free_hash; - ret = crypto_shash_final(desc, aug); -out_free_desc: - kfree_sensitive(desc); + ret = crypto_shash_tfm_digest(tfm, challenge, hlen, aug); out_free_hash: crypto_free_shash(tfm); out_free_key: diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index f8d9a208397b..ee341b83eeba 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -151,7 +151,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, } EXPORT_SYMBOL_GPL(nvme_tls_psk_default); -int nvme_keyring_init(void) +static int __init nvme_keyring_init(void) { int err; @@ -171,12 +171,15 @@ int nvme_keyring_init(void) } return 0; } -EXPORT_SYMBOL_GPL(nvme_keyring_init); -void nvme_keyring_exit(void) +static void __exit nvme_keyring_exit(void) { unregister_key_type(&nvme_tls_psk_key_type); key_revoke(nvme_keyring); key_put(nvme_keyring); } -EXPORT_SYMBOL_GPL(nvme_keyring_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); +module_init(nvme_keyring_init); 
+module_exit(nvme_keyring_exit); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 48f7d72de5e9..8fe2dd619e80 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -95,7 +95,6 @@ config NVME_TCP config NVME_TCP_TLS bool "NVMe over Fabrics TCP TLS encryption support" depends on NVME_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -110,7 +109,6 @@ config NVME_TCP_TLS config NVME_HOST_AUTH bool "NVM Express over Fabrics In-Band Authentication" depends on NVME_CORE - select NVME_COMMON select NVME_AUTH help This provides support for NVMe over Fabrics In-Band Authentication. diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index eaefebb2a799..48328e36e93b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -29,6 +29,7 @@ struct nvme_dhchap_queue_context { int error; u32 s1; u32 s2; + bool bi_directional; u16 transaction; u8 status; u8 dhgroup_id; @@ -312,17 +313,17 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); if (ctrl->ctrl_key) { + chap->bi_directional = true; get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; - chap->s2 = nvme_auth_get_seqnum(); memcpy(data->rval + chap->hash_len, chap->c2, chap->hash_len); dev_dbg(ctrl->device, "%s: qid %d ctrl challenge %*ph\n", __func__, chap->qid, (int)chap->hash_len, chap->c2); } else { memset(chap->c2, 0, chap->hash_len); - chap->s2 = 0; } + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -339,10 +340,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvme_dhchap_queue_context *chap) { struct nvmf_auth_dhchap_success1_data *data = chap->buf; - size_t size = sizeof(*data); - - if (chap->s2) - size += chap->hash_len; + size_t size = sizeof(*data) + chap->hash_len; if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; @@ -663,6 +661,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) chap->error = 0; chap->s1 = 0; chap->s2 = 0; + chap->bi_directional = false; chap->transaction = 0; memset(chap->c1, 0, sizeof(chap->c1)); memset(chap->c2, 0, sizeof(chap->c2)); @@ -825,7 +824,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->s2) { + if (chap->bi_directional) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 62612f87aafa..88b54cdcbd68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -25,7 +25,6 @@ #include "nvme.h" #include "fabrics.h" #include <linux/nvme-auth.h> -#include <linux/nvme-keyring.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -483,6 +482,7 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); @@ -3200,6 +3200,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); ctrl->identified = true; + nvme_start_keep_alive(ctrl); + return 0; } EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); @@ -4074,8 +4076,21 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) return; 
if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM, - log, sizeof(*log), 0)) + log, sizeof(*log), 0)) { dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); + goto out_free_log; + } + + if (log->afi & 0x70 || !(log->afi & 0x7)) { + dev_info(ctrl->device, + "Firmware is activated after next Controller Level Reset\n"); + goto out_free_log; + } + + memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1], + sizeof(ctrl->subsys->firmware_rev)); + +out_free_log: kfree(log); } @@ -4333,7 +4348,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4344,8 +4358,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl) { - nvme_start_keep_alive(ctrl); - nvme_enable_aen(ctrl); /* @@ -4724,16 +4736,11 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_ns_chr_class); goto unregister_generic_ns; } - result = nvme_keyring_init(); - if (result) - goto destroy_ns_chr; result = nvme_init_auth(); if (result) - goto keyring_exit; + goto destroy_ns_chr; return 0; -keyring_exit: - nvme_keyring_exit(); destroy_ns_chr: class_destroy(nvme_ns_chr_class); unregister_generic_ns: @@ -4757,7 +4764,6 @@ out: static void __exit nvme_core_exit(void) { nvme_exit_auth(); - nvme_keyring_exit(); class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a15b37750d6e..49c3e46eaa1e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2530,6 +2530,12 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ nvme_quiesce_admin_queue(&ctrl->ctrl); + + /* + * Open-coding nvme_cancel_admin_tagset() as fc + * is not using nvme_cancel_request(). 
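+ * That helper also stops the keep-alive work, so do the same by hand here.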
+ */ + nvme_stop_keep_alive(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 747c879e8982..529b9954d2b8 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -510,10 +510,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); req->bio = pdu->bio; - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { pdu->nvme_status = -EINTR; - else + } else { pdu->nvme_status = nvme_req(req)->status; + if (!pdu->nvme_status) + pdu->nvme_status = blk_status_to_errno(err); + } pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4714a902f4ca..89661a9cf850 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1423,13 +1423,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) nvme_tcp_queue_id(queue), ret); goto free_icresp; } + ret = -ENOTCONN; if (queue->ctrl->ctrl.opts->tls) { ctype = tls_get_record_type(queue->sock->sk, (struct cmsghdr *)cbuf); if (ctype != TLS_RECORD_TYPE_DATA) { pr_err("queue %d: unhandled TLS record %d\n", nvme_tcp_queue_id(queue), ctype); - return -ENOTCONN; + goto free_icresp; } } ret = -EINVAL; @@ -2236,11 +2237,7 @@ destroy_io: nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - nvme_quiesce_admin_queue(ctrl); - blk_sync_queue(ctrl->admin_q); - nvme_tcp_stop_queue(ctrl, 0); - nvme_cancel_admin_tagset(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, new); + nvme_tcp_teardown_admin_queue(ctrl, false); return ret; } diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fa479c9f5c3d..31633da9427c 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -87,7 +87,6 @@ config NVME_TARGET_TCP config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -102,7 +101,6 @@ config NVME_TARGET_TCP_TLS config NVME_TARGET_AUTH bool "NVMe over Fabrics In-band Authentication support" depends on NVME_TARGET - select NVME_COMMON select NVME_AUTH help This enables support for NVMe over Fabrics In-band Authentication diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 1d9854484e2e..eb7785be0ca7 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -163,11 +163,11 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); } else { req->sq->authenticated = true; req->sq->dhchap_c2 = NULL; } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); return 0; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 48d5df054cd0..9cb434c58075 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -466,6 +466,8 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) out_destroy_io: nvme_loop_destroy_io_queues(ctrl); out_destroy_admin: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset 
failure\n"); @@ -600,6 +602,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_free_queues: kfree(ctrl->queues); diff --git a/drivers/of/property.c b/drivers/of/property.c index cf8dacf3e3b8..afdaefbd03f6 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1267,6 +1267,7 @@ DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) +DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1356,6 +1357,7 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, + { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 5658745c398f..b33be1e63c98 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -605,6 +605,7 @@ static int pccardd(void *__skt) dev_warn(&skt->dev, "PCMCIA: unable to register socket\n"); skt->thread = NULL; complete(&skt->thread_done); + put_device(&skt->dev); return 0; } ret = pccard_sysfs_add_socket(&skt->dev); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index d500e5dbbc3f..b4b8363d1de2 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -513,9 +513,6 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, /* by default don't allow DMA */ p_dev->dma_mask = 0; p_dev->dev.dma_mask = &p_dev->dma_mask; - dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); - if (!dev_name(&p_dev->dev)) - goto err_free; p_dev->devname = kasprintf(GFP_KERNEL, "pcmcia%s", dev_name(&p_dev->dev)); if (!p_dev->devname) goto err_free; @@ -573,8 +570,15 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, pcmcia_device_query(p_dev); - if (device_register(&p_dev->dev)) - goto err_unreg; + dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); + if (device_register(&p_dev->dev)) { + mutex_lock(&s->ops_mutex); + list_del(&p_dev->socket_device_list); + s->device_count--; + mutex_unlock(&s->ops_mutex); + put_device(&p_dev->dev); + return NULL; + } return p_dev; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index d78091e79a0f..e9e31c638a67 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -684,7 +684,7 @@ EXPORT_SYMBOL(pcmcia_request_io); * pcmcia_request_irq() is a wrapper around request_irq() which allows * the PCMCIA core to clean up the registration in pcmcia_disable_device(). * Drivers are free to use request_irq() directly, but then they need to - * call free_irq() themselfves, too. Also, only %IRQF_SHARED capable IRQ + * call free_irq() themselves, too. Also, only %IRQF_SHARED capable IRQ * handlers are allowed. 
*/ int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 1a0e3f098759..5ef888688e23 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -435,7 +435,7 @@ static int __init init_tcic(void) } /* Set up polling */ - timer_setup(&poll_timer, &tcic_timer, 0); + timer_setup(&poll_timer, tcic_timer, 0); /* Build interrupt mask */ printk(KERN_CONT ", %d sockets\n", sockets); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 42b72042f6b3..2cc35dded007 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -676,6 +676,9 @@ static int arm_cspmu_event_init(struct perf_event *event) cspmu = to_arm_cspmu(event->pmu); + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* * Following other "uncore" PMUs, we do not support sampling mode or * attach to a task (per-process mode). diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 18b91b56af1d..6ca7be05229c 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -428,12 +428,12 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -static inline u32 armv8pmu_pmcr_read(void) +static inline u64 armv8pmu_pmcr_read(void) { return read_pmcr(); } -static inline void armv8pmu_pmcr_write(u32 val) +static inline void armv8pmu_pmcr_write(u64 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); @@ -957,7 +957,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 pmcr; + u64 pmcr; /* The counter and interrupt enable registers are unknown at reset. 
*/ armv8pmu_disable_counter(U32_MAX); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 96c7f670c8f0..16acd4dcdb96 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -22,7 +22,7 @@ #include <asm/errata_list.h> #include <asm/sbi.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #define SYSCTL_NO_USER_ACCESS 0 #define SYSCTL_USER_ACCESS 1 @@ -543,8 +543,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_set_scounteren, (void *)event, 1); + pmu_sbi_set_scounteren((void *)event); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -554,8 +553,7 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_reset_scounteren, (void *)event, 1); + pmu_sbi_reset_scounteren((void *)event); ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && @@ -689,6 +687,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* Firmware counter don't support overflow yet */ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + if (fidx == RISCV_MAX_COUNTERS) { + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + return IRQ_NONE; + } + event = cpu_hw_evt->events[fidx]; if (!event) { csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index bc7bb9876e57..fd5ce52d05b1 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -330,24 +330,22 @@ static const struct irq_chip iproc_gpio_irq_chip = { static int iproc_gpio_request(struct gpio_chip *gc, unsigned offset) { struct iproc_gpio *chip = gpiochip_get_data(gc); - unsigned gpio = gc->base + offset; /* not all Iproc GPIO pins can be muxed individually */ if (!chip->pinmux_is_supported) return 0; - return pinctrl_gpio_request(gpio); + return pinctrl_gpio_request(gc, offset); } static void iproc_gpio_free(struct gpio_chip *gc, unsigned offset) { struct iproc_gpio *chip = gpiochip_get_data(gc); - unsigned gpio = gc->base + offset; if (!chip->pinmux_is_supported) return; - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(gc, offset); } static int iproc_gpio_direction_input(struct gpio_chip *gc, unsigned gpio) diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index c09646318419..012b0a3bad5a 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -506,17 +506,12 @@ static void cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, int va pm_runtime_put(priv->dev); } -static int cs42l43_gpio_direction_in(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int cs42l43_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, int value) { cs42l43_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int cs42l43_gpio_add_pin_ranges(struct gpio_chip *chip) @@ -551,7 +546,7 @@ static int cs42l43_pin_probe(struct platform_device *pdev) 
priv->gpio_chip.request = gpiochip_generic_request; priv->gpio_chip.free = gpiochip_generic_free; - priv->gpio_chip.direction_input = cs42l43_gpio_direction_in; + priv->gpio_chip.direction_input = pinctrl_gpio_direction_input; priv->gpio_chip.direction_output = cs42l43_gpio_direction_out; priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges; priv->gpio_chip.get = cs42l43_gpio_get; diff --git a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c index 0b78cf611afe..014297a3fbd2 100644 --- a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c +++ b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c @@ -1098,7 +1098,7 @@ static int lochnagar_gpio_direction_out(struct gpio_chip *chip, { lochnagar_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int lochnagar_fill_func_groups(struct lochnagar_pin_priv *priv) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 71fc9f95584e..1fa89be29b8f 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -23,6 +23,8 @@ #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/gpio/driver.h> + #include <linux/pinctrl/consumer.h> #include <linux/pinctrl/devinfo.h> #include <linux/pinctrl/machine.h> @@ -267,7 +269,8 @@ static int pinctrl_register_pins(struct pinctrl_dev *pctldev, /** * gpio_to_pin() - GPIO range GPIO number to pin number translation * @range: GPIO range used for the translation - * @gpio: gpio pin to translate to a pin number + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * Finds the pin number for a given GPIO using the specified GPIO range * as a base for translation. The distinction between linear GPIO ranges @@ -278,25 +281,27 @@ static int pinctrl_register_pins(struct pinctrl_dev *pctldev, * result of successful pinctrl_get_device_gpio_range calls)! 
*/ static inline int gpio_to_pin(struct pinctrl_gpio_range *range, - unsigned int gpio) + struct gpio_chip *gc, unsigned int offset) { - unsigned int offset = gpio - range->base; + unsigned int pin = gc->base + offset - range->base; if (range->pins) - return range->pins[offset]; + return range->pins[pin]; else - return range->pin_base + offset; + return range->pin_base + pin; } /** * pinctrl_match_gpio_range() - check if a certain GPIO pin is in range * @pctldev: pin controller device to check - * @gpio: gpio pin to check taken from the global GPIO pin space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * Tries to match a GPIO pin number to the ranges handled by a certain pin * controller, return the range or NULL */ static struct pinctrl_gpio_range * -pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) +pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, struct gpio_chip *gc, + unsigned int offset) { struct pinctrl_gpio_range *range; @@ -304,8 +309,8 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) /* Loop over the ranges */ list_for_each_entry(range, &pctldev->gpio_ranges, node) { /* Check if we're in the valid range */ - if (gpio >= range->base && - gpio < range->base + range->npins) { + if ((gc->base + offset) >= range->base && + (gc->base + offset) < range->base + range->npins) { mutex_unlock(&pctldev->mutex); return range; } @@ -317,7 +322,8 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) /** * pinctrl_ready_for_gpio_range() - check if other GPIO pins of * the same GPIO chip are in range - * @gpio: gpio pin to check taken from the global GPIO pin space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function is complement of pinctrl_match_gpio_range(). If the return * value of pinctrl_match_gpio_range() is NULL, this function could be used @@ -328,19 +334,11 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) * is false, it means that pinctrl device may not be ready. */ #ifdef CONFIG_GPIOLIB -static bool pinctrl_ready_for_gpio_range(unsigned gpio) +static bool pinctrl_ready_for_gpio_range(struct gpio_chip *gc, + unsigned int offset) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range = NULL; - /* - * FIXME: "gpio" here is a number in the global GPIO numberspace. - * get rid of this from the ranges eventually and get the GPIO - * descriptor from the gpio_chip. 
- */ - struct gpio_chip *chip = gpiod_to_chip(gpio_to_desc(gpio)); - - if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) - return false; mutex_lock(&pinctrldev_list_mutex); @@ -350,8 +348,8 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) mutex_lock(&pctldev->mutex); list_for_each_entry(range, &pctldev->gpio_ranges, node) { /* Check if any gpio range overlapped with gpio chip */ - if (range->base + range->npins - 1 < chip->base || - range->base > chip->base + chip->ngpio - 1) + if (range->base + range->npins - 1 < gc->base || + range->base > gc->base + gc->ngpio - 1) continue; mutex_unlock(&pctldev->mutex); mutex_unlock(&pinctrldev_list_mutex); @@ -365,12 +363,17 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) return false; } #else -static bool pinctrl_ready_for_gpio_range(unsigned gpio) { return true; } +static inline bool +pinctrl_ready_for_gpio_range(struct gpio_chip *gc, unsigned int offset) +{ + return true; +} #endif /** * pinctrl_get_device_gpio_range() - find device for GPIO range - * @gpio: the pin to locate the pin controller for + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * @outdev: the pin control device if found * @outrange: the GPIO range if found * @@ -379,7 +382,8 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) { return true; } * -EPROBE_DEFER if the GPIO range could not be found in any device since it * may still have not been registered. */ -static int pinctrl_get_device_gpio_range(unsigned gpio, +static int pinctrl_get_device_gpio_range(struct gpio_chip *gc, + unsigned int offset, struct pinctrl_dev **outdev, struct pinctrl_gpio_range **outrange) { @@ -391,7 +395,7 @@ static int pinctrl_get_device_gpio_range(unsigned gpio, list_for_each_entry(pctldev, &pinctrldev_list, node) { struct pinctrl_gpio_range *range; - range = pinctrl_match_gpio_range(pctldev, gpio); + range = pinctrl_match_gpio_range(pctldev, gc, offset); if (range) { *outdev = pctldev; *outrange = range; @@ -753,7 +757,7 @@ int pinctrl_get_group_selector(struct pinctrl_dev *pctldev, return -EINVAL; } -bool pinctrl_gpio_can_use_line(unsigned gpio) +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; @@ -765,13 +769,13 @@ bool pinctrl_gpio_can_use_line(unsigned gpio) * we're probably dealing with GPIO driver * without a backing pin controller - bail out. */ - if (pinctrl_get_device_gpio_range(gpio, &pctldev, &range)) + if (pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range)) return true; mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); result = pinmux_can_be_used_for_gpio(pctldev, pin); @@ -783,22 +787,22 @@ EXPORT_SYMBOL_GPL(pinctrl_gpio_can_use_line); /** * pinctrl_gpio_request() - request a single pin to be used as GPIO - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_request() semantics, platforms and individual drivers * shall *NOT* request GPIO pins to be muxed in. 
*/ -int pinctrl_gpio_request(unsigned gpio) +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { - struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; - int ret; - int pin; + struct pinctrl_dev *pctldev; + int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) { - if (pinctrl_ready_for_gpio_range(gpio)) + if (pinctrl_ready_for_gpio_range(gc, offset)) ret = 0; return ret; } @@ -806,9 +810,9 @@ int pinctrl_gpio_request(unsigned gpio) mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); - ret = pinmux_request_gpio(pctldev, range, pin, gpio); + ret = pinmux_request_gpio(pctldev, range, pin, gc->base + offset); mutex_unlock(&pctldev->mutex); @@ -818,27 +822,27 @@ EXPORT_SYMBOL_GPL(pinctrl_gpio_request); /** * pinctrl_gpio_free() - free control on a single pin, currently used as GPIO - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_free() semantics, platforms and individual drivers * shall *NOT* request GPIO pins to be muxed out. */ -void pinctrl_gpio_free(unsigned gpio) +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { - struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; - int ret; - int pin; + struct pinctrl_dev *pctldev; + int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); - if (ret) { + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); + if (ret) return; - } + mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); pinmux_free_gpio(pctldev, pin, range); @@ -846,14 +850,15 @@ } EXPORT_SYMBOL_GPL(pinctrl_gpio_free); -static int pinctrl_gpio_direction(unsigned gpio, bool input) +static int pinctrl_gpio_direction(struct gpio_chip *gc, unsigned int offset, + bool input) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; int ret; int pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) { return ret; } @@ -861,7 +866,7 @@ static int pinctrl_gpio_direction(unsigned gpio, bool input) mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); ret = pinmux_gpio_direction(pctldev, range, pin, input); mutex_unlock(&pctldev->mutex); @@ -871,54 +876,58 @@ /** * pinctrl_gpio_direction_input() - request a GPIO pin to go into input mode - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_direction_input() semantics, platforms and individual * drivers shall *NOT* touch pin control GPIO calls. 
*/ -int pinctrl_gpio_direction_input(unsigned gpio) +int pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { - return pinctrl_gpio_direction(gpio, true); + return pinctrl_gpio_direction(gc, offset, true); } EXPORT_SYMBOL_GPL(pinctrl_gpio_direction_input); /** * pinctrl_gpio_direction_output() - request a GPIO pin to go into output mode - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_direction_output() semantics, platforms and individual * drivers shall *NOT* touch pin control GPIO calls. */ -int pinctrl_gpio_direction_output(unsigned gpio) +int pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { - return pinctrl_gpio_direction(gpio, false); + return pinctrl_gpio_direction(gc, offset, false); } EXPORT_SYMBOL_GPL(pinctrl_gpio_direction_output); /** * pinctrl_gpio_set_config() - Apply config to given GPIO pin - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * @config: the configuration to apply to the GPIO * * This function should *ONLY* be used from gpiolib-based GPIO drivers, if * they need to call the underlying pin controller to change GPIO config * (for example set debounce time). */ -int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { unsigned long configs[] = { config }; struct pinctrl_gpio_range *range; struct pinctrl_dev *pctldev; int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) return ret; mutex_lock(&pctldev->mutex); - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); ret = pinconf_set_config(pctldev, pin, configs, ARRAY_SIZE(configs)); mutex_unlock(&pctldev->mutex); diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index b1d8f6136f99..067b0d344f0e 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1148,14 +1148,14 @@ static int chv_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) static int chv_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int chv_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { chv_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip chv_gpio_chip = { diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b19527a8728e..652ba451f885 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -992,14 +992,14 @@ static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) static int intel_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int 
intel_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { intel_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip intel_gpio_chip = { diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c index d7bc9ef29fcc..e6878e4cf20c 100644 --- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c +++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c @@ -541,7 +541,7 @@ static void lp_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) static int lp_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int lp_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, @@ -549,7 +549,7 @@ static int lp_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, { lp_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int lp_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index 889469c7ac26..c3d59eddd994 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -510,17 +510,12 @@ static void mtk_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value) mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DO, !!value); } -static int mtk_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) -{ - return pinctrl_gpio_direction_input(chip->base + gpio); -} - static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, int value) { mtk_gpio_set(chip, gpio, value); - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) @@ -569,7 +564,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->parent = hw->dev; chip->request = gpiochip_generic_request; chip->free = gpiochip_generic_free; - chip->direction_input = mtk_gpio_direction_input; + chip->direction_input = pinctrl_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; chip->set = mtk_gpio_set; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 74b15952b742..e79d66a04194 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -808,17 +808,11 @@ static const struct pinmux_ops mtk_pmx_ops = { .gpio_request_enable = mtk_pmx_gpio_request_enable, }; -static int mtk_gpio_direction_input(struct gpio_chip *chip, - unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { mtk_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int mtk_gpio_get_direction(struct gpio_chip *chip, unsigned offset) @@ -898,7 +892,7 @@ static const struct gpio_chip mtk_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get_direction = mtk_gpio_get_direction, - .direction_input = mtk_gpio_direction_input, + .direction_input 
= pinctrl_gpio_direction_input, .direction_output = mtk_gpio_direction_output, .get = mtk_gpio_get, .set = mtk_gpio_set, diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index b7cb5a1f1060..6392f1e05d02 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -916,7 +916,7 @@ static int mtk_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) if (gpio >= hw->soc->npins) return -EINVAL; - return pinctrl_gpio_direction_input(chip->base + gpio); + return pinctrl_gpio_direction_input(chip, gpio); } static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, @@ -929,7 +929,7 @@ static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, mtk_gpio_set(chip, gpio, value); - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 8bdd0124e2eb..1e658721aaba 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -171,7 +171,7 @@ static int npcmgpio_direction_input(struct gpio_chip *chip, unsigned int offset) struct npcm7xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_input(offset + chip->base); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; @@ -188,7 +188,7 @@ static int npcmgpio_direction_output(struct gpio_chip *chip, dev_dbg(chip->parent, "gpio_direction_output: offset%d = %x\n", offset, value); - ret = pinctrl_gpio_direction_output(offset + chip->base); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; @@ -201,19 +201,13 @@ static int npcmgpio_gpio_request(struct gpio_chip *chip, unsigned int offset) int ret; dev_dbg(chip->parent, "gpio_request: offset%d\n", offset); - ret = pinctrl_gpio_request(offset + chip->base); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; return bank->request(chip, offset); } -static void npcmgpio_gpio_free(struct gpio_chip *chip, unsigned int offset) -{ - dev_dbg(chip->parent, "gpio_free: offset%d\n", offset); - pinctrl_gpio_free(offset + chip->base); -} - static void npcmgpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc; @@ -1916,7 +1910,7 @@ static int npcm7xx_gpio_of(struct npcm7xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.direction_output = npcmgpio_direction_output; pctrl->gpio_bank[id].request = pctrl->gpio_bank[id].gc.request; pctrl->gpio_bank[id].gc.request = npcmgpio_gpio_request; - pctrl->gpio_bank[id].gc.free = npcmgpio_gpio_free; + pctrl->gpio_bank[id].gc.free = pinctrl_gpio_free; id++; } diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index da21f6a45888..a377d36b0eb0 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -173,7 +173,7 @@ static int npcmgpio_direction_input(struct gpio_chip *chip, unsigned int offset) struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_input(offset + chip->base); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; @@ -186,7 +186,7 @@ static int npcmgpio_direction_output(struct gpio_chip *chip, struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_output(offset + chip->base); + ret = 
pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; @@ -198,18 +198,13 @@ static int npcmgpio_gpio_request(struct gpio_chip *chip, unsigned int offset) struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_request(offset + chip->base); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; return bank->request(chip, offset); } -static void npcmgpio_gpio_free(struct gpio_chip *chip, unsigned int offset) -{ - pinctrl_gpio_free(offset + chip->base); -} - static void npcmgpio_irq_handler(struct irq_desc *desc) { unsigned long sts, en, bit; @@ -2388,7 +2383,7 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.direction_output = npcmgpio_direction_output; pctrl->gpio_bank[id].request = pctrl->gpio_bank[id].gc.request; pctrl->gpio_bank[id].gc.request = npcmgpio_gpio_request; - pctrl->gpio_bank[id].gc.free = npcmgpio_gpio_free; + pctrl->gpio_bank[id].gc.free = pinctrl_gpio_free; for (i = 0 ; i < NPCM8XX_DEBOUNCE_MAX ; i++) pctrl->gpio_bank[id].debounce.set_val[i] = false; pctrl->gpio_bank[id].gc.add_pin_ranges = npcmgpio_add_pin_ranges; diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 84b47a6cc3a6..6a5f23cf7a2a 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -500,16 +500,11 @@ static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, "GPIO_SIGNAL_OUT_REG update failed: %d\n", ret); } -static int as3722_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int as3722_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { as3722_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int as3722_gpio_to_irq(struct gpio_chip *chip, unsigned offset) @@ -526,7 +521,7 @@ static const struct gpio_chip as3722_gpio_chip = { .free = gpiochip_generic_free, .get = as3722_gpio_get, .set = as3722_gpio_set, - .direction_input = as3722_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = as3722_gpio_direction_output, .to_irq = as3722_gpio_to_irq, .can_sleep = true, diff --git a/drivers/pinctrl/pinctrl-axp209.c b/drivers/pinctrl/pinctrl-axp209.c index 9f5b3ab8e184..2b4805e74eed 100644 --- a/drivers/pinctrl/pinctrl-axp209.c +++ b/drivers/pinctrl/pinctrl-axp209.c @@ -124,11 +124,6 @@ static int axp20x_gpio_get_reg(unsigned int offset) return -EINVAL; } -static int axp20x_gpio_input(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int axp20x_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct axp20x_pctl *pctl = gpiochip_get_data(chip); @@ -474,7 +469,7 @@ static int axp20x_pctl_probe(struct platform_device *pdev) pctl->chip.get = axp20x_gpio_get; pctl->chip.get_direction = axp20x_gpio_get_direction; pctl->chip.set = axp20x_gpio_set; - pctl->chip.direction_input = axp20x_gpio_input; + pctl->chip.direction_input = pinctrl_gpio_direction_input; pctl->chip.direction_output = axp20x_gpio_output; pctl->desc = of_device_get_match_data(dev); diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index fae80b52a6fc..04285c930e94 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -554,7 +554,7 @@ out: static int cy8c95x0_gpio_direction_input(struct gpio_chip *gc, 
unsigned int off) { - return pinctrl_gpio_direction_input(gc->base + off); + return pinctrl_gpio_direction_input(gc, off); } static int cy8c95x0_gpio_direction_output(struct gpio_chip *gc, @@ -571,7 +571,7 @@ static int cy8c95x0_gpio_direction_output(struct gpio_chip *gc, if (ret) return ret; - return pinctrl_gpio_direction_output(gc->base + off); + return pinctrl_gpio_direction_output(gc, off); } static int cy8c95x0_gpio_get_value(struct gpio_chip *gc, unsigned int off) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 2f220a47b749..ee718f6e2556 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -133,6 +133,8 @@ struct ingenic_pinctrl { struct pinctrl_pin_desc *pdesc; const struct ingenic_chip_info *info; + + struct gpio_chip *gc; }; struct ingenic_gpio_chip { @@ -3558,17 +3560,11 @@ static int ingenic_gpio_get(struct gpio_chip *gc, unsigned int offset) return (int) ingenic_gpio_get_value(jzgc, offset); } -static int ingenic_gpio_direction_input(struct gpio_chip *gc, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(gc->base + offset); -} - static int ingenic_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value) { ingenic_gpio_set(gc, offset, value); - return pinctrl_gpio_direction_output(gc->base + offset); + return pinctrl_gpio_direction_output(gc, offset); } static inline void ingenic_config_pin(struct ingenic_pinctrl *jzpc, @@ -3678,7 +3674,7 @@ static int ingenic_gpio_irq_request(struct irq_data *data) irq_hw_number_t irq = irqd_to_hwirq(data); int ret; - ret = ingenic_gpio_direction_input(gpio_chip, irq); + ret = pinctrl_gpio_direction_input(gpio_chip, irq); if (ret) return ret; @@ -4052,7 +4048,8 @@ static int ingenic_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case PIN_CONFIG_OUTPUT: - ret = pinctrl_gpio_direction_output(pin); + ret = pinctrl_gpio_direction_output(jzpc->gc, + pin - jzpc->gc->base); if (ret) return ret; @@ -4172,6 +4169,8 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, if (!jzgc) return -ENOMEM; + jzpc->gc = &jzgc->gc; + jzgc->jzpc = jzpc; jzgc->reg_base = bank * jzpc->info->reg_offset; @@ -4192,7 +4191,7 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, jzgc->gc.set = ingenic_gpio_set; jzgc->gc.get = ingenic_gpio_get; - jzgc->gc.direction_input = ingenic_gpio_direction_input; + jzgc->gc.direction_input = pinctrl_gpio_direction_input; jzgc->gc.direction_output = ingenic_gpio_direction_output; jzgc->gc.get_direction = ingenic_gpio_get_direction; jzgc->gc.request = gpiochip_generic_request; diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index f8ae2e974221..52aadd6d72a8 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1776,12 +1776,6 @@ static int ocelot_gpio_get_direction(struct gpio_chip *chip, return GPIO_LINE_DIRECTION_IN; } -static int ocelot_gpio_direction_input(struct gpio_chip *chip, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int ocelot_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { @@ -1795,7 +1789,7 @@ static int ocelot_gpio_direction_output(struct gpio_chip *chip, regmap_write(info->map, REG(OCELOT_GPIO_OUT_CLR, info, offset), pin); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip ocelot_gpiolib_chip = { @@ -1804,7 +1798,7 @@ 
static const struct gpio_chip ocelot_gpiolib_chip = { .set = ocelot_gpio_set, .get = ocelot_gpio_get, .get_direction = ocelot_gpio_get_direction, - .direction_input = ocelot_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = ocelot_gpio_direction_output, .owner = THIS_MODULE, }; diff --git a/drivers/pinctrl/pinctrl-rk805.c b/drivers/pinctrl/pinctrl-rk805.c index 2639a9ee82cd..56d916f2cee6 100644 --- a/drivers/pinctrl/pinctrl-rk805.c +++ b/drivers/pinctrl/pinctrl-rk805.c @@ -286,17 +286,11 @@ static void rk805_gpio_set(struct gpio_chip *chip, offset, value); } -static int rk805_gpio_direction_input(struct gpio_chip *chip, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int rk805_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { rk805_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int rk805_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -330,7 +324,7 @@ static const struct gpio_chip rk805_gpio_chip = { .get_direction = rk805_gpio_get_direction, .get = rk805_gpio_get, .set = rk805_gpio_set, - .direction_input = rk805_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = rk805_gpio_direction_output, .can_sleep = true, .base = -1, diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index c1f36b164ea5..1485573b523c 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -717,20 +717,13 @@ static void st_gpio_set(struct gpio_chip *chip, unsigned offset, int value) __st_gpio_set(bank, offset, value); } -static int st_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - pinctrl_gpio_direction_input(chip->base + offset); - - return 0; -} - static int st_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { struct st_gpio_bank *bank = gpiochip_get_data(chip); __st_gpio_set(bank, offset, value); - pinctrl_gpio_direction_output(chip->base + offset); + pinctrl_gpio_direction_output(chip, offset); return 0; } @@ -1330,7 +1323,7 @@ static int st_gpio_irq_request_resources(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - st_gpio_direction_input(gc, d->hwirq); + pinctrl_gpio_direction_input(gc, d->hwirq); return gpiochip_reqres_irq(gc, d->hwirq); } @@ -1488,7 +1481,7 @@ static const struct gpio_chip st_gpio_template = { .free = gpiochip_generic_free, .get = st_gpio_get, .set = st_gpio_set, - .direction_input = st_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = st_gpio_direction_output, .get_direction = st_gpio_get_direction, .ngpio = ST_GPIO_PINS_PER_BANK, diff --git a/drivers/pinctrl/renesas/gpio.c b/drivers/pinctrl/renesas/gpio.c index 5758daf94fe2..a5136dacaaf2 100644 --- a/drivers/pinctrl/renesas/gpio.c +++ b/drivers/pinctrl/renesas/gpio.c @@ -135,12 +135,12 @@ static int gpio_pin_request(struct gpio_chip *gc, unsigned offset) if (idx < 0 || pfc->info->pins[idx].enum_id == 0) return -EINVAL; - return pinctrl_gpio_request(gc->base + offset); + return pinctrl_gpio_request(gc, offset); } static void gpio_pin_free(struct gpio_chip *gc, unsigned offset) { - return pinctrl_gpio_free(gc->base + offset); + return pinctrl_gpio_free(gc, offset); } static void gpio_pin_set_value(struct sh_pfc_chip *chip, unsigned offset, @@ -164,7 +164,7 @@ static void gpio_pin_set_value(struct 
sh_pfc_chip *chip, unsigned offset, static int gpio_pin_direction_input(struct gpio_chip *gc, unsigned offset) { - return pinctrl_gpio_direction_input(gc->base + offset); + return pinctrl_gpio_direction_input(gc, offset); } static int gpio_pin_direction_output(struct gpio_chip *gc, unsigned offset, @@ -172,7 +172,7 @@ static int gpio_pin_direction_output(struct gpio_chip *gc, unsigned offset, { gpio_pin_set_value(gpiochip_get_data(gc), offset, value); - return pinctrl_gpio_direction_output(gc->base + offset); + return pinctrl_gpio_direction_output(gc, offset); } static int gpio_pin_get(struct gpio_chip *gc, unsigned offset) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index c7c6d912a975..9de350ad7e7d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -1056,7 +1056,7 @@ static int rzg2l_gpio_request(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - ret = pinctrl_gpio_request(chip->base + offset); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -1181,7 +1181,7 @@ static void rzg2l_gpio_free(struct gpio_chip *chip, unsigned int offset) { unsigned int virq; - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); virq = irq_find_mapping(chip->irq.domain, offset); if (virq) diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index 52aeafaba4b6..21d7d5ac8c4a 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -754,7 +754,7 @@ static int rzv2m_gpio_request(struct gpio_chip *chip, unsigned int offset) u8 bit = RZV2M_PIN_ID_TO_PIN(offset); int ret; - ret = pinctrl_gpio_request(chip->base + offset); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -832,7 +832,7 @@ static int rzv2m_gpio_get(struct gpio_chip *chip, unsigned int offset) static void rzv2m_gpio_free(struct gpio_chip *chip, unsigned int offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* * Set the GPIO as an input to ensure that the next GPIO request won't diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index 722681e0b89b..a05570c7d833 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -204,14 +204,13 @@ static void plgpio_set_value(struct gpio_chip *chip, unsigned offset, int value) static int plgpio_request(struct gpio_chip *chip, unsigned offset) { struct plgpio *plgpio = gpiochip_get_data(chip); - int gpio = chip->base + offset; unsigned long flags; int ret = 0; if (offset >= chip->ngpio) return -EINVAL; - ret = pinctrl_gpio_request(gpio); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -249,14 +248,13 @@ err1: if (!IS_ERR(plgpio->clk)) clk_disable(plgpio->clk); err0: - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(chip, offset); return ret; } static void plgpio_free(struct gpio_chip *chip, unsigned offset) { struct plgpio *plgpio = gpiochip_get_data(chip); - int gpio = chip->base + offset; unsigned long flags; if (offset >= chip->ngpio) @@ -280,7 +278,7 @@ disable_clk: if (!IS_ERR(plgpio->clk)) clk_disable(plgpio->clk); - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(chip, offset); } /* PLGPIO IRQ */ diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index 530fe340a9a1..ea70b8c61679 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ 
b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -916,16 +916,6 @@ static struct pinctrl_desc starfive_desc = { .custom_conf_items = starfive_pinconf_custom_conf_items, }; -static int starfive_gpio_request(struct gpio_chip *gc, unsigned int gpio) -{ - return pinctrl_gpio_request(gc->base + gpio); -} - -static void starfive_gpio_free(struct gpio_chip *gc, unsigned int gpio) -{ - pinctrl_gpio_free(gc->base + gpio); -} - static int starfive_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio) { struct starfive_pinctrl *sfp = container_of(gc, struct starfive_pinctrl, gc); @@ -1309,8 +1299,8 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.label = dev_name(dev); sfp->gc.owner = THIS_MODULE; - sfp->gc.request = starfive_gpio_request; - sfp->gc.free = starfive_gpio_free; + sfp->gc.request = pinctrl_gpio_request; + sfp->gc.free = pinctrl_gpio_free; sfp->gc.get_direction = starfive_gpio_get_direction; sfp->gc.direction_input = starfive_gpio_direction_input; sfp->gc.direction_output = starfive_gpio_direction_output; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 640f827a9b2c..9d71e8c13310 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -545,16 +545,6 @@ static const struct pinconf_ops jh7110_pinconf_ops = { .is_generic = true, }; -static int jh7110_gpio_request(struct gpio_chip *gc, unsigned int gpio) -{ - return pinctrl_gpio_request(gc->base + gpio); -} - -static void jh7110_gpio_free(struct gpio_chip *gc, unsigned int gpio) -{ - pinctrl_gpio_free(gc->base + gpio); -} - static int jh7110_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio) { @@ -940,8 +930,8 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) sfp->gc.label = dev_name(dev); sfp->gc.owner = THIS_MODULE; - sfp->gc.request = jh7110_gpio_request; - sfp->gc.free = jh7110_gpio_free; + sfp->gc.request = pinctrl_gpio_request; + sfp->gc.free = pinctrl_gpio_free; sfp->gc.get_direction = jh7110_gpio_get_direction; sfp->gc.direction_input = jh7110_gpio_direction_input; sfp->gc.direction_output = jh7110_gpio_direction_output; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a73385a431de..64e8201c7eac 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -217,12 +217,7 @@ static int stm32_gpio_request(struct gpio_chip *chip, unsigned offset) return -EINVAL; } - return pinctrl_gpio_request(chip->base + offset); -} - -static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset) -{ - pinctrl_gpio_free(chip->base + offset); + return pinctrl_gpio_request(chip, offset); } static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -239,18 +234,13 @@ static void stm32_gpio_set(struct gpio_chip *chip, unsigned offset, int value) __stm32_gpio_set(bank, offset, value); } -static int stm32_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int stm32_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { struct stm32_gpio_bank *bank = gpiochip_get_data(chip); __stm32_gpio_set(bank, offset, value); - pinctrl_gpio_direction_output(chip->base + offset); + pinctrl_gpio_direction_output(chip, offset); return 0; } @@ -316,10 +306,10 @@ static int stm32_gpio_init_valid_mask(struct gpio_chip *chip, static const struct gpio_chip stm32_gpio_template = { .request 
= stm32_gpio_request, - .free = stm32_gpio_free, + .free = pinctrl_gpio_free, .get = stm32_gpio_get, .set = stm32_gpio_set, - .direction_input = stm32_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = stm32_gpio_direction_output, .to_irq = stm32_gpio_to_irq, .get_direction = stm32_gpio_get_direction, @@ -381,7 +371,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); int ret; - ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); + ret = pinctrl_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); if (ret) return ret; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 6fac30de1c6a..fce92111a32e 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -526,16 +526,11 @@ static void wmt_gpio_set_value(struct gpio_chip *chip, unsigned offset, wmt_clearbits(data, reg_data_out, BIT(bit)); } -static int wmt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int wmt_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { wmt_gpio_set_value(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip wmt_gpio_chip = { @@ -544,7 +539,7 @@ static const struct gpio_chip wmt_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get_direction = wmt_gpio_get_direction, - .direction_input = wmt_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = wmt_gpio_direction_output, .get = wmt_gpio_get_value, .set = wmt_gpio_set_value, diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 282cd7d24077..3f7a74788802 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -108,6 +108,7 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) container_of(pccontext->clk, struct ptp_clock, clock); struct timestamp_event_queue *queue; char debugfsname[32]; + unsigned long flags; queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) @@ -119,7 +120,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) } bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS); spin_lock_init(&queue->lock); + spin_lock_irqsave(&ptp->tsevqs_lock, flags); list_add_tail(&queue->qlist, &ptp->tsevqs); + spin_unlock_irqrestore(&ptp->tsevqs_lock, flags); pccontext->private_clkdata = queue; /* Debugfs contents */ @@ -139,16 +142,16 @@ int ptp_release(struct posix_clock_context *pccontext) { struct timestamp_event_queue *queue = pccontext->private_clkdata; unsigned long flags; + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); - if (queue) { - debugfs_remove(queue->debugfs_instance); - pccontext->private_clkdata = NULL; - spin_lock_irqsave(&queue->lock, flags); - list_del(&queue->qlist); - spin_unlock_irqrestore(&queue->lock, flags); - bitmap_free(queue->mask); - kfree(queue); - } + debugfs_remove(queue->debugfs_instance); + pccontext->private_clkdata = NULL; + spin_lock_irqsave(&ptp->tsevqs_lock, flags); + list_del(&queue->qlist); + spin_unlock_irqrestore(&ptp->tsevqs_lock, flags); + bitmap_free(queue->mask); + kfree(queue); return 0; } @@ -585,7 +588,5 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, free_event: 
 	kfree(event);
 exit:
-	if (result < 0)
-		ptp_release(pccontext);
 	return result;
 }
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 3d1b0a97301c..3134568af622 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -179,11 +179,11 @@ static void ptp_clock_release(struct device *dev)
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	/* Delete first entry */
+	spin_lock_irqsave(&ptp->tsevqs_lock, flags);
 	tsevq = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue,
 				 qlist);
-	spin_lock_irqsave(&tsevq->lock, flags);
 	list_del(&tsevq->qlist);
-	spin_unlock_irqrestore(&tsevq->lock, flags);
+	spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
 	bitmap_free(tsevq->mask);
 	kfree(tsevq);
 	debugfs_remove(ptp->debugfs_root);
@@ -247,6 +247,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (!queue)
 		goto no_memory_queue;
 	list_add_tail(&queue->qlist, &ptp->tsevqs);
+	spin_lock_init(&ptp->tsevqs_lock);
 	queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
 	if (!queue->mask)
 		goto no_memory_bitmap;
@@ -407,6 +408,7 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
 {
 	struct timestamp_event_queue *tsevq;
 	struct pps_event_time evt;
+	unsigned long flags;
 
 	switch (event->type) {
 
@@ -415,10 +417,12 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
 
 	case PTP_CLOCK_EXTTS:
 		/* Enqueue timestamp on selected queues */
+		spin_lock_irqsave(&ptp->tsevqs_lock, flags);
 		list_for_each_entry(tsevq, &ptp->tsevqs, qlist) {
 			if (test_bit((unsigned int)event->index, tsevq->mask))
 				enqueue_external_timestamp(tsevq, event);
 		}
+		spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
 		wake_up_interruptible(&ptp->tsev_wq);
 		break;
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 52f87e394aa6..35fde0a05746 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -44,6 +44,7 @@ struct ptp_clock {
 	struct pps_device *pps_source;
 	long dialed_frequency; /* remembers the frequency adjustment */
 	struct list_head tsevqs; /* timestamp fifo list */
+	spinlock_t tsevqs_lock; /* protects tsevqs from concurrent access */
 	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
 	wait_queue_head_t tsev_wq;
 	int defunct; /* tells readers to go away when clock is being removed */
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 8ebcddf91f7b..4b956d661755 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -173,8 +173,8 @@ config PWM_CLPS711X
 	  will be called pwm-clps711x.
 
 config PWM_CRC
-	bool "Intel Crystalcove (CRC) PWM support"
-	depends on X86 && INTEL_SOC_PMIC
+	tristate "Intel Crystalcove (CRC) PWM support"
+	depends on INTEL_SOC_PMIC
 	help
 	  Generic PWM framework driver for Crystalcove (CRC) PMIC based PWM
 	  control.
@@ -186,9 +186,19 @@ config PWM_CROS_EC
 	  PWM driver for exposing a PWM attached to the ChromeOS Embedded
 	  Controller.
 
+config PWM_DWC_CORE
+	tristate
+	depends on HAS_IOMEM
+	help
+	  PWM driver for Synopsys DWC PWM Controller.
+
+	  To compile this driver as a module, build the dependencies as
+	  modules; this will be called pwm-dwc-core.
+
 config PWM_DWC
-	tristate "DesignWare PWM Controller"
-	depends on PCI
+	tristate "DesignWare PWM Controller (PCI bus)"
+	depends on HAS_IOMEM && PCI
+	select PWM_DWC_CORE
 	help
 	  PWM driver for Synopsys DWC PWM Controller attached to a PCI bus.
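The new PWM_DWC_CORE/PWM_DWC pair follows the usual core-plus-bus-glue split: the PCI module only discovers the device and hands the mapped registers to the shared core. Below is a minimal sketch of such glue in C, assuming the dwc_pwm_alloc() helper and struct dwc_pwm introduced further down in this patch; the probe name and BAR handling here are illustrative, not the exact upstream code.

/* Illustrative PCI glue on top of pwm-dwc-core; not the upstream probe. */
#include <linux/pci.h>
#include <linux/pwm.h>

#include "pwm-dwc.h"

static int dwc_pwm_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
	struct dwc_pwm *dwc;
	int ret;

	ret = pcim_enable_device(pci);		/* managed: undone on unbind */
	if (ret)
		return ret;

	ret = pcim_iomap_regions(pci, BIT(0), "pwm-dwc");	/* map BAR 0 */
	if (ret)
		return ret;

	dwc = dwc_pwm_alloc(&pci->dev);		/* fills in chip.ops and chip.npwm */
	if (!dwc)
		return -ENOMEM;

	dwc->base = pcim_iomap_table(pci)[0];

	return devm_pwmchip_add(&pci->dev, &dwc->chip);
}

Because everything here is device-managed, the glue needs no remove callback; the same devm pattern is what lets the driver conversions below drop their .remove_new implementations.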
@@ -407,7 +417,7 @@ config PWM_MEDIATEK
 
 config PWM_MICROCHIP_CORE
 	tristate "Microchip corePWM PWM support"
-	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+	depends on ARCH_MICROCHIP_POLARFIRE || COMPILE_TEST
 	depends on HAS_IOMEM && OF
 	help
 	  PWM driver for Microchip FPGA soft IP core.
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c822389c2a24..c5ec9e168ee7 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_PWM_CLK)		+= pwm-clk.o
 obj-$(CONFIG_PWM_CLPS711X)	+= pwm-clps711x.o
 obj-$(CONFIG_PWM_CRC)		+= pwm-crc.o
 obj-$(CONFIG_PWM_CROS_EC)	+= pwm-cros-ec.o
+obj-$(CONFIG_PWM_DWC_CORE)	+= pwm-dwc-core.o
 obj-$(CONFIG_PWM_DWC)		+= pwm-dwc.o
 obj-$(CONFIG_PWM_EP93XX)	+= pwm-ep93xx.o
 obj-$(CONFIG_PWM_FSL_FTM)	+= pwm-fsl-ftm.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dc66e3405bf5..29078486534d 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -89,13 +89,13 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 	if (test_bit(PWMF_REQUESTED, &pwm->flags))
 		return -EBUSY;
 
-	if (!try_module_get(pwm->chip->ops->owner))
+	if (!try_module_get(pwm->chip->owner))
 		return -ENODEV;
 
 	if (pwm->chip->ops->request) {
 		err = pwm->chip->ops->request(pwm->chip, pwm);
 		if (err) {
-			module_put(pwm->chip->ops->owner);
+			module_put(pwm->chip->owner);
 			return err;
 		}
 	}
@@ -208,36 +208,6 @@ static void of_pwmchip_remove(struct pwm_chip *chip)
 		of_node_put(chip->dev->of_node);
 }
 
-/**
- * pwm_set_chip_data() - set private chip data for a PWM
- * @pwm: PWM device
- * @data: pointer to chip-specific data
- *
- * Returns: 0 on success or a negative error code on failure.
- */
-int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-{
-	if (!pwm)
-		return -EINVAL;
-
-	pwm->chip_data = data;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pwm_set_chip_data);
-
-/**
- * pwm_get_chip_data() - get private chip data for a PWM
- * @pwm: PWM device
- *
- * Returns: A pointer to the chip-private data for the PWM device.
- */
-void *pwm_get_chip_data(struct pwm_device *pwm)
-{
-	return pwm ? pwm->chip_data : NULL;
-}
-EXPORT_SYMBOL_GPL(pwm_get_chip_data);
-
 static bool pwm_ops_check(const struct pwm_chip *chip)
 {
 	const struct pwm_ops *ops = chip->ops;
@@ -253,14 +223,16 @@ static bool pwm_ops_check(const struct pwm_chip *chip)
 }
 
 /**
- * pwmchip_add() - register a new PWM chip
+ * __pwmchip_add() - register a new PWM chip
 * @chip: the PWM chip to add
+ * @owner: reference to the module providing the chip.
 *
- * Register a new PWM chip.
+ * Register a new PWM chip. @owner is supposed to be THIS_MODULE; use the
+ * pwmchip_add wrapper to do this right.
 *
 * Returns: 0 on success or a negative error code on failure.
*/ -int pwmchip_add(struct pwm_chip *chip) +int __pwmchip_add(struct pwm_chip *chip, struct module *owner) { struct pwm_device *pwm; unsigned int i; @@ -272,6 +244,8 @@ int pwmchip_add(struct pwm_chip *chip) if (!pwm_ops_check(chip)) return -EINVAL; + chip->owner = owner; + chip->pwms = kcalloc(chip->npwm, sizeof(*pwm), GFP_KERNEL); if (!chip->pwms) return -ENOMEM; @@ -306,7 +280,7 @@ int pwmchip_add(struct pwm_chip *chip) return 0; } -EXPORT_SYMBOL_GPL(pwmchip_add); +EXPORT_SYMBOL_GPL(__pwmchip_add); /** * pwmchip_remove() - remove a PWM chip @@ -338,17 +312,17 @@ static void devm_pwmchip_remove(void *data) pwmchip_remove(chip); } -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip) +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner) { int ret; - ret = pwmchip_add(chip); + ret = __pwmchip_add(chip, owner); if (ret) return ret; return devm_add_action_or_reset(dev, devm_pwmchip_remove, chip); } -EXPORT_SYMBOL_GPL(devm_pwmchip_add); +EXPORT_SYMBOL_GPL(__devm_pwmchip_add); /** * pwm_request_from_chip() - request a PWM device relative to a PWM chip @@ -976,10 +950,9 @@ void pwm_put(struct pwm_device *pwm) if (pwm->chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); - pwm_set_chip_data(pwm, NULL); pwm->label = NULL; - module_put(pwm->chip->ops->owner); + module_put(pwm->chip->owner); out: mutex_unlock(&pwm_lock); } diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 583a7d69c741..670d33daea84 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -181,7 +181,6 @@ static int ab8500_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ab8500_pwm_ops = { .apply = ab8500_pwm_apply, .get_state = ab8500_pwm_get_state, - .owner = THIS_MODULE, }; static int ab8500_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-apple.c b/drivers/pwm/pwm-apple.c index 8e7d67fb5fbe..4d755b628d9e 100644 --- a/drivers/pwm/pwm-apple.c +++ b/drivers/pwm/pwm-apple.c @@ -99,7 +99,6 @@ static int apple_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops apple_pwm_ops = { .apply = apple_pwm_apply, .get_state = apple_pwm_get_state, - .owner = THIS_MODULE, }; static int apple_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index e271d920151e..07920e034757 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -170,7 +170,6 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_hlcdc_pwm_ops = { .apply = atmel_hlcdc_pwm_apply, - .owner = THIS_MODULE, }; static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = { diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index c00dd37c5fbd..98b33c016c3c 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -364,7 +364,6 @@ static const struct pwm_ops atmel_tcb_pwm_ops = { .request = atmel_tcb_pwm_request, .free = atmel_tcb_pwm_free, .apply = atmel_tcb_pwm_apply, - .owner = THIS_MODULE, }; static struct atmel_tcb_config tcb_rm9200_config = { diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 1f73325d1bea..47bcc8a3bf9d 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -402,7 +402,6 @@ static int atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_pwm_ops = { .apply = atmel_pwm_apply, .get_state = 
atmel_pwm_get_state, - .owner = THIS_MODULE, }; static const struct atmel_pwm_data atmel_sam9rl_pwm_data = { @@ -547,7 +546,7 @@ disable_clk: static struct platform_driver atmel_pwm_driver = { .driver = { .name = "atmel-pwm", - .of_match_table = of_match_ptr(atmel_pwm_dt_ids), + .of_match_table = atmel_pwm_dt_ids, }, .probe = atmel_pwm_probe, }; diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 7d70b6f186a6..758254025683 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -183,7 +183,6 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, - .owner = THIS_MODULE, }; static int iproc_pwmc_probe(struct platform_device *pdev) @@ -207,18 +206,10 @@ static int iproc_pwmc_probe(struct platform_device *pdev) if (IS_ERR(ip->base)) return PTR_ERR(ip->base); - ip->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(ip->clk)) { - dev_err(&pdev->dev, "failed to get clock: %ld\n", - PTR_ERR(ip->clk)); - return PTR_ERR(ip->clk); - } - - ret = clk_prepare_enable(ip->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); - return ret; - } + ip->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(ip->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(ip->clk), + "failed to get clock\n"); /* Set full drive and normal polarity for all channels */ value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); @@ -230,22 +221,12 @@ static int iproc_pwmc_probe(struct platform_device *pdev) writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); - ret = pwmchip_add(&ip->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - clk_disable_unprepare(ip->clk); - } - - return ret; -} - -static void iproc_pwmc_remove(struct platform_device *pdev) -{ - struct iproc_pwmc *ip = platform_get_drvdata(pdev); + ret = devm_pwmchip_add(&pdev->dev, &ip->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to add PWM chip\n"); - pwmchip_remove(&ip->chip); - - clk_disable_unprepare(ip->clk); + return 0; } static const struct of_device_id bcm_iproc_pwmc_dt[] = { @@ -260,7 +241,6 @@ static struct platform_driver iproc_pwmc_driver = { .of_match_table = bcm_iproc_pwmc_dt, }, .probe = iproc_pwmc_probe, - .remove_new = iproc_pwmc_remove, }; module_platform_driver(iproc_pwmc_driver); diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index e5b00cc9f7a7..15d6ed03c3ce 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -269,7 +269,6 @@ static int kona_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops kona_pwm_ops = { .apply = kona_pwmc_apply, - .owner = THIS_MODULE, }; static int kona_pwmc_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index bdfc2a5ec0d6..9777babd5b95 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -129,7 +129,6 @@ static const struct pwm_ops bcm2835_pwm_ops = { .request = bcm2835_pwm_request, .free = bcm2835_pwm_free, .apply = bcm2835_pwm_apply, - .owner = THIS_MODULE, }; static int bcm2835_pwm_probe(struct platform_device *pdev) @@ -147,41 +146,42 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->base)) return PTR_ERR(pc->base); - pc->clk = devm_clk_get(&pdev->dev, NULL); + pc->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(pc->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(pc->clk), 
"clock not found\n"); - ret = clk_prepare_enable(pc->clk); - if (ret) - return ret; - pc->chip.dev = &pdev->dev; pc->chip.ops = &bcm2835_pwm_ops; pc->chip.npwm = 2; - platform_set_drvdata(pdev, pc); - - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) - goto add_fail; + return dev_err_probe(&pdev->dev, ret, + "failed to add pwmchip\n"); return 0; +} + +static int bcm2835_pwm_suspend(struct device *dev) +{ + struct bcm2835_pwm *pc = dev_get_drvdata(dev); -add_fail: clk_disable_unprepare(pc->clk); - return ret; + + return 0; } -static void bcm2835_pwm_remove(struct platform_device *pdev) +static int bcm2835_pwm_resume(struct device *dev) { - struct bcm2835_pwm *pc = platform_get_drvdata(pdev); - - pwmchip_remove(&pc->chip); + struct bcm2835_pwm *pc = dev_get_drvdata(dev); - clk_disable_unprepare(pc->clk); + return clk_prepare_enable(pc->clk); } +static DEFINE_SIMPLE_DEV_PM_OPS(bcm2835_pwm_pm_ops, bcm2835_pwm_suspend, + bcm2835_pwm_resume); + static const struct of_device_id bcm2835_pwm_of_match[] = { { .compatible = "brcm,bcm2835-pwm", }, { /* sentinel */ } @@ -192,9 +192,9 @@ static struct platform_driver bcm2835_pwm_driver = { .driver = { .name = "bcm2835-pwm", .of_match_table = bcm2835_pwm_of_match, + .pm = pm_ptr(&bcm2835_pwm_pm_ops), }, .probe = bcm2835_pwm_probe, - .remove_new = bcm2835_pwm_remove, }; module_platform_driver(bcm2835_pwm_driver); diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 0971c666afd1..ba2d79991769 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -39,6 +39,8 @@ #define BERLIN_PWM_TCNT 0xc #define BERLIN_PWM_MAX_TCNT 65535 +#define BERLIN_PWM_NUMPWMS 4 + struct berlin_pwm_channel { u32 enable; u32 ctrl; @@ -50,6 +52,7 @@ struct berlin_pwm_chip { struct pwm_chip chip; struct clk *clk; void __iomem *base; + struct berlin_pwm_channel channel[BERLIN_PWM_NUMPWMS]; }; static inline struct berlin_pwm_chip *to_berlin_pwm_chip(struct pwm_chip *chip) @@ -70,24 +73,6 @@ static inline void berlin_pwm_writel(struct berlin_pwm_chip *bpc, writel_relaxed(value, bpc->base + channel * 0x10 + offset); } -static int berlin_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct berlin_pwm_channel *channel; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return -ENOMEM; - - return pwm_set_chip_data(pwm, channel); -} - -static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm); - - kfree(channel); -} - static int berlin_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u64 duty_ns, u64 period_ns) { @@ -202,10 +187,7 @@ static int berlin_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops berlin_pwm_ops = { - .request = berlin_pwm_request, - .free = berlin_pwm_free, .apply = berlin_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id berlin_pwm_match[] = { @@ -227,39 +209,23 @@ static int berlin_pwm_probe(struct platform_device *pdev) if (IS_ERR(bpc->base)) return PTR_ERR(bpc->base); - bpc->clk = devm_clk_get(&pdev->dev, NULL); + bpc->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(bpc->clk)) return PTR_ERR(bpc->clk); - ret = clk_prepare_enable(bpc->clk); - if (ret) - return ret; - bpc->chip.dev = &pdev->dev; bpc->chip.ops = &berlin_pwm_ops; - bpc->chip.npwm = 4; + bpc->chip.npwm = BERLIN_PWM_NUMPWMS; - ret = pwmchip_add(&bpc->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - 
clk_disable_unprepare(bpc->clk); - return ret; - } + ret = devm_pwmchip_add(&pdev->dev, &bpc->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); platform_set_drvdata(pdev, bpc); return 0; } -static void berlin_pwm_remove(struct platform_device *pdev) -{ - struct berlin_pwm_chip *bpc = platform_get_drvdata(pdev); - - pwmchip_remove(&bpc->chip); - - clk_disable_unprepare(bpc->clk); -} - #ifdef CONFIG_PM_SLEEP static int berlin_pwm_suspend(struct device *dev) { @@ -267,11 +233,7 @@ static int berlin_pwm_suspend(struct device *dev) unsigned int i; for (i = 0; i < bpc->chip.npwm; i++) { - struct berlin_pwm_channel *channel; - - channel = pwm_get_chip_data(&bpc->chip.pwms[i]); - if (!channel) - continue; + struct berlin_pwm_channel *channel = &bpc->channel[i]; channel->enable = berlin_pwm_readl(bpc, i, BERLIN_PWM_ENABLE); channel->ctrl = berlin_pwm_readl(bpc, i, BERLIN_PWM_CONTROL); @@ -295,11 +257,7 @@ static int berlin_pwm_resume(struct device *dev) return ret; for (i = 0; i < bpc->chip.npwm; i++) { - struct berlin_pwm_channel *channel; - - channel = pwm_get_chip_data(&bpc->chip.pwms[i]); - if (!channel) - continue; + struct berlin_pwm_channel *channel = &bpc->channel[i]; berlin_pwm_writel(bpc, i, channel->ctrl, BERLIN_PWM_CONTROL); berlin_pwm_writel(bpc, i, channel->duty, BERLIN_PWM_DUTY); @@ -316,7 +274,6 @@ static SIMPLE_DEV_PM_OPS(berlin_pwm_pm_ops, berlin_pwm_suspend, static struct platform_driver berlin_pwm_driver = { .probe = berlin_pwm_probe, - .remove_new = berlin_pwm_remove, .driver = { .name = "berlin-pwm", .of_match_table = berlin_pwm_match, diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index a3faa9a3de7c..b723c2d4f485 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -220,7 +220,6 @@ static int brcmstb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops brcmstb_pwm_ops = { .apply = brcmstb_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id brcmstb_pwm_of_match[] = { @@ -238,17 +237,10 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; - p->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(p->clk)) { - dev_err(&pdev->dev, "failed to obtain clock\n"); - return PTR_ERR(p->clk); - } - - ret = clk_prepare_enable(p->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); - return ret; - } + p->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(p->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(p->clk), + "failed to obtain clock\n"); platform_set_drvdata(pdev, p); @@ -257,30 +249,14 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) p->chip.npwm = 2; p->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(p->base)) { - ret = PTR_ERR(p->base); - goto out_clk; - } + if (IS_ERR(p->base)) + return PTR_ERR(p->base); - ret = pwmchip_add(&p->chip); - if (ret) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - goto out_clk; - } + ret = devm_pwmchip_add(&pdev->dev, &p->chip); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); return 0; - -out_clk: - clk_disable_unprepare(p->clk); - return ret; -} - -static void brcmstb_pwm_remove(struct platform_device *pdev) -{ - struct brcmstb_pwm *p = platform_get_drvdata(pdev); - - pwmchip_remove(&p->chip); - clk_disable_unprepare(p->clk); } #ifdef CONFIG_PM_SLEEP @@ -288,7 +264,7 @@ static int brcmstb_pwm_suspend(struct device *dev) { struct brcmstb_pwm *p = dev_get_drvdata(dev); - 
clk_disable(p->clk); + clk_disable_unprepare(p->clk); return 0; } @@ -297,9 +273,7 @@ static int brcmstb_pwm_resume(struct device *dev) { struct brcmstb_pwm *p = dev_get_drvdata(dev); - clk_enable(p->clk); - - return 0; + return clk_prepare_enable(p->clk); } #endif @@ -308,7 +282,6 @@ static SIMPLE_DEV_PM_OPS(brcmstb_pwm_pm_ops, brcmstb_pwm_suspend, static struct platform_driver brcmstb_pwm_driver = { .probe = brcmstb_pwm_probe, - .remove_new = brcmstb_pwm_remove, .driver = { .name = "pwm-brcmstb", .of_match_table = brcmstb_pwm_of_match, diff --git a/drivers/pwm/pwm-clk.c b/drivers/pwm/pwm-clk.c index 0ee4d2aee4df..9dd88b386907 100644 --- a/drivers/pwm/pwm-clk.c +++ b/drivers/pwm/pwm-clk.c @@ -77,7 +77,6 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_clk_ops = { .apply = pwm_clk_apply, - .owner = THIS_MODULE, }; static int pwm_clk_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index b0d91142da8d..42179b3f7ec3 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -72,7 +72,6 @@ static int clps711x_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops clps711x_pwm_ops = { .request = clps711x_pwm_request, .apply = clps711x_pwm_apply, - .owner = THIS_MODULE, }; static struct pwm_device *clps711x_pwm_xlate(struct pwm_chip *chip, diff --git a/drivers/pwm/pwm-crc.c b/drivers/pwm/pwm-crc.c index b9f063dc6b5f..2b0b659eee97 100644 --- a/drivers/pwm/pwm-crc.c +++ b/drivers/pwm/pwm-crc.c @@ -184,5 +184,8 @@ static struct platform_driver crystalcove_pwm_driver = { .name = "crystal_cove_pwm", }, }; +module_platform_driver(crystalcove_pwm_driver); -builtin_platform_driver(crystalcove_pwm_driver); +MODULE_ALIAS("platform:crystal_cove_pwm"); +MODULE_DESCRIPTION("Intel Crystalcove (CRC) PWM support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index baaac0c33aa0..4fbd23e4ef69 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -22,12 +22,14 @@ * @ec: Pointer to EC device * @chip: PWM controller chip * @use_pwm_type: Use PWM types instead of generic channels + * @channel: array with per-channel data */ struct cros_ec_pwm_device { struct device *dev; struct cros_ec_device *ec; struct pwm_chip chip; bool use_pwm_type; + struct cros_ec_pwm *channel; }; /** @@ -43,26 +45,6 @@ static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chi return container_of(chip, struct cros_ec_pwm_device, chip); } -static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct cros_ec_pwm *channel; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return -ENOMEM; - - pwm_set_chip_data(pwm, channel); - - return 0; -} - -static void cros_ec_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct cros_ec_pwm *channel = pwm_get_chip_data(pwm); - - kfree(channel); -} - static int cros_ec_dt_type_to_pwm_type(u8 dt_index, u8 *pwm_type) { switch (dt_index) { @@ -158,7 +140,7 @@ static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); - struct cros_ec_pwm *channel = pwm_get_chip_data(pwm); + struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm]; u16 duty_cycle; int ret; @@ -188,7 +170,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { struct 
cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
-	struct cros_ec_pwm *channel = pwm_get_chip_data(pwm);
+	struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm];
 	int ret;
 
 	ret = cros_ec_pwm_get_duty(ec_pwm, pwm->hwpwm);
@@ -237,11 +219,8 @@ cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 }
 
 static const struct pwm_ops cros_ec_pwm_ops = {
-	.request = cros_ec_pwm_request,
-	.free = cros_ec_pwm_free,
 	.get_state = cros_ec_pwm_get_state,
 	.apply = cros_ec_pwm_apply,
-	.owner = THIS_MODULE,
 };
 
 /*
@@ -286,10 +265,8 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
 	struct pwm_chip *chip;
 	int ret;
 
-	if (!ec) {
-		dev_err(dev, "no parent EC device\n");
-		return -EINVAL;
-	}
+	if (!ec)
+		return dev_err_probe(dev, -EINVAL, "no parent EC device\n");
 
 	ec_pwm = devm_kzalloc(dev, sizeof(*ec_pwm), GFP_KERNEL);
 	if (!ec_pwm)
@@ -310,32 +287,23 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
 		chip->npwm = CROS_EC_PWM_DT_COUNT;
 	} else {
 		ret = cros_ec_num_pwms(ec_pwm);
-		if (ret < 0) {
-			dev_err(dev, "Couldn't find PWMs: %d\n", ret);
-			return ret;
-		}
+		if (ret < 0)
+			return dev_err_probe(dev, ret, "Couldn't find PWMs\n");
 		chip->npwm = ret;
 	}
 
-	dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
-
-	ret = pwmchip_add(chip);
-	if (ret < 0) {
-		dev_err(dev, "cannot register PWM: %d\n", ret);
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, ec_pwm);
+	ec_pwm->channel = devm_kcalloc(dev, chip->npwm, sizeof(*ec_pwm->channel),
+				       GFP_KERNEL);
+	if (!ec_pwm->channel)
+		return -ENOMEM;
 
-	return ret;
-}
+	dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
 
-static void cros_ec_pwm_remove(struct platform_device *dev)
-{
-	struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev);
-	struct pwm_chip *chip = &ec_pwm->chip;
+	ret = devm_pwmchip_add(dev, chip);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "cannot register PWM\n");
 
-	pwmchip_remove(chip);
+	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -349,7 +317,6 @@ MODULE_DEVICE_TABLE(of, cros_ec_pwm_of_match);
 
 static struct platform_driver cros_ec_pwm_driver = {
 	.probe = cros_ec_pwm_probe,
-	.remove_new = cros_ec_pwm_remove,
 	.driver = {
 		.name = "cros-ec-pwm",
 		.of_match_table = of_match_ptr(cros_ec_pwm_of_match),
diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
new file mode 100644
index 000000000000..ea63dd741f5c
--- /dev/null
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DesignWare PWM Controller driver core
+ *
+ * Copyright (C) 2018-2020 Intel Corporation
+ *
+ * Author: Felipe Balbi (Intel)
+ * Author: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+ * Author: Raymond Tan <raymond.tan@intel.com>
+ */
+
+#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm
+
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/pwm.h>
+
+#include "pwm-dwc.h"
+
+static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
+{
+	u32 reg;
+
+	reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
+
+	if (enabled)
+		reg |= DWC_TIM_CTRL_EN;
+	else
+		reg &= ~DWC_TIM_CTRL_EN;
+
+	dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
+}
+
+static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
+				     struct pwm_device *pwm,
+				     const struct pwm_state *state)
+{
+	u64 tmp;
+	u32 ctrl;
+	u32 high;
+	u32 low;
+
+	/*
+	 * Calculate the width of the low and high periods in terms of input
+	 * clock periods and check that the results are within the HW limits,
+	 * between 1 and 2^32 periods.
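+	 * For example, with the default 10 ns input clock (dwc->clk_ns == 10)
+	 * a requested duty_cycle of 2500 ns maps to 250 input clock periods,
+	 * so a Load Count of 249 is programmed; anything below one clock
+	 * period or above 2^32 periods is rejected with -ERANGE.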
+	 */
+	tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
+	if (tmp < 1 || tmp > (1ULL << 32))
+		return -ERANGE;
+	low = tmp - 1;
+
+	tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
+				    dwc->clk_ns);
+	if (tmp < 1 || tmp > (1ULL << 32))
+		return -ERANGE;
+	high = tmp - 1;
+
+	/*
+	 * The specification says the timer usage flow is to disable the
+	 * timer, program it, then enable it. It also says Load Count is
+	 * loaded into the timer after it is enabled - either after a disable
+	 * or a reset. Based on measurements it also happens without a
+	 * disable whenever Load Count is updated. But follow the
+	 * specification.
+	 */
+	__dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+
+	/*
+	 * Write the Load Count and Load Count 2 registers. The former
+	 * defines the width of the low period and the latter the width of
+	 * the high period, in multiples of the input clock period:
+	 * Width = ((Count + 1) * input clock period).
+	 */
+	dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm));
+	dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+	/*
+	 * Set user-defined mode, so the timer reloads from the Load Count
+	 * registers when it counts down to 0.
+	 * Set PWM mode; it makes the output toggle, with the widths of the
+	 * low and high periods set by the Load Count registers.
+	 */
+	ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM;
+	dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm));
+
+	/*
+	 * Enable the timer. The output starts from the low period.
+	 */
+	__dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled);
+
+	return 0;
+}
+
+static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			 const struct pwm_state *state)
+{
+	struct dwc_pwm *dwc = to_dwc_pwm(chip);
+
+	if (state->polarity != PWM_POLARITY_INVERSED)
+		return -EINVAL;
+
+	if (state->enabled) {
+		if (!pwm->state.enabled)
+			pm_runtime_get_sync(chip->dev);
+		return __dwc_pwm_configure_timer(dwc, pwm, state);
+	} else {
+		if (pwm->state.enabled) {
+			__dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+			pm_runtime_put_sync(chip->dev);
+		}
+	}
+
+	return 0;
+}
+
+static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+			     struct pwm_state *state)
+{
+	struct dwc_pwm *dwc = to_dwc_pwm(chip);
+	u64 duty, period;
+	u32 ctrl, ld, ld2;
+
+	pm_runtime_get_sync(chip->dev);
+
+	ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
+	ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
+	ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+	state->enabled = !!(ctrl & DWC_TIM_CTRL_EN);
+
+	/*
+	 * If we're not in PWM mode, the output is technically a 50-50
+	 * square wave based on the timer load-count only.
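+	 * In that case the duty derived from Load Count covers half of the
+	 * cycle, which is why the period below is reported as twice the duty.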
+	 */
+	if (ctrl & DWC_TIM_CTRL_PWM) {
+		duty = (ld + 1) * dwc->clk_ns;
+		period = (ld2 + 1) * dwc->clk_ns;
+		period += duty;
+	} else {
+		duty = (ld + 1) * dwc->clk_ns;
+		period = duty * 2;
+	}
+
+	state->polarity = PWM_POLARITY_INVERSED;
+	state->period = period;
+	state->duty_cycle = duty;
+
+	pm_runtime_put_sync(chip->dev);
+
+	return 0;
+}
+
+static const struct pwm_ops dwc_pwm_ops = {
+	.apply = dwc_pwm_apply,
+	.get_state = dwc_pwm_get_state,
+};
+
+struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
+{
+	struct dwc_pwm *dwc;
+
+	dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
+	if (!dwc)
+		return NULL;
+
+	dwc->clk_ns = 10;
+	dwc->chip.dev = dev;
+	dwc->chip.ops = &dwc_pwm_ops;
+	dwc->chip.npwm = DWC_TIMERS_TOTAL;
+
+	dev_set_drvdata(dev, dwc);
+	return dwc;
+}
+EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
+
+MODULE_AUTHOR("Felipe Balbi (Intel)");
+MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@linux.intel.com>");
+MODULE_AUTHOR("Raymond Tan <raymond.tan@intel.com>");
+MODULE_DESCRIPTION("DesignWare PWM Controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
index 3bbb26c862c3..bd9cadb497d7 100644
--- a/drivers/pwm/pwm-dwc.c
+++ b/drivers/pwm/pwm-dwc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * DesignWare PWM Controller driver
+ * DesignWare PWM Controller driver (PCI part)
 *
 * Copyright (C) 2018-2020 Intel Corporation
 *
@@ -13,6 +13,8 @@
 * periods are one or more input clock periods long.
 */
 
+#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm
+
 #include <linux/bitops.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
@@ -21,198 +23,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
 
-#define DWC_TIM_LD_CNT(n)	((n) * 0x14)
-#define DWC_TIM_LD_CNT2(n)	(((n) * 4) + 0xb0)
-#define DWC_TIM_CUR_VAL(n)	(((n) * 0x14) + 0x04)
-#define DWC_TIM_CTRL(n)		(((n) * 0x14) + 0x08)
-#define DWC_TIM_EOI(n)		(((n) * 0x14) + 0x0c)
-#define DWC_TIM_INT_STS(n)	(((n) * 0x14) + 0x10)
-
-#define DWC_TIMERS_INT_STS	0xa0
-#define DWC_TIMERS_EOI		0xa4
-#define DWC_TIMERS_RAW_INT_STS	0xa8
-#define DWC_TIMERS_COMP_VERSION	0xac
-
-#define DWC_TIMERS_TOTAL	8
-#define DWC_CLK_PERIOD_NS	10
-
-/* Timer Control Register */
-#define DWC_TIM_CTRL_EN		BIT(0)
-#define DWC_TIM_CTRL_MODE	BIT(1)
-#define DWC_TIM_CTRL_MODE_FREE	(0 << 1)
-#define DWC_TIM_CTRL_MODE_USER	(1 << 1)
-#define DWC_TIM_CTRL_INT_MASK	BIT(2)
-#define DWC_TIM_CTRL_PWM	BIT(3)
-
-struct dwc_pwm_ctx {
-	u32 cnt;
-	u32 cnt2;
-	u32 ctrl;
-};
-
-struct dwc_pwm {
-	struct pwm_chip chip;
-	void __iomem *base;
-	struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
-};
-#define to_dwc_pwm(p)	(container_of((p), struct dwc_pwm, chip))
-
-static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset)
-{
-	return readl(dwc->base + offset);
-}
-
-static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset)
-{
-	writel(value, dwc->base + offset);
-}
-
-static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
-{
-	u32 reg;
-
-	reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
-
-	if (enabled)
-		reg |= DWC_TIM_CTRL_EN;
-	else
-		reg &= ~DWC_TIM_CTRL_EN;
-
-	dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
-}
-
-static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
-				     struct pwm_device *pwm,
-				     const struct pwm_state *state)
-{
-	u64 tmp;
-	u32 ctrl;
-	u32 high;
-	u32 low;
-
-	/*
-	 * Calculate width of low and high period in terms of input clock
-	 * periods and check are the result within HW limits between 1 and
-	 * 2^32 periods.
- */ - tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, DWC_CLK_PERIOD_NS); - if (tmp < 1 || tmp > (1ULL << 32)) - return -ERANGE; - low = tmp - 1; - - tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle, - DWC_CLK_PERIOD_NS); - if (tmp < 1 || tmp > (1ULL << 32)) - return -ERANGE; - high = tmp - 1; - - /* - * Specification says timer usage flow is to disable timer, then - * program it followed by enable. It also says Load Count is loaded - * into timer after it is enabled - either after a disable or - * a reset. Based on measurements it happens also without disable - * whenever Load Count is updated. But follow the specification. - */ - __dwc_pwm_set_enable(dwc, pwm->hwpwm, false); - - /* - * Write Load Count and Load Count 2 registers. Former defines the - * width of low period and latter the width of high period in terms - * multiple of input clock periods: - * Width = ((Count + 1) * input clock period). - */ - dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm)); - dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm)); - - /* - * Set user-defined mode, timer reloads from Load Count registers - * when it counts down to 0. - * Set PWM mode, it makes output to toggle and width of low and high - * periods are set by Load Count registers. - */ - ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM; - dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm)); - - /* - * Enable timer. Output starts from low period. - */ - __dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled); - - return 0; -} - -static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - const struct pwm_state *state) -{ - struct dwc_pwm *dwc = to_dwc_pwm(chip); - - if (state->polarity != PWM_POLARITY_INVERSED) - return -EINVAL; - - if (state->enabled) { - if (!pwm->state.enabled) - pm_runtime_get_sync(chip->dev); - return __dwc_pwm_configure_timer(dwc, pwm, state); - } else { - if (pwm->state.enabled) { - __dwc_pwm_set_enable(dwc, pwm->hwpwm, false); - pm_runtime_put_sync(chip->dev); - } - } - - return 0; -} - -static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) -{ - struct dwc_pwm *dwc = to_dwc_pwm(chip); - u64 duty, period; - - pm_runtime_get_sync(chip->dev); - - state->enabled = !!(dwc_pwm_readl(dwc, - DWC_TIM_CTRL(pwm->hwpwm)) & DWC_TIM_CTRL_EN); - - duty = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm)); - duty += 1; - duty *= DWC_CLK_PERIOD_NS; - state->duty_cycle = duty; - - period = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm)); - period += 1; - period *= DWC_CLK_PERIOD_NS; - period += duty; - state->period = period; - - state->polarity = PWM_POLARITY_INVERSED; - - pm_runtime_put_sync(chip->dev); - - return 0; -} - -static const struct pwm_ops dwc_pwm_ops = { - .apply = dwc_pwm_apply, - .get_state = dwc_pwm_get_state, - .owner = THIS_MODULE, -}; - -static struct dwc_pwm *dwc_pwm_alloc(struct device *dev) -{ - struct dwc_pwm *dwc; - - dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL); - if (!dwc) - return NULL; - - dwc->chip.dev = dev; - dwc->chip.ops = &dwc_pwm_ops; - dwc->chip.npwm = DWC_TIMERS_TOTAL; - - dev_set_drvdata(dev, dwc); - return dwc; -} +#include "pwm-dwc.h" static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id) { diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h new file mode 100644 index 000000000000..64795247c54c --- /dev/null +++ b/drivers/pwm/pwm-dwc.h @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DesignWare PWM Controller driver + * + * Copyright (C) 2018-2020 Intel Corporation + * + * 
Author: Felipe Balbi (Intel) + * Author: Jarkko Nikula <jarkko.nikula@linux.intel.com> + * Author: Raymond Tan <raymond.tan@intel.com> + */ + +MODULE_IMPORT_NS(dwc_pwm); + +#define DWC_TIM_LD_CNT(n) ((n) * 0x14) +#define DWC_TIM_LD_CNT2(n) (((n) * 4) + 0xb0) +#define DWC_TIM_CUR_VAL(n) (((n) * 0x14) + 0x04) +#define DWC_TIM_CTRL(n) (((n) * 0x14) + 0x08) +#define DWC_TIM_EOI(n) (((n) * 0x14) + 0x0c) +#define DWC_TIM_INT_STS(n) (((n) * 0x14) + 0x10) + +#define DWC_TIMERS_INT_STS 0xa0 +#define DWC_TIMERS_EOI 0xa4 +#define DWC_TIMERS_RAW_INT_STS 0xa8 +#define DWC_TIMERS_COMP_VERSION 0xac + +#define DWC_TIMERS_TOTAL 8 + +/* Timer Control Register */ +#define DWC_TIM_CTRL_EN BIT(0) +#define DWC_TIM_CTRL_MODE BIT(1) +#define DWC_TIM_CTRL_MODE_FREE (0 << 1) +#define DWC_TIM_CTRL_MODE_USER (1 << 1) +#define DWC_TIM_CTRL_INT_MASK BIT(2) +#define DWC_TIM_CTRL_PWM BIT(3) + +struct dwc_pwm_ctx { + u32 cnt; + u32 cnt2; + u32 ctrl; +}; + +struct dwc_pwm { + struct pwm_chip chip; + void __iomem *base; + unsigned int clk_ns; + struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL]; +}; +#define to_dwc_pwm(p) (container_of((p), struct dwc_pwm, chip)) + +static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset) +{ + return readl(dwc->base + offset); +} + +static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset) +{ + writel(value, dwc->base + offset); +} + +extern struct dwc_pwm *dwc_pwm_alloc(struct device *dev); diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index c45a75e65c86..51e072572a87 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -159,7 +159,6 @@ static const struct pwm_ops ep93xx_pwm_ops = { .request = ep93xx_pwm_request, .free = ep93xx_pwm_free, .apply = ep93xx_pwm_apply, - .owner = THIS_MODULE, }; static int ep93xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index b7c6045c5d08..d1b6d1aa4773 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -350,7 +350,6 @@ static const struct pwm_ops fsl_pwm_ops = { .request = fsl_pwm_request, .free = fsl_pwm_free, .apply = fsl_pwm_apply, - .owner = THIS_MODULE, }; static int fsl_pwm_init(struct fsl_pwm_chip *fpc) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index f7ba6fe9a349..c435776e2f78 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -185,7 +185,6 @@ static const struct pwm_ops hibvt_pwm_ops = { .get_state = hibvt_pwm_get_state, .apply = hibvt_pwm_apply, - .owner = THIS_MODULE, }; static int hibvt_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 326af85888e7..116fa060e302 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -208,7 +208,6 @@ static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops img_pwm_ops = { .apply = img_pwm_apply, - .owner = THIS_MODULE, }; static const struct img_pwm_soc_data pistachio_pwm = { diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 98ab65c89685..dc6aafeb9f7b 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -332,7 +332,6 @@ static const struct pwm_ops imx_tpm_pwm_ops = { .free = pwm_imx_tpm_free, .get_state = pwm_imx_tpm_get_state, .apply = pwm_imx_tpm_apply, - .owner = THIS_MODULE, }; static int pwm_imx_tpm_probe(struct platform_device *pdev) @@ -351,18 +350,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) if (IS_ERR(tpm->base)) return PTR_ERR(tpm->base); - tpm->clk = 
devm_clk_get(&pdev->dev, NULL); + tpm->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(tpm->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(tpm->clk), "failed to get PWM clock\n"); - ret = clk_prepare_enable(tpm->clk); - if (ret) { - dev_err(&pdev->dev, - "failed to prepare or enable clock: %d\n", ret); - return ret; - } - tpm->chip.dev = &pdev->dev; tpm->chip.ops = &imx_tpm_pwm_ops; @@ -372,22 +364,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) mutex_init(&tpm->lock); - ret = pwmchip_add(&tpm->chip); - if (ret) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - clk_disable_unprepare(tpm->clk); - } - - return ret; -} - -static void pwm_imx_tpm_remove(struct platform_device *pdev) -{ - struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev); - - pwmchip_remove(&tpm->chip); + ret = devm_pwmchip_add(&pdev->dev, &tpm->chip); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); - clk_disable_unprepare(tpm->clk); + return 0; } static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev) @@ -437,7 +418,6 @@ static struct platform_driver imx_tpm_pwm_driver = { .pm = &imx_tpm_pwm_pm, }, .probe = pwm_imx_tpm_probe, - .remove_new = pwm_imx_tpm_remove, }; module_platform_driver(imx_tpm_pwm_driver); diff --git a/drivers/pwm/pwm-imx1.c b/drivers/pwm/pwm-imx1.c index 0651983bed19..d175d895f22a 100644 --- a/drivers/pwm/pwm-imx1.c +++ b/drivers/pwm/pwm-imx1.c @@ -146,7 +146,6 @@ static int pwm_imx1_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx1_ops = { .apply = pwm_imx1_apply, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx1_dt_ids[] = { diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 29a3089c534c..7d9bc43f12b0 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -296,7 +296,6 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx27_ops = { .apply = pwm_imx27_apply, .get_state = pwm_imx27_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx27_dt_ids[] = { diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c index 0cd7dd548e82..54ecae7f937e 100644 --- a/drivers/pwm/pwm-intel-lgm.c +++ b/drivers/pwm/pwm-intel-lgm.c @@ -107,7 +107,6 @@ static int lgm_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lgm_pwm_ops = { .get_state = lgm_pwm_get_state, .apply = lgm_pwm_apply, - .owner = THIS_MODULE, }; static void lgm_pwm_init(struct lgm_pwm_chip *pc) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 47b3141135f3..378ab036edfe 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -166,7 +166,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier, static const struct pwm_ops iqs620_pwm_ops = { .apply = iqs620_pwm_apply, .get_state = iqs620_pwm_get_state, - .owner = THIS_MODULE, }; static void iqs620_pwm_notifier_unregister(void *context) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index ef1293f2a897..e9375de60ad6 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -27,6 +27,7 @@ struct soc_info { struct jz4740_pwm_chip { struct pwm_chip chip; struct regmap *map; + struct clk *clk[]; }; static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) @@ -70,14 +71,15 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) return err; } - pwm_set_chip_data(pwm, clk); + 
jz->clk[pwm->hwpwm] = clk; return 0; } static void jz4740_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - struct clk *clk = pwm_get_chip_data(pwm); + struct jz4740_pwm_chip *jz = to_jz4740(chip); + struct clk *clk = jz->clk[pwm->hwpwm]; clk_disable_unprepare(clk); clk_put(clk); @@ -121,9 +123,9 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); + struct jz4740_pwm_chip *jz = to_jz4740(pwm->chip); unsigned long long tmp = 0xffffull * NSEC_PER_SEC; - struct clk *clk = pwm_get_chip_data(pwm); + struct clk *clk = jz->clk[pwm->hwpwm]; unsigned long period, duty; long rate; int err; @@ -173,16 +175,16 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } /* Reset counter to 0 */ - regmap_write(jz4740->map, TCU_REG_TCNTc(pwm->hwpwm), 0); + regmap_write(jz->map, TCU_REG_TCNTc(pwm->hwpwm), 0); /* Set duty */ - regmap_write(jz4740->map, TCU_REG_TDHRc(pwm->hwpwm), duty); + regmap_write(jz->map, TCU_REG_TDHRc(pwm->hwpwm), duty); /* Set period */ - regmap_write(jz4740->map, TCU_REG_TDFRc(pwm->hwpwm), period); + regmap_write(jz->map, TCU_REG_TDFRc(pwm->hwpwm), period); /* Set abrupt shutdown */ - regmap_set_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_set_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_SD); /* @@ -199,10 +201,10 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, * state instead of its inactive state. */ if ((state->polarity == PWM_POLARITY_NORMAL) ^ state->enabled) - regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_INITL_HIGH, 0); else - regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_INITL_HIGH, TCU_TCSR_PWM_INITL_HIGH); @@ -216,34 +218,34 @@ static const struct pwm_ops jz4740_pwm_ops = { .request = jz4740_pwm_request, .free = jz4740_pwm_free, .apply = jz4740_pwm_apply, - .owner = THIS_MODULE, }; static int jz4740_pwm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct jz4740_pwm_chip *jz4740; + struct jz4740_pwm_chip *jz; const struct soc_info *info; info = device_get_match_data(dev); if (!info) return -EINVAL; - jz4740 = devm_kzalloc(dev, sizeof(*jz4740), GFP_KERNEL); - if (!jz4740) + jz = devm_kzalloc(dev, struct_size(jz, clk, info->num_pwms), + GFP_KERNEL); + if (!jz) return -ENOMEM; - jz4740->map = device_node_to_regmap(dev->parent->of_node); - if (IS_ERR(jz4740->map)) { - dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz4740->map)); - return PTR_ERR(jz4740->map); + jz->map = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(jz->map)) { + dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz->map)); + return PTR_ERR(jz->map); } - jz4740->chip.dev = dev; - jz4740->chip.ops = &jz4740_pwm_ops; - jz4740->chip.npwm = info->num_pwms; + jz->chip.dev = dev; + jz->chip.ops = &jz4740_pwm_ops; + jz->chip.npwm = info->num_pwms; - return devm_pwmchip_add(dev, &jz4740->chip); + return devm_pwmchip_add(dev, &jz->chip); } static const struct soc_info jz4740_soc_info = { diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c index ac02d8bb4a0b..ac824ecc3f64 100644 --- a/drivers/pwm/pwm-keembay.c +++ b/drivers/pwm/pwm-keembay.c @@ -178,7 +178,6 @@ static int keembay_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } 
static const struct pwm_ops keembay_pwm_ops = { - .owner = THIS_MODULE, .apply = keembay_pwm_apply, .get_state = keembay_pwm_get_state, }; diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 4b133a17f4be..32350a357278 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -23,6 +23,7 @@ struct lp3943_pwm { struct pwm_chip chip; struct lp3943 *lp3943; struct lp3943_platform_data *pdata; + struct lp3943_pwm_map pwm_map[LP3943_NUM_PWMS]; }; static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip) @@ -35,13 +36,9 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm) { struct lp3943_platform_data *pdata = lp3943_pwm->pdata; struct lp3943 *lp3943 = lp3943_pwm->lp3943; - struct lp3943_pwm_map *pwm_map; + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[hwpwm]; int i, offset; - pwm_map = kzalloc(sizeof(*pwm_map), GFP_KERNEL); - if (!pwm_map) - return ERR_PTR(-ENOMEM); - pwm_map->output = pdata->pwms[hwpwm]->output; pwm_map->num_outputs = pdata->pwms[hwpwm]->num_outputs; @@ -49,10 +46,8 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm) offset = pwm_map->output[i]; /* Return an error if the pin is already assigned */ - if (test_and_set_bit(offset, &lp3943->pin_used)) { - kfree(pwm_map); + if (test_and_set_bit(offset, &lp3943->pin_used)) return ERR_PTR(-EBUSY); - } } return pwm_map; @@ -67,7 +62,7 @@ static int lp3943_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) if (IS_ERR(pwm_map)) return PTR_ERR(pwm_map); - return pwm_set_chip_data(pwm, pwm_map); + return 0; } static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm, @@ -80,14 +75,12 @@ static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm, offset = pwm_map->output[i]; clear_bit(offset, &lp3943->pin_used); } - - kfree(pwm_map); } static void lp3943_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; lp3943_pwm_free_map(lp3943_pwm, pwm_map); } @@ -159,7 +152,7 @@ static int lp3943_pwm_set_mode(struct lp3943_pwm *lp3943_pwm, static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; u8 val; if (pwm->hwpwm == 0) @@ -178,7 +171,7 @@ static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void lp3943_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; /* * LP3943 outputs are open-drain, so the pin should be configured @@ -216,7 +209,6 @@ static const struct pwm_ops lp3943_pwm_ops = { .request = lp3943_pwm_request, .free = lp3943_pwm_free, .apply = lp3943_pwm_apply, - .owner = THIS_MODULE, }; static int lp3943_pwm_parse_dt(struct device *dev, diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 7a19a840bca5..ef7d0da137ed 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -341,7 +341,6 @@ static const struct pwm_ops lpc18xx_pwm_ops = { .apply = lpc18xx_pwm_apply, .request = lpc18xx_pwm_request, .free = lpc18xx_pwm_free, - .owner = THIS_MODULE, }; static const struct of_device_id 
lpc18xx_pwm_of_match[] = { diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 806f0bb3ad6d..78f664e41e6e 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -115,7 +115,6 @@ static int lpc32xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lpc32xx_pwm_ops = { .apply = lpc32xx_pwm_apply, - .owner = THIS_MODULE, }; static int lpc32xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 23fe332b2394..a6ea3ce7e019 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -243,7 +243,6 @@ static int pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_lpss_ops = { .apply = pwm_lpss_apply, .get_state = pwm_lpss_get_state, - .owner = THIS_MODULE, }; struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 6adb0ed01906..373abfd25acb 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -229,7 +229,6 @@ static int pwm_mediatek_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_mediatek_ops = { .apply = pwm_mediatek_apply, - .owner = THIS_MODULE, }; static int pwm_mediatek_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 25519cddc2a9..5bea53243ed2 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -335,7 +335,6 @@ static const struct pwm_ops meson_pwm_ops = { .free = meson_pwm_free, .apply = meson_pwm_apply, .get_state = meson_pwm_get_state, - .owner = THIS_MODULE, }; static const char * const pwm_meson8b_parent_names[] = { diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c index e7525c98105e..c0c53968f3e9 100644 --- a/drivers/pwm/pwm-microchip-core.c +++ b/drivers/pwm/pwm-microchip-core.c @@ -435,7 +435,6 @@ static int mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm static const struct pwm_ops mchp_core_pwm_ops = { .apply = mchp_core_pwm_apply, .get_state = mchp_core_pwm_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id mchp_core_of_match[] = { diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c index a83bd6e18b07..a72f7be36996 100644 --- a/drivers/pwm/pwm-mtk-disp.c +++ b/drivers/pwm/pwm-mtk-disp.c @@ -227,7 +227,6 @@ static int mtk_disp_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops mtk_disp_pwm_ops = { .apply = mtk_disp_pwm_apply, .get_state = mtk_disp_pwm_get_state, - .owner = THIS_MODULE, }; static int mtk_disp_pwm_probe(struct platform_device *pdev) @@ -247,34 +246,25 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev) mdp->clk_main = devm_clk_get(&pdev->dev, "main"); if (IS_ERR(mdp->clk_main)) - return PTR_ERR(mdp->clk_main); + return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_main), + "Failed to get main clock\n"); mdp->clk_mm = devm_clk_get(&pdev->dev, "mm"); if (IS_ERR(mdp->clk_mm)) - return PTR_ERR(mdp->clk_mm); + return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_mm), + "Failed to get mm clock\n"); mdp->chip.dev = &pdev->dev; mdp->chip.ops = &mtk_disp_pwm_ops; mdp->chip.npwm = 1; - ret = pwmchip_add(&mdp->chip); - if (ret < 0) { - dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret)); - return ret; - } - - platform_set_drvdata(pdev, mdp); + ret = devm_pwmchip_add(&pdev->dev, &mdp->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, 
"pwmchip_add() failed\n"); return 0; } -static void mtk_disp_pwm_remove(struct platform_device *pdev) -{ - struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev); - - pwmchip_remove(&mdp->chip); -} - static const struct mtk_pwm_data mt2701_pwm_data = { .enable_mask = BIT(16), .con0 = 0xa8, @@ -320,7 +310,6 @@ static struct platform_driver mtk_disp_pwm_driver = { .of_match_table = mtk_disp_pwm_of_match, }, .probe = mtk_disp_pwm_probe, - .remove_new = mtk_disp_pwm_remove, }; module_platform_driver(mtk_disp_pwm_driver); diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 766dbc58dad8..1b5e787d78f1 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -115,7 +115,6 @@ static int mxs_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops mxs_pwm_ops = { .apply = mxs_pwm_apply, - .owner = THIS_MODULE, }; static int mxs_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c index 7514ea384ec5..78606039eda2 100644 --- a/drivers/pwm/pwm-ntxec.c +++ b/drivers/pwm/pwm-ntxec.c @@ -126,7 +126,6 @@ static int ntxec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm_dev, } static const struct pwm_ops ntxec_pwm_ops = { - .owner = THIS_MODULE, .apply = ntxec_pwm_apply, /* * No .get_state callback, because the current state cannot be read diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 4889fbd8a431..13161e08dd6e 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -311,7 +311,6 @@ unlock_mutex: static const struct pwm_ops pwm_omap_dmtimer_ops = { .apply = pwm_omap_dmtimer_apply, - .owner = THIS_MODULE, }; static int pwm_omap_dmtimer_probe(struct platform_device *pdev) @@ -466,7 +465,7 @@ MODULE_DEVICE_TABLE(of, pwm_omap_dmtimer_of_match); static struct platform_driver pwm_omap_dmtimer_driver = { .driver = { .name = "omap-dmtimer-pwm", - .of_match_table = of_match_ptr(pwm_omap_dmtimer_of_match), + .of_match_table = pwm_omap_dmtimer_of_match, }, .probe = pwm_omap_dmtimer_probe, .remove_new = pwm_omap_dmtimer_remove, diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 3038a68412a7..e79b1de8c4d8 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -505,7 +505,6 @@ static const struct pwm_ops pca9685_pwm_ops = { .get_state = pca9685_pwm_get_state, .request = pca9685_pwm_request, .free = pca9685_pwm_free, - .owner = THIS_MODULE, }; static const struct regmap_config pca9685_regmap_i2c_config = { diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 1e475ed10180..76685f926c75 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -24,7 +24,7 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/pwm.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <asm/div64.h> @@ -135,7 +135,6 @@ static int pxa_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pxa_pwm_ops = { .apply = pxa_pwm_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c index 2939b71a7ba7..1ad814fdec6b 100644 --- a/drivers/pwm/pwm-raspberrypi-poe.c +++ b/drivers/pwm/pwm-raspberrypi-poe.c @@ -135,7 +135,6 @@ static int raspberrypi_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops raspberrypi_pwm_ops = { .get_state = raspberrypi_pwm_get_state, .apply = raspberrypi_pwm_apply, - .owner = THIS_MODULE, }; static int raspberrypi_pwm_probe(struct 
platform_device *pdev) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 5b5f357c44de..13269f55fccf 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -198,7 +198,6 @@ static const struct pwm_ops rcar_pwm_ops = { .request = rcar_pwm_request, .free = rcar_pwm_free, .apply = rcar_pwm_apply, - .owner = THIS_MODULE, }; static int rcar_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index d7311614c846..4239f2c3e8b2 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -85,6 +85,7 @@ struct tpu_device { void __iomem *base; struct clk *clk; + struct tpu_pwm_device tpd[TPU_CHANNEL_MAX]; }; #define to_tpu_device(c) container_of(c, struct tpu_device, chip) @@ -215,9 +216,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) if (pwm->hwpwm >= TPU_CHANNEL_MAX) return -EINVAL; - tpd = kzalloc(sizeof(*tpd), GFP_KERNEL); - if (tpd == NULL) - return -ENOMEM; + tpd = &tpu->tpd[pwm->hwpwm]; tpd->tpu = tpu; tpd->channel = pwm->hwpwm; @@ -228,24 +227,22 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) tpd->timer_on = false; - pwm_set_chip_data(pwm, tpd); - return 0; } static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; tpu_pwm_timer_stop(tpd); - kfree(tpd); } static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u64 duty_ns, u64 period_ns, bool enabled) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; unsigned int prescaler; bool duty_only = false; u32 clk_rate; @@ -353,7 +350,8 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, enum pwm_polarity polarity) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; tpd->polarity = polarity; @@ -362,7 +360,8 @@ static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; int ret; ret = tpu_pwm_timer_start(tpd); @@ -384,7 +383,8 @@ static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void tpu_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; /* The timer must be running to modify the pin output configuration. 
*/ tpu_pwm_timer_start(tpd); @@ -431,7 +431,6 @@ static const struct pwm_ops tpu_pwm_ops = { .request = tpu_pwm_request, .free = tpu_pwm_free, .apply = tpu_pwm_apply, - .owner = THIS_MODULE, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 03ee18fb82d5..cce4381e188a 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -228,7 +228,6 @@ out: static const struct pwm_ops rockchip_pwm_ops = { .get_state = rockchip_pwm_get_state, .apply = rockchip_pwm_apply, - .owner = THIS_MODULE, }; static const struct rockchip_pwm_data pwm_data_v1 = { diff --git a/drivers/pwm/pwm-rz-mtu3.c b/drivers/pwm/pwm-rz-mtu3.c index a56cecb0e46e..bdda315b3bd3 100644 --- a/drivers/pwm/pwm-rz-mtu3.c +++ b/drivers/pwm/pwm-rz-mtu3.c @@ -438,7 +438,6 @@ static const struct pwm_ops rz_mtu3_pwm_ops = { .free = rz_mtu3_pwm_free, .get_state = rz_mtu3_pwm_get_state, .apply = rz_mtu3_pwm_apply, - .owner = THIS_MODULE, }; static int rz_mtu3_pwm_pm_runtime_suspend(struct device *dev) diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index e8828f57ab15..69d9f4577b34 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -77,6 +77,7 @@ struct samsung_pwm_channel { * @base_clk: base clock used to drive the timers * @tclk0: external clock 0 (can be ERR_PTR if not present) * @tclk1: external clock 1 (can be ERR_PTR if not present) + * @channel: per channel driver data */ struct samsung_pwm_chip { struct pwm_chip chip; @@ -88,6 +89,7 @@ struct samsung_pwm_chip { struct clk *base_clk; struct clk *tclk0; struct clk *tclk1; + struct samsung_pwm_channel channel[SAMSUNG_PWM_NUM]; }; #ifndef CONFIG_CLKSRC_SAMSUNG_PWM @@ -117,21 +119,21 @@ static inline unsigned int to_tcon_channel(unsigned int channel) return (channel == 0) ? 
0 : (channel + 1); } -static void __pwm_samsung_manual_update(struct samsung_pwm_chip *chip, +static void __pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip, struct pwm_device *pwm) { unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm); u32 tcon; - tcon = readl(chip->base + REG_TCON); + tcon = readl(our_chip->base + REG_TCON); tcon |= TCON_MANUALUPDATE(tcon_chan); - writel(tcon, chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); tcon &= ~TCON_MANUALUPDATE(tcon_chan); - writel(tcon, chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); } -static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm, +static void pwm_samsung_set_divisor(struct samsung_pwm_chip *our_chip, unsigned int channel, u8 divisor) { u8 shift = TCFG1_SHIFT(channel); @@ -139,39 +141,39 @@ static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm, u32 reg; u8 bits; - bits = (fls(divisor) - 1) - pwm->variant.div_base; + bits = (fls(divisor) - 1) - our_chip->variant.div_base; spin_lock_irqsave(&samsung_pwm_lock, flags); - reg = readl(pwm->base + REG_TCFG1); + reg = readl(our_chip->base + REG_TCFG1); reg &= ~(TCFG1_MUX_MASK << shift); reg |= bits << shift; - writel(reg, pwm->base + REG_TCFG1); + writel(reg, our_chip->base + REG_TCFG1); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } -static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *chip, unsigned int chan) +static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *our_chip, unsigned int chan) { - struct samsung_pwm_variant *variant = &chip->variant; + struct samsung_pwm_variant *variant = &our_chip->variant; u32 reg; - reg = readl(chip->base + REG_TCFG1); + reg = readl(our_chip->base + REG_TCFG1); reg >>= TCFG1_SHIFT(chan); reg &= TCFG1_MUX_MASK; return (BIT(reg) & variant->tclk_mask) == 0; } -static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip, +static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *our_chip, unsigned int chan) { unsigned long rate; u32 reg; - rate = clk_get_rate(chip->base_clk); + rate = clk_get_rate(our_chip->base_clk); - reg = readl(chip->base + REG_TCFG0); + reg = readl(our_chip->base + REG_TCFG0); if (chan >= 2) reg >>= TCFG0_PRESCALER1_SHIFT; reg &= TCFG0_PRESCALER_MASK; @@ -179,28 +181,28 @@ static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip, return rate / (reg + 1); } -static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, +static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *our_chip, unsigned int chan, unsigned long freq) { - struct samsung_pwm_variant *variant = &chip->variant; + struct samsung_pwm_variant *variant = &our_chip->variant; unsigned long rate; struct clk *clk; u8 div; - if (!pwm_samsung_is_tdiv(chip, chan)) { - clk = (chan < 2) ? chip->tclk0 : chip->tclk1; + if (!pwm_samsung_is_tdiv(our_chip, chan)) { + clk = (chan < 2) ? 
our_chip->tclk0 : our_chip->tclk1; if (!IS_ERR(clk)) { rate = clk_get_rate(clk); if (rate) return rate; } - dev_warn(chip->chip.dev, + dev_warn(our_chip->chip.dev, "tclk of PWM %d is inoperational, using tdiv\n", chan); } - rate = pwm_samsung_get_tin_rate(chip, chan); - dev_dbg(chip->chip.dev, "tin parent at %lu\n", rate); + rate = pwm_samsung_get_tin_rate(our_chip, chan); + dev_dbg(our_chip->chip.dev, "tin parent at %lu\n", rate); /* * Compare minimum PWM frequency that can be achieved with possible @@ -220,7 +222,7 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, div = variant->div_base; } - pwm_samsung_set_divisor(chip, chan, BIT(div)); + pwm_samsung_set_divisor(our_chip, chan, BIT(div)); return rate >> div; } @@ -228,7 +230,6 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); - struct samsung_pwm_channel *our_chan; if (!(our_chip->variant.output_mask & BIT(pwm->hwpwm))) { dev_warn(chip->dev, @@ -237,20 +238,11 @@ static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) return -EINVAL; } - our_chan = kzalloc(sizeof(*our_chan), GFP_KERNEL); - if (!our_chan) - return -ENOMEM; - - pwm_set_chip_data(pwm, our_chan); + memset(&our_chip->channel[pwm->hwpwm], 0, sizeof(our_chip->channel[pwm->hwpwm])); return 0; } -static void pwm_samsung_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - kfree(pwm_get_chip_data(pwm)); -} - static int pwm_samsung_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); @@ -302,14 +294,14 @@ static void pwm_samsung_disable(struct pwm_chip *chip, struct pwm_device *pwm) spin_unlock_irqrestore(&samsung_pwm_lock, flags); } -static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip, +static void pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip, struct pwm_device *pwm) { unsigned long flags; spin_lock_irqsave(&samsung_pwm_lock, flags); - __pwm_samsung_manual_update(chip, pwm); + __pwm_samsung_manual_update(our_chip, pwm); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -318,7 +310,7 @@ static int __pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns, bool force_period) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); - struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm); + struct samsung_pwm_channel *chan = &our_chip->channel[pwm->hwpwm]; u32 tin_ns = chan->tin_ns, tcnt, tcmp, oldtcmp; tcnt = readl(our_chip->base + REG_TCNTB(pwm->hwpwm)); @@ -393,7 +385,7 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm, return __pwm_samsung_config(chip, pwm, duty_ns, period_ns, false); } -static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip, +static void pwm_samsung_set_invert(struct samsung_pwm_chip *our_chip, unsigned int channel, bool invert) { unsigned int tcon_chan = to_tcon_channel(channel); @@ -402,17 +394,17 @@ static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip, spin_lock_irqsave(&samsung_pwm_lock, flags); - tcon = readl(chip->base + REG_TCON); + tcon = readl(our_chip->base + REG_TCON); if (invert) { - chip->inverter_mask |= BIT(channel); + our_chip->inverter_mask |= BIT(channel); tcon |= TCON_INVERT(tcon_chan); } else { - chip->inverter_mask &= ~BIT(channel); + our_chip->inverter_mask &= ~BIT(channel); tcon &= ~TCON_INVERT(tcon_chan); } - writel(tcon, 
chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -473,9 +465,7 @@ static int pwm_samsung_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_samsung_ops = { .request = pwm_samsung_request, - .free = pwm_samsung_free, .apply = pwm_samsung_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF @@ -517,9 +507,9 @@ static const struct of_device_id samsung_pwm_matches[] = { }; MODULE_DEVICE_TABLE(of, samsung_pwm_matches); -static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) +static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip) { - struct device_node *np = chip->chip.dev->of_node; + struct device_node *np = our_chip->chip.dev->of_node; const struct of_device_id *match; struct property *prop; const __be32 *cur; @@ -529,22 +519,22 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) if (!match) return -ENODEV; - memcpy(&chip->variant, match->data, sizeof(chip->variant)); + memcpy(&our_chip->variant, match->data, sizeof(our_chip->variant)); of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { if (val >= SAMSUNG_PWM_NUM) { - dev_err(chip->chip.dev, + dev_err(our_chip->chip.dev, "%s: invalid channel index in samsung,pwm-outputs property\n", __func__); continue; } - chip->variant.output_mask |= BIT(val); + our_chip->variant.output_mask |= BIT(val); } return 0; } #else -static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) +static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip) { return -ENODEV; } @@ -553,21 +543,21 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) static int pwm_samsung_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct samsung_pwm_chip *chip; + struct samsung_pwm_chip *our_chip; unsigned int chan; int ret; - chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); - if (chip == NULL) + our_chip = devm_kzalloc(&pdev->dev, sizeof(*our_chip), GFP_KERNEL); + if (our_chip == NULL) return -ENOMEM; - chip->chip.dev = &pdev->dev; - chip->chip.ops = &pwm_samsung_ops; - chip->chip.npwm = SAMSUNG_PWM_NUM; - chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1; + our_chip->chip.dev = &pdev->dev; + our_chip->chip.ops = &pwm_samsung_ops; + our_chip->chip.npwm = SAMSUNG_PWM_NUM; + our_chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1; if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) { - ret = pwm_samsung_parse_dt(chip); + ret = pwm_samsung_parse_dt(our_chip); if (ret) return ret; } else { @@ -576,58 +566,58 @@ static int pwm_samsung_probe(struct platform_device *pdev) return -EINVAL; } - memcpy(&chip->variant, pdev->dev.platform_data, - sizeof(chip->variant)); + memcpy(&our_chip->variant, pdev->dev.platform_data, + sizeof(our_chip->variant)); } - chip->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(chip->base)) - return PTR_ERR(chip->base); + our_chip->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(our_chip->base)) + return PTR_ERR(our_chip->base); - chip->base_clk = devm_clk_get(&pdev->dev, "timers"); - if (IS_ERR(chip->base_clk)) { + our_chip->base_clk = devm_clk_get(&pdev->dev, "timers"); + if (IS_ERR(our_chip->base_clk)) { dev_err(dev, "failed to get timer base clk\n"); - return PTR_ERR(chip->base_clk); + return PTR_ERR(our_chip->base_clk); } - ret = clk_prepare_enable(chip->base_clk); + ret = clk_prepare_enable(our_chip->base_clk); if (ret < 0) { dev_err(dev, "failed to enable base clock\n"); return ret; } for (chan = 0; chan < SAMSUNG_PWM_NUM; ++chan) - 
if (chip->variant.output_mask & BIT(chan)) - pwm_samsung_set_invert(chip, chan, true); + if (our_chip->variant.output_mask & BIT(chan)) + pwm_samsung_set_invert(our_chip, chan, true); /* Following clocks are optional. */ - chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0"); - chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1"); + our_chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0"); + our_chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1"); - platform_set_drvdata(pdev, chip); + platform_set_drvdata(pdev, our_chip); - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add(&our_chip->chip); if (ret < 0) { dev_err(dev, "failed to register PWM chip\n"); - clk_disable_unprepare(chip->base_clk); + clk_disable_unprepare(our_chip->base_clk); return ret; } dev_dbg(dev, "base_clk at %lu, tclk0 at %lu, tclk1 at %lu\n", - clk_get_rate(chip->base_clk), - !IS_ERR(chip->tclk0) ? clk_get_rate(chip->tclk0) : 0, - !IS_ERR(chip->tclk1) ? clk_get_rate(chip->tclk1) : 0); + clk_get_rate(our_chip->base_clk), + !IS_ERR(our_chip->tclk0) ? clk_get_rate(our_chip->tclk0) : 0, + !IS_ERR(our_chip->tclk1) ? clk_get_rate(our_chip->tclk1) : 0); return 0; } static void pwm_samsung_remove(struct platform_device *pdev) { - struct samsung_pwm_chip *chip = platform_get_drvdata(pdev); + struct samsung_pwm_chip *our_chip = platform_get_drvdata(pdev); - pwmchip_remove(&chip->chip); + pwmchip_remove(&our_chip->chip); - clk_disable_unprepare(chip->base_clk); + clk_disable_unprepare(our_chip->base_clk); } #ifdef CONFIG_PM_SLEEP @@ -639,9 +629,9 @@ static int pwm_samsung_resume(struct device *dev) for (i = 0; i < SAMSUNG_PWM_NUM; i++) { struct pwm_device *pwm = &chip->pwms[i]; - struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm); + struct samsung_pwm_channel *chan = &our_chip->channel[i]; - if (!chan) + if (!test_bit(PWMF_REQUESTED, &pwm->flags)) continue; if (our_chip->variant.output_mask & BIT(i)) diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index eabddb7c7820..089e50bdbbf0 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -203,7 +203,6 @@ static const struct pwm_ops pwm_sifive_ops = { .free = pwm_sifive_free, .get_state = pwm_sifive_get_state, .apply = pwm_sifive_apply, - .owner = THIS_MODULE, }; static int pwm_sifive_clock_notifier(struct notifier_block *nb, diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c index 9e42e3a74ad6..88b01ff9e460 100644 --- a/drivers/pwm/pwm-sl28cpld.c +++ b/drivers/pwm/pwm-sl28cpld.c @@ -200,7 +200,6 @@ static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sl28cpld_pwm_ops = { .apply = sl28cpld_pwm_apply, .get_state = sl28cpld_pwm_get_state, - .owner = THIS_MODULE, }; static int sl28cpld_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 4e1cfd8d7c03..ff991319feef 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -189,7 +189,6 @@ static int spear_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops spear_pwm_ops = { .apply = spear_pwm_apply, - .owner = THIS_MODULE, }; static int spear_pwm_probe(struct platform_device *pdev) @@ -207,26 +206,21 @@ static int spear_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->mmio_base)) return PTR_ERR(pc->mmio_base); - pc->clk = devm_clk_get(&pdev->dev, NULL); + pc->clk = devm_clk_get_prepared(&pdev->dev, NULL); if (IS_ERR(pc->clk)) - return PTR_ERR(pc->clk); - - platform_set_drvdata(pdev, pc); + return dev_err_probe(&pdev->dev, 
PTR_ERR(pc->clk), + "Failed to get clock\n"); pc->chip.dev = &pdev->dev; pc->chip.ops = &spear_pwm_ops; pc->chip.npwm = NUM_PWM; - ret = clk_prepare(pc->clk); - if (ret) - return ret; - if (of_device_is_compatible(np, "st,spear1340-pwm")) { ret = clk_enable(pc->clk); - if (ret) { - clk_unprepare(pc->clk); - return ret; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to enable clk\n"); + /* * Following enables PWM chip, channels would still be * enabled individually through their control register @@ -238,23 +232,11 @@ static int spear_pwm_probe(struct platform_device *pdev) clk_disable(pc->clk); } - ret = pwmchip_add(&pc->chip); - if (ret < 0) { - clk_unprepare(pc->clk); - dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); - } + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "pwmchip_add() failed\n"); - return ret; -} - -static void spear_pwm_remove(struct platform_device *pdev) -{ - struct spear_pwm_chip *pc = platform_get_drvdata(pdev); - - pwmchip_remove(&pc->chip); - - /* clk was prepared in probe, hence unprepare it here */ - clk_unprepare(pc->clk); + return 0; } static const struct of_device_id spear_pwm_of_match[] = { @@ -271,7 +253,6 @@ static struct platform_driver spear_pwm_driver = { .of_match_table = spear_pwm_of_match, }, .probe = spear_pwm_probe, - .remove_new = spear_pwm_remove, }; module_platform_driver(spear_pwm_driver); diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index 1499c8c1fe37..77939e161006 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -40,6 +40,11 @@ struct sprd_pwm_chip { struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM]; }; +static inline struct sprd_pwm_chip *sprd_pwm_from_chip(struct pwm_chip *chip) +{ + return container_of(chip, struct sprd_pwm_chip, chip); +} + /* * The list of clocks required by PWM channels, and each channel has 2 clocks: * enable clock and pwm clock.
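The sprd_pwm_from_chip() helper added above is the usual container_of() accessor: the driver embeds the generic struct pwm_chip inside its private structure, and every callback recovers the private structure from the chip pointer the PWM core hands it. The same idiom is what lets other drivers in this series (pwm-renesas-tpu, pwm-samsung, pwm-sti) replace pwm_get_chip_data() with per-channel arrays indexed by pwm->hwpwm inside the private structure. A minimal sketch of the pattern, as standalone C outside the kernel tree — the foo_pwm names are hypothetical and the container_of() definition below is a simplified stand-in for the kernel's type-checked macro:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the generic chip type that callbacks receive. */
struct pwm_chip {
	int npwm;
};

/* Driver-private state embedding the generic chip. */
struct foo_pwm_chip {
	unsigned int clk_rate;		/* private field, for illustration */
	struct pwm_chip chip;		/* embedded generic object */
};

/* Recover the enclosing structure from a pointer to its member. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static inline struct foo_pwm_chip *foo_pwm_from_chip(struct pwm_chip *chip)
{
	return container_of(chip, struct foo_pwm_chip, chip);
}

int main(void)
{
	struct foo_pwm_chip foo = { .clk_rate = 26000000, .chip = { .npwm = 4 } };
	struct pwm_chip *chip = &foo.chip;	/* what a callback would be given */

	/* Prints 26000000: the accessor walked back to the wrapper. */
	printf("%u\n", foo_pwm_from_chip(chip)->clk_rate);
	return 0;
}

The accessor compiles down to a constant offset subtraction, so it costs nothing at run time. The .owner = THIS_MODULE removals seen throughout these PWM diffs appear to rest on a companion core-side change that records the owning module when the chip is registered, which is why none of the converted drivers need to set the field by hand any longer.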
@@ -69,8 +74,7 @@ static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid, static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { - struct sprd_pwm_chip *spc = - container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip); struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; u32 val, duty, prescale; u64 tmp; @@ -162,8 +166,7 @@ static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm, static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct sprd_pwm_chip *spc = - container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip); struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; struct pwm_state *cstate = &pwm->state; int ret; @@ -210,7 +213,6 @@ static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sprd_pwm_ops = { .apply = sprd_pwm_apply, .get_state = sprd_pwm_get_state, - .owner = THIS_MODULE, }; static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) @@ -240,10 +242,8 @@ static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) chn->clk_rate = clk_get_rate(clk_pwm); } - if (!i) { - dev_err(spc->dev, "no available PWM channels\n"); - return -ENODEV; - } + if (!i) + return dev_err_probe(spc->dev, -ENODEV, "no available PWM channels\n"); spc->num_pwms = i; @@ -264,7 +264,6 @@ static int sprd_pwm_probe(struct platform_device *pdev) return PTR_ERR(spc->base); spc->dev = &pdev->dev; - platform_set_drvdata(pdev, spc); ret = sprd_pwm_clk_init(spc); if (ret) @@ -274,20 +273,13 @@ static int sprd_pwm_probe(struct platform_device *pdev) spc->chip.ops = &sprd_pwm_ops; spc->chip.npwm = spc->num_pwms; - ret = pwmchip_add(&spc->chip); + ret = devm_pwmchip_add(&pdev->dev, &spc->chip); if (ret) dev_err(&pdev->dev, "failed to add PWM chip\n"); return ret; } -static void sprd_pwm_remove(struct platform_device *pdev) -{ - struct sprd_pwm_chip *spc = platform_get_drvdata(pdev); - - pwmchip_remove(&spc->chip); -} - static const struct of_device_id sprd_pwm_of_match[] = { { .compatible = "sprd,ums512-pwm", }, { }, @@ -300,7 +292,6 @@ static struct platform_driver sprd_pwm_driver = { .of_match_table = sprd_pwm_of_match, }, .probe = sprd_pwm_probe, - .remove_new = sprd_pwm_remove, }; module_platform_driver(sprd_pwm_driver); diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index b1d1373648a3..dc92cea31cd0 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -79,6 +79,7 @@ struct sti_pwm_compat_data { unsigned int cpt_num_devs; unsigned int max_pwm_cnt; unsigned int max_prescale; + struct sti_cpt_ddata *ddata; }; struct sti_pwm_chip { @@ -314,7 +315,7 @@ static int sti_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, { struct sti_pwm_chip *pc = to_sti_pwmchip(chip); struct sti_pwm_compat_data *cdata = pc->cdata; - struct sti_cpt_ddata *ddata = pwm_get_chip_data(pwm); + struct sti_cpt_ddata *ddata = &cdata->ddata[pwm->hwpwm]; struct device *dev = pc->dev; unsigned int effective_ticks; unsigned long long high, low; @@ -420,7 +421,6 @@ static const struct pwm_ops sti_pwm_ops = { .capture = sti_pwm_capture, .apply = sti_pwm_apply, .free = sti_pwm_free, - .owner = THIS_MODULE, }; static irqreturn_t sti_pwm_interrupt(int irq, void *data) @@ -440,7 +440,7 @@ static irqreturn_t sti_pwm_interrupt(int irq, void *data) while (cpt_int_stat) { devicenum = ffs(cpt_int_stat) - 1; - ddata = pwm_get_chip_data(&pc->chip.pwms[devicenum]); + ddata = 
&pc->cdata->ddata[devicenum]; /* * Capture input: @@ -638,30 +638,28 @@ static int sti_pwm_probe(struct platform_device *pdev) dev_err(dev, "failed to prepare clock\n"); return ret; } + + cdata->ddata = devm_kzalloc(dev, cdata->cpt_num_devs * sizeof(*cdata->ddata), GFP_KERNEL); + if (!cdata->ddata) + return -ENOMEM; } pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; pc->chip.npwm = pc->cdata->pwm_num_devs; - ret = pwmchip_add(&pc->chip); - if (ret < 0) { - clk_unprepare(pc->pwm_clk); - clk_unprepare(pc->cpt_clk); - return ret; - } - for (i = 0; i < cdata->cpt_num_devs; i++) { - struct sti_cpt_ddata *ddata; - - ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); - if (!ddata) - return -ENOMEM; + struct sti_cpt_ddata *ddata = &cdata->ddata[i]; init_waitqueue_head(&ddata->wait); mutex_init(&ddata->lock); + } - pwm_set_chip_data(&pc->chip.pwms[i], ddata); + ret = pwmchip_add(&pc->chip); + if (ret < 0) { + clk_unprepare(pc->pwm_clk); + clk_unprepare(pc->cpt_clk); + return ret; } platform_set_drvdata(pdev, pc); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index bb3a045a7334..b67974cc1872 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -189,7 +189,6 @@ static int stm32_pwm_lp_get_state(struct pwm_chip *chip, } static const struct pwm_ops stm32_pwm_lp_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_lp_apply, .get_state = stm32_pwm_lp_get_state, }; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 3d6be7749e23..3303a754ea02 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -487,7 +487,6 @@ static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops stm32pwm_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_apply_locked, .capture = IS_ENABLED(CONFIG_DMA_ENGINE) ? 
stm32_pwm_capture : NULL, }; diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c index e205405c4828..a46f5b4dd816 100644 --- a/drivers/pwm/pwm-stmpe.c +++ b/drivers/pwm/pwm-stmpe.c @@ -287,7 +287,6 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops stmpe_24xx_pwm_ops = { .apply = stmpe_24xx_pwm_apply, - .owner = THIS_MODULE, }; static int __init stmpe_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index c84fcf1a13dc..1a439025540d 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -325,7 +325,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sun4i_pwm_ops = { .apply = sun4i_pwm_apply, .get_state = sun4i_pwm_get_state, - .owner = THIS_MODULE, }; static const struct sun4i_pwm_data sun4i_pwm_dual_nobypass = { diff --git a/drivers/pwm/pwm-sunplus.c b/drivers/pwm/pwm-sunplus.c index 7705c7b86c3a..773e2f80526e 100644 --- a/drivers/pwm/pwm-sunplus.c +++ b/drivers/pwm/pwm-sunplus.c @@ -163,7 +163,6 @@ static int sunplus_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sunplus_pwm_ops = { .apply = sunplus_pwm_apply, .get_state = sunplus_pwm_get_state, - .owner = THIS_MODULE, }; static void sunplus_pwm_clk_release(void *data) diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index a169a34e0778..39ea51e08c94 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -268,7 +268,6 @@ static int tegra_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops tegra_pwm_ops = { .apply = tegra_pwm_apply, - .owner = THIS_MODULE, }; static int tegra_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 8c94b266c1b2..11e3549cf103 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -205,7 +205,6 @@ static int ecap_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ecap_pwm_ops = { .apply = ecap_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ecap_of_match[] = { diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index ecbfd7e954ec..66ac2655845f 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -437,7 +437,6 @@ static int ehrpwm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ehrpwm_pwm_ops = { .free = ehrpwm_pwm_free, .apply = ehrpwm_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ehrpwm_of_match[] = { diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 8fb84b441853..625233f4703a 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -189,7 +189,6 @@ static int twl4030_pwmled_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops twl4030_pwmled_ops = { .apply = twl4030_pwmled_apply, - .owner = THIS_MODULE, }; static int twl6030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm, @@ -342,7 +341,6 @@ static const struct pwm_ops twl6030_pwmled_ops = { .apply = twl6030_pwmled_apply, .request = twl6030_pwmled_request, .free = twl6030_pwmled_free, - .owner = THIS_MODULE, }; static int twl_pwmled_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 86567add79db..603d31f27470 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -333,12 +333,10 @@ static const 
struct pwm_ops twl4030_pwm_ops = { .apply = twl4030_pwm_apply, .request = twl4030_pwm_request, .free = twl4030_pwm_free, - .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwm_ops = { .apply = twl6030_pwm_apply, - .owner = THIS_MODULE, }; static int twl_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-visconti.c b/drivers/pwm/pwm-visconti.c index 7f7591a2384c..8d736d558122 100644 --- a/drivers/pwm/pwm-visconti.c +++ b/drivers/pwm/pwm-visconti.c @@ -129,7 +129,6 @@ static int visconti_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops visconti_pwm_ops = { .apply = visconti_pwm_apply, .get_state = visconti_pwm_get_state, - .owner = THIS_MODULE, }; static int visconti_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index 6d46db51daac..5568d5312d3c 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -221,7 +221,6 @@ static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops vt8500_pwm_ops = { .apply = vt8500_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id vt8500_pwm_dt_ids[] = { @@ -236,10 +235,8 @@ static int vt8500_pwm_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int ret; - if (!np) { - dev_err(&pdev->dev, "invalid devicetree node\n"); - return -EINVAL; - } + if (!np) + return dev_err_probe(&pdev->dev, -EINVAL, "invalid devicetree node\n"); vt8500 = devm_kzalloc(&pdev->dev, sizeof(*vt8500), GFP_KERNEL); if (vt8500 == NULL) @@ -249,45 +246,23 @@ static int vt8500_pwm_probe(struct platform_device *pdev) vt8500->chip.ops = &vt8500_pwm_ops; vt8500->chip.npwm = VT8500_NR_PWMS; - vt8500->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(vt8500->clk)) { - dev_err(&pdev->dev, "clock source not specified\n"); - return PTR_ERR(vt8500->clk); - } + vt8500->clk = devm_clk_get_prepared(&pdev->dev, NULL); + if (IS_ERR(vt8500->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(vt8500->clk), "clock source not specified\n"); vt8500->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(vt8500->base)) return PTR_ERR(vt8500->base); - ret = clk_prepare(vt8500->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to prepare clock\n"); - return ret; - } - - ret = pwmchip_add(&vt8500->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip\n"); - clk_unprepare(vt8500->clk); - return ret; - } - - platform_set_drvdata(pdev, vt8500); - return ret; -} - -static void vt8500_pwm_remove(struct platform_device *pdev) -{ - struct vt8500_chip *vt8500 = platform_get_drvdata(pdev); - - pwmchip_remove(&vt8500->chip); + ret = devm_pwmchip_add(&pdev->dev, &vt8500->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); - clk_unprepare(vt8500->clk); + return 0; } static struct platform_driver vt8500_pwm_driver = { .probe = vt8500_pwm_probe, - .remove_new = vt8500_pwm_remove, .driver = { .name = "vt8500-pwm", .of_match_table = vt8500_pwm_dt_ids, diff --git a/drivers/pwm/pwm-xilinx.c b/drivers/pwm/pwm-xilinx.c index 85153ee90809..5f3c2a6fed11 100644 --- a/drivers/pwm/pwm-xilinx.c +++ b/drivers/pwm/pwm-xilinx.c @@ -198,7 +198,6 @@ static int xilinx_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops xilinx_pwm_ops = { .apply = xilinx_pwm_apply, .get_state = xilinx_pwm_get_state, - .owner = THIS_MODULE, }; static const struct regmap_config xilinx_pwm_regmap_config = { diff --git a/drivers/remoteproc/mtk_common.h 
b/drivers/remoteproc/mtk_common.h index ea6fa1100a00..6d7736a031f7 100644 --- a/drivers/remoteproc/mtk_common.h +++ b/drivers/remoteproc/mtk_common.h @@ -47,6 +47,7 @@ #define MT8192_SCP2SPM_IPC_CLR 0x4094 #define MT8192_GIPC_IN_SET 0x4098 #define MT8192_HOST_IPC_INT_BIT BIT(0) +#define MT8195_CORE1_HOST_IPC_INT_BIT BIT(4) #define MT8192_CORE0_SW_RSTN_CLR 0x10000 #define MT8192_CORE0_SW_RSTN_SET 0x10004 @@ -54,8 +55,28 @@ #define MT8192_CORE0_WDT_IRQ 0x10030 #define MT8192_CORE0_WDT_CFG 0x10034 +#define MT8195_SYS_STATUS 0x4004 +#define MT8195_CORE0_WDT BIT(16) +#define MT8195_CORE1_WDT BIT(17) + #define MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS GENMASK(7, 4) +#define MT8195_CPU1_SRAM_PD 0x1084 +#define MT8195_SSHUB2APMCU_IPC_SET 0x4088 +#define MT8195_SSHUB2APMCU_IPC_CLR 0x408C +#define MT8195_CORE1_SW_RSTN_CLR 0x20000 +#define MT8195_CORE1_SW_RSTN_SET 0x20004 +#define MT8195_CORE1_MEM_ATT_PREDEF 0x20008 +#define MT8195_CORE1_WDT_IRQ 0x20030 +#define MT8195_CORE1_WDT_CFG 0x20034 + +#define MT8195_SEC_CTRL 0x85000 +#define MT8195_CORE_OFFSET_ENABLE_D BIT(13) +#define MT8195_CORE_OFFSET_ENABLE_I BIT(12) +#define MT8195_L2TCM_OFFSET_RANGE_0_LOW 0x850b0 +#define MT8195_L2TCM_OFFSET_RANGE_0_HIGH 0x850b4 +#define MT8195_L2TCM_OFFSET 0x850d0 + #define SCP_FW_VER_LEN 32 #define SCP_SHARE_BUFFER_SIZE 288 @@ -91,17 +112,24 @@ struct mtk_scp_of_data { size_t ipi_buf_offset; }; +struct mtk_scp_of_cluster { + void __iomem *reg_base; + void __iomem *l1tcm_base; + size_t l1tcm_size; + phys_addr_t l1tcm_phys; + struct list_head mtk_scp_list; + /* Prevent concurrent operations of this structure and L2TCM power control. */ + struct mutex cluster_lock; + u32 l2tcm_refcnt; +}; + struct mtk_scp { struct device *dev; struct rproc *rproc; struct clk *clk; - void __iomem *reg_base; void __iomem *sram_base; size_t sram_size; phys_addr_t sram_phys; - void __iomem *l1tcm_base; - size_t l1tcm_size; - phys_addr_t l1tcm_phys; const struct mtk_scp_of_data *data; @@ -119,6 +147,9 @@ struct mtk_scp { size_t dram_size; struct rproc_subdev *rpmsg_subdev; + + struct list_head elem; + struct mtk_scp_of_cluster *cluster; }; /** diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index dcc94ee2458d..a35409eda0cf 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -68,8 +68,14 @@ EXPORT_SYMBOL_GPL(scp_put); static void scp_wdt_handler(struct mtk_scp *scp, u32 scp_to_host) { + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + struct mtk_scp *scp_node; + dev_err(scp->dev, "SCP watchdog timeout! 
0x%x", scp_to_host); - rproc_report_crash(scp->rproc, RPROC_WATCHDOG); + + /* report watchdog timeout to all cores */ + list_for_each_entry(scp_node, &scp_cluster->mtk_scp_list, elem) + rproc_report_crash(scp_node->rproc, RPROC_WATCHDOG); } static void scp_init_ipi_handler(void *data, unsigned int len, void *priv) @@ -106,7 +112,7 @@ static void scp_ipi_handler(struct mtk_scp *scp) scp_ipi_lock(scp, id); handler = ipi_desc[id].handler; if (!handler) { - dev_err(scp->dev, "No such ipi id = %d\n", id); + dev_err(scp->dev, "No handler for ipi id = %d\n", id); scp_ipi_unlock(scp, id); return; } @@ -152,35 +158,45 @@ static void mt8183_scp_reset_assert(struct mtk_scp *scp) { u32 val; - val = readl(scp->reg_base + MT8183_SW_RSTN); + val = readl(scp->cluster->reg_base + MT8183_SW_RSTN); val &= ~MT8183_SW_RSTN_BIT; - writel(val, scp->reg_base + MT8183_SW_RSTN); + writel(val, scp->cluster->reg_base + MT8183_SW_RSTN); } static void mt8183_scp_reset_deassert(struct mtk_scp *scp) { u32 val; - val = readl(scp->reg_base + MT8183_SW_RSTN); + val = readl(scp->cluster->reg_base + MT8183_SW_RSTN); val |= MT8183_SW_RSTN_BIT; - writel(val, scp->reg_base + MT8183_SW_RSTN); + writel(val, scp->cluster->reg_base + MT8183_SW_RSTN); } static void mt8192_scp_reset_assert(struct mtk_scp *scp) { - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); } static void mt8192_scp_reset_deassert(struct mtk_scp *scp) { - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_CLR); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_CLR); +} + +static void mt8195_scp_c1_reset_assert(struct mtk_scp *scp) +{ + writel(1, scp->cluster->reg_base + MT8195_CORE1_SW_RSTN_SET); +} + +static void mt8195_scp_c1_reset_deassert(struct mtk_scp *scp) +{ + writel(1, scp->cluster->reg_base + MT8195_CORE1_SW_RSTN_CLR); } static void mt8183_scp_irq_handler(struct mtk_scp *scp) { u32 scp_to_host; - scp_to_host = readl(scp->reg_base + MT8183_SCP_TO_HOST); + scp_to_host = readl(scp->cluster->reg_base + MT8183_SCP_TO_HOST); if (scp_to_host & MT8183_SCP_IPC_INT_BIT) scp_ipi_handler(scp); else @@ -188,14 +204,14 @@ static void mt8183_scp_irq_handler(struct mtk_scp *scp) /* SCP won't send another interrupt until we set SCP_TO_HOST to 0. */ writel(MT8183_SCP_IPC_INT_BIT | MT8183_SCP_WDT_INT_BIT, - scp->reg_base + MT8183_SCP_TO_HOST); + scp->cluster->reg_base + MT8183_SCP_TO_HOST); } static void mt8192_scp_irq_handler(struct mtk_scp *scp) { u32 scp_to_host; - scp_to_host = readl(scp->reg_base + MT8192_SCP2APMCU_IPC_SET); + scp_to_host = readl(scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_SET); if (scp_to_host & MT8192_SCP_IPC_INT_BIT) { scp_ipi_handler(scp); @@ -205,13 +221,48 @@ static void mt8192_scp_irq_handler(struct mtk_scp *scp) * MT8192_SCP2APMCU_IPC. 
*/ writel(MT8192_SCP_IPC_INT_BIT, - scp->reg_base + MT8192_SCP2APMCU_IPC_CLR); + scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_CLR); } else { scp_wdt_handler(scp, scp_to_host); - writel(1, scp->reg_base + MT8192_CORE0_WDT_IRQ); + writel(1, scp->cluster->reg_base + MT8192_CORE0_WDT_IRQ); } } +static void mt8195_scp_irq_handler(struct mtk_scp *scp) +{ + u32 scp_to_host; + + scp_to_host = readl(scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_SET); + + if (scp_to_host & MT8192_SCP_IPC_INT_BIT) { + scp_ipi_handler(scp); + } else { + u32 reason = readl(scp->cluster->reg_base + MT8195_SYS_STATUS); + + if (reason & MT8195_CORE0_WDT) + writel(1, scp->cluster->reg_base + MT8192_CORE0_WDT_IRQ); + + if (reason & MT8195_CORE1_WDT) + writel(1, scp->cluster->reg_base + MT8195_CORE1_WDT_IRQ); + + scp_wdt_handler(scp, reason); + } + + writel(scp_to_host, scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_CLR); +} + +static void mt8195_scp_c1_irq_handler(struct mtk_scp *scp) +{ + u32 scp_to_host; + + scp_to_host = readl(scp->cluster->reg_base + MT8195_SSHUB2APMCU_IPC_SET); + + if (scp_to_host & MT8192_SCP_IPC_INT_BIT) + scp_ipi_handler(scp); + + writel(scp_to_host, scp->cluster->reg_base + MT8195_SSHUB2APMCU_IPC_CLR); +} + static irqreturn_t scp_irq_handler(int irq, void *priv) { struct mtk_scp *scp = priv; @@ -341,26 +392,26 @@ static int mt8195_scp_clk_get(struct mtk_scp *scp) static int mt8183_scp_before_load(struct mtk_scp *scp) { /* Clear SCP to host interrupt */ - writel(MT8183_SCP_IPC_INT_BIT, scp->reg_base + MT8183_SCP_TO_HOST); + writel(MT8183_SCP_IPC_INT_BIT, scp->cluster->reg_base + MT8183_SCP_TO_HOST); /* Reset clocks before loading FW */ - writel(0x0, scp->reg_base + MT8183_SCP_CLK_SW_SEL); - writel(0x0, scp->reg_base + MT8183_SCP_CLK_DIV_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_SW_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_DIV_SEL); /* Initialize TCM before loading FW. */ - writel(0x0, scp->reg_base + MT8183_SCP_L1_SRAM_PD); - writel(0x0, scp->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_L1_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); /* Turn on the power of SCP's SRAM before using it. */ - writel(0x0, scp->reg_base + MT8183_SCP_SRAM_PDN); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_SRAM_PDN); /* * Set I-cache and D-cache size before loading SCP FW. * SCP SRAM logical address may change when cache size setting differs. */ writel(MT8183_SCP_CACHE_CON_WAYEN | MT8183_SCP_CACHESIZE_8KB, - scp->reg_base + MT8183_SCP_CACHE_CON); - writel(MT8183_SCP_CACHESIZE_8KB, scp->reg_base + MT8183_SCP_DCACHE_CON); + scp->cluster->reg_base + MT8183_SCP_CACHE_CON); + writel(MT8183_SCP_CACHESIZE_8KB, scp->cluster->reg_base + MT8183_SCP_DCACHE_CON); return 0; } @@ -386,28 +437,28 @@ static void scp_sram_power_off(void __iomem *addr, u32 reserved_mask) static int mt8186_scp_before_load(struct mtk_scp *scp) { /* Clear SCP to host interrupt */ - writel(MT8183_SCP_IPC_INT_BIT, scp->reg_base + MT8183_SCP_TO_HOST); + writel(MT8183_SCP_IPC_INT_BIT, scp->cluster->reg_base + MT8183_SCP_TO_HOST); /* Reset clocks before loading FW */ - writel(0x0, scp->reg_base + MT8183_SCP_CLK_SW_SEL); - writel(0x0, scp->reg_base + MT8183_SCP_CLK_DIV_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_SW_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_DIV_SEL); /* Turn on the power of SCP's SRAM before using it. 
Enable 1 block per time*/ - scp_sram_power_on(scp->reg_base + MT8183_SCP_SRAM_PDN, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8183_SCP_SRAM_PDN, 0); /* Initialize TCM before loading FW. */ - writel(0x0, scp->reg_base + MT8183_SCP_L1_SRAM_PD); - writel(0x0, scp->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); - writel(0x0, scp->reg_base + MT8186_SCP_L1_SRAM_PD_P1); - writel(0x0, scp->reg_base + MT8186_SCP_L1_SRAM_PD_p2); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_L1_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8186_SCP_L1_SRAM_PD_P1); + writel(0x0, scp->cluster->reg_base + MT8186_SCP_L1_SRAM_PD_p2); /* * Set I-cache and D-cache size before loading SCP FW. * SCP SRAM logical address may change when cache size setting differs. */ writel(MT8183_SCP_CACHE_CON_WAYEN | MT8183_SCP_CACHESIZE_8KB, - scp->reg_base + MT8183_SCP_CACHE_CON); - writel(MT8183_SCP_CACHESIZE_8KB, scp->reg_base + MT8183_SCP_DCACHE_CON); + scp->cluster->reg_base + MT8183_SCP_CACHE_CON); + writel(MT8183_SCP_CACHESIZE_8KB, scp->cluster->reg_base + MT8183_SCP_DCACHE_CON); return 0; } @@ -415,40 +466,100 @@ static int mt8186_scp_before_load(struct mtk_scp *scp) static int mt8192_scp_before_load(struct mtk_scp *scp) { /* clear SPM interrupt, SCP2SPM_IPC_CLR */ - writel(0xff, scp->reg_base + MT8192_SCP2SPM_IPC_CLR); + writel(0xff, scp->cluster->reg_base + MT8192_SCP2SPM_IPC_CLR); - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); /* enable SRAM clock */ - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_on(scp->reg_base + MT8192_L1TCM_SRAM_PDN, 0); - scp_sram_power_on(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* enable MPU for all memory regions */ - writel(0xff, scp->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + writel(0xff, scp->cluster->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + + return 0; +} + +static int mt8195_scp_l2tcm_on(struct mtk_scp *scp) +{ + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + mutex_lock(&scp_cluster->cluster_lock); + + if (scp_cluster->l2tcm_refcnt == 0) { + /* clear SPM interrupt, SCP2SPM_IPC_CLR */ + writel(0xff, scp->cluster->reg_base + MT8192_SCP2SPM_IPC_CLR); + + /* Power on L2TCM */ + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, + MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); + } + + scp_cluster->l2tcm_refcnt += 1; + + mutex_unlock(&scp_cluster->cluster_lock); return 0; } static int mt8195_scp_before_load(struct mtk_scp *scp) { - /* clear SPM interrupt, SCP2SPM_IPC_CLR */ - writel(0xff, scp->reg_base + MT8192_SCP2SPM_IPC_CLR); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + mt8195_scp_l2tcm_on(scp); - /* enable SRAM clock */ - 
scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_on(scp->reg_base + MT8192_L1TCM_SRAM_PDN, - MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); - scp_sram_power_on(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* enable MPU for all memory regions */ - writel(0xff, scp->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + writel(0xff, scp->cluster->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + + return 0; +} + +static int mt8195_scp_c1_before_load(struct mtk_scp *scp) +{ + u32 sec_ctrl; + struct mtk_scp *scp_c0; + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + scp->data->scp_reset_assert(scp); + + mt8195_scp_l2tcm_on(scp); + + scp_sram_power_on(scp->cluster->reg_base + MT8195_CPU1_SRAM_PD, 0); + + /* enable MPU for all memory regions */ + writel(0xff, scp->cluster->reg_base + MT8195_CORE1_MEM_ATT_PREDEF); + + /* + * The L2TCM_OFFSET_RANGE and L2TCM_OFFSET shift the destination address + * on SRAM when SCP core 1 accesses SRAM. + * + * This configuration solves booting SCP core 0 and core 1 from + * different SRAM addresses because core 0 and core 1 both boot from + * the head of SRAM by default. This must be configured before booting SCP core 1. + * + * The value of L2TCM_OFFSET_RANGE is from the viewpoint of SCP core 1. + * When SCP core 1 issues an address within the range (L2TCM_OFFSET_RANGE), + * the address will be added with a fixed offset (L2TCM_OFFSET) on the bus. + * The shift action is transparent to software. + */ + writel(0, scp->cluster->reg_base + MT8195_L2TCM_OFFSET_RANGE_0_LOW); + writel(scp->sram_size, scp->cluster->reg_base + MT8195_L2TCM_OFFSET_RANGE_0_HIGH); + + scp_c0 = list_first_entry(&scp_cluster->mtk_scp_list, struct mtk_scp, elem); + writel(scp->sram_phys - scp_c0->sram_phys, scp->cluster->reg_base + MT8195_L2TCM_OFFSET); + + /* enable SRAM offset when fetching instruction and data */ + sec_ctrl = readl(scp->cluster->reg_base + MT8195_SEC_CTRL); + sec_ctrl |= MT8195_CORE_OFFSET_ENABLE_I | MT8195_CORE_OFFSET_ENABLE_D; + writel(sec_ctrl, scp->cluster->reg_base + MT8195_SEC_CTRL); return 0; } @@ -567,11 +678,11 @@ static void *mt8192_scp_da_to_va(struct mtk_scp *scp, u64 da, size_t len) } /* optional memory region */ - if (scp->l1tcm_size && - da >= scp->l1tcm_phys && - (da + len) <= scp->l1tcm_phys + scp->l1tcm_size) { - offset = da - scp->l1tcm_phys; - return (void __force *)scp->l1tcm_base + offset; + if (scp->cluster->l1tcm_size && + da >= scp->cluster->l1tcm_phys && + (da + len) <= scp->cluster->l1tcm_phys + scp->cluster->l1tcm_size) { + offset = da - scp->cluster->l1tcm_phys; + return (void __force *)scp->cluster->l1tcm_base + offset; } /* optional memory region */ @@ -595,34 +706,62 @@ static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iome static void mt8183_scp_stop(struct mtk_scp *scp) { /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8183_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8183_WDT_CFG); } static void mt8192_scp_stop(struct mtk_scp *scp) { /* Disable SRAM clock */ -
scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8192_CORE0_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8192_CORE0_WDT_CFG); +} + +static void mt8195_scp_l2tcm_off(struct mtk_scp *scp) +{ + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + mutex_lock(&scp_cluster->cluster_lock); + + if (scp_cluster->l2tcm_refcnt > 0) + scp_cluster->l2tcm_refcnt -= 1; + + if (scp_cluster->l2tcm_refcnt == 0) { + /* Power off L2TCM */ + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, + MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); + } + + mutex_unlock(&scp_cluster->cluster_lock); } static void mt8195_scp_stop(struct mtk_scp *scp) { - /* Disable SRAM clock */ - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_off(scp->reg_base + MT8192_L1TCM_SRAM_PDN, - MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); - scp_sram_power_off(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + mt8195_scp_l2tcm_off(scp); + + scp_sram_power_off(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); + + /* Disable SCP watchdog */ + writel(0, scp->cluster->reg_base + MT8192_CORE0_WDT_CFG); +} + +static void mt8195_scp_c1_stop(struct mtk_scp *scp) +{ + mt8195_scp_l2tcm_off(scp); + + /* Power off CPU SRAM */ + scp_sram_power_off(scp->cluster->reg_base + MT8195_CPU1_SRAM_PD, 0); /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8192_CORE0_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8195_CORE1_WDT_CFG); } static int scp_stop(struct rproc *rproc) @@ -811,7 +950,9 @@ static void scp_remove_rpmsg_subdev(struct mtk_scp *scp) } } -static int scp_probe(struct platform_device *pdev) +static struct mtk_scp *scp_rproc_init(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster, + const struct mtk_scp_of_data *of_data) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; @@ -823,52 +964,38 @@ static int scp_probe(struct platform_device *pdev) ret = rproc_of_parse_firmware(dev, 0, &fw_name); if (ret < 0 && ret != -EINVAL) - return ret; + return ERR_PTR(ret); rproc = devm_rproc_alloc(dev, np->name, &scp_ops, fw_name, sizeof(*scp)); - if (!rproc) - return dev_err_probe(dev, -ENOMEM, "unable to allocate remoteproc\n"); + if (!rproc) { + dev_err(dev, "unable to allocate remoteproc\n"); + return ERR_PTR(-ENOMEM); + } scp = rproc->priv; scp->rproc = rproc; scp->dev = dev; - scp->data = of_device_get_match_data(dev); + scp->data = of_data; + scp->cluster = scp_cluster; platform_set_drvdata(pdev, scp); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram"); scp->sram_base = devm_ioremap_resource(dev, res); - if (IS_ERR(scp->sram_base)) - return dev_err_probe(dev, PTR_ERR(scp->sram_base), - "Failed to parse and map sram memory\n"); + if (IS_ERR(scp->sram_base)) { + dev_err(dev, "Failed to parse and map sram memory\n"); + return 
ERR_CAST(scp->sram_base); + } scp->sram_size = resource_size(res); scp->sram_phys = res->start; - /* l1tcm is an optional memory region */ - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "l1tcm"); - scp->l1tcm_base = devm_ioremap_resource(dev, res); - if (IS_ERR(scp->l1tcm_base)) { - ret = PTR_ERR(scp->l1tcm_base); - if (ret != -EINVAL) { - return dev_err_probe(dev, ret, "Failed to map l1tcm memory\n"); - } - } else { - scp->l1tcm_size = resource_size(res); - scp->l1tcm_phys = res->start; - } - - scp->reg_base = devm_platform_ioremap_resource_byname(pdev, "cfg"); - if (IS_ERR(scp->reg_base)) - return dev_err_probe(dev, PTR_ERR(scp->reg_base), - "Failed to parse and map cfg memory\n"); - ret = scp->data->scp_clk_get(scp); if (ret) - return ret; + return ERR_PTR(ret); ret = scp_map_memory_region(scp); if (ret) - return ret; + return ERR_PTR(ret); mutex_init(&scp->send_lock); for (i = 0; i < SCP_IPI_MAX; i++) @@ -895,11 +1022,7 @@ static int scp_probe(struct platform_device *pdev) goto remove_subdev; } - ret = rproc_add(rproc); - if (ret) - goto remove_subdev; - - return 0; + return scp; remove_subdev: scp_remove_rpmsg_subdev(scp); @@ -910,15 +1033,13 @@ release_dev_mem: mutex_destroy(&scp->ipi_desc[i].lock); mutex_destroy(&scp->send_lock); - return ret; + return ERR_PTR(ret); } -static void scp_remove(struct platform_device *pdev) +static void scp_free(struct mtk_scp *scp) { - struct mtk_scp *scp = platform_get_drvdata(pdev); int i; - rproc_del(scp->rproc); scp_remove_rpmsg_subdev(scp); scp_ipi_unregister(scp, SCP_IPI_INIT); scp_unmap_memory_region(scp); @@ -927,6 +1048,186 @@ static void scp_remove(struct platform_device *pdev) mutex_destroy(&scp->send_lock); } +static int scp_add_single_core(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster) +{ + struct device *dev = &pdev->dev; + struct list_head *scp_list = &scp_cluster->mtk_scp_list; + struct mtk_scp *scp; + int ret; + + scp = scp_rproc_init(pdev, scp_cluster, of_device_get_match_data(dev)); + if (IS_ERR(scp)) + return PTR_ERR(scp); + + ret = rproc_add(scp->rproc); + if (ret) { + dev_err(dev, "Failed to add rproc\n"); + scp_free(scp); + return ret; + } + + list_add_tail(&scp->elem, scp_list); + + return 0; +} + +static int scp_add_multi_core(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev_of_node(dev); + struct platform_device *cpdev; + struct device_node *child; + struct list_head *scp_list = &scp_cluster->mtk_scp_list; + const struct mtk_scp_of_data **cluster_of_data; + struct mtk_scp *scp, *temp; + int core_id = 0; + int ret; + + cluster_of_data = (const struct mtk_scp_of_data **)of_device_get_match_data(dev); + + for_each_available_child_of_node(np, child) { + if (!cluster_of_data[core_id]) { + ret = -EINVAL; + dev_err(dev, "Not support core %d\n", core_id); + of_node_put(child); + goto init_fail; + } + + cpdev = of_find_device_by_node(child); + if (!cpdev) { + ret = -ENODEV; + dev_err(dev, "Not found platform device for core %d\n", core_id); + of_node_put(child); + goto init_fail; + } + + scp = scp_rproc_init(cpdev, scp_cluster, cluster_of_data[core_id]); + put_device(&cpdev->dev); + if (IS_ERR(scp)) { + ret = PTR_ERR(scp); + dev_err(dev, "Failed to initialize core %d rproc\n", core_id); + of_node_put(child); + goto init_fail; + } + + ret = rproc_add(scp->rproc); + if (ret) { + dev_err(dev, "Failed to add rproc of core %d\n", core_id); + of_node_put(child); + scp_free(scp); + goto init_fail; + } + + 
list_add_tail(&scp->elem, scp_list); + core_id++; + } + + /* + * Here we are setting the platform device for @pdev to the last @scp that was + * created, which is needed because (1) scp_rproc_init() is calling + * platform_set_drvdata() on the child platform devices and (2) we need a handle to + * the cluster list in scp_remove(). + */ + platform_set_drvdata(pdev, scp); + + return 0; + +init_fail: + list_for_each_entry_safe_reverse(scp, temp, scp_list, elem) { + list_del(&scp->elem); + rproc_del(scp->rproc); + scp_free(scp); + } + + return ret; +} + +static bool scp_is_single_core(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev_of_node(dev); + struct device_node *child; + int num_cores = 0; + + for_each_child_of_node(np, child) + if (of_device_is_compatible(child, "mediatek,scp-core")) + num_cores++; + + return num_cores < 2; +} + +static int scp_cluster_init(struct platform_device *pdev, struct mtk_scp_of_cluster *scp_cluster) +{ + int ret; + + if (scp_is_single_core(pdev)) + ret = scp_add_single_core(pdev, scp_cluster); + else + ret = scp_add_multi_core(pdev, scp_cluster); + + return ret; +} + +static int scp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_scp_of_cluster *scp_cluster; + struct resource *res; + int ret; + + scp_cluster = devm_kzalloc(dev, sizeof(*scp_cluster), GFP_KERNEL); + if (!scp_cluster) + return -ENOMEM; + + scp_cluster->reg_base = devm_platform_ioremap_resource_byname(pdev, "cfg"); + if (IS_ERR(scp_cluster->reg_base)) + return dev_err_probe(dev, PTR_ERR(scp_cluster->reg_base), + "Failed to parse and map cfg memory\n"); + + /* l1tcm is an optional memory region */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "l1tcm"); + scp_cluster->l1tcm_base = devm_ioremap_resource(dev, res); + if (IS_ERR(scp_cluster->l1tcm_base)) { + ret = PTR_ERR(scp_cluster->l1tcm_base); + if (ret != -EINVAL) + return dev_err_probe(dev, ret, "Failed to map l1tcm memory\n"); + + scp_cluster->l1tcm_base = NULL; + } else { + scp_cluster->l1tcm_size = resource_size(res); + scp_cluster->l1tcm_phys = res->start; + } + + INIT_LIST_HEAD(&scp_cluster->mtk_scp_list); + mutex_init(&scp_cluster->cluster_lock); + + ret = devm_of_platform_populate(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to populate platform devices\n"); + + ret = scp_cluster_init(pdev, scp_cluster); + if (ret) + return ret; + + return 0; +} + +static void scp_remove(struct platform_device *pdev) +{ + struct mtk_scp *scp = platform_get_drvdata(pdev); + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + struct mtk_scp *temp; + + list_for_each_entry_safe_reverse(scp, temp, &scp_cluster->mtk_scp_list, elem) { + list_del(&scp->elem); + rproc_del(scp->rproc); + scp_free(scp); + } + mutex_destroy(&scp_cluster->cluster_lock); +} + static const struct mtk_scp_of_data mt8183_of_data = { .scp_clk_get = mt8183_scp_clk_get, .scp_before_load = mt8183_scp_before_load, @@ -980,7 +1281,7 @@ static const struct mtk_scp_of_data mt8192_of_data = { static const struct mtk_scp_of_data mt8195_of_data = { .scp_clk_get = mt8195_scp_clk_get, .scp_before_load = mt8195_scp_before_load, - .scp_irq_handler = mt8192_scp_irq_handler, + .scp_irq_handler = mt8195_scp_irq_handler, .scp_reset_assert = mt8192_scp_reset_assert, .scp_reset_deassert = mt8192_scp_reset_deassert, .scp_stop = mt8195_scp_stop, @@ -989,12 +1290,31 @@ static const struct mtk_scp_of_data mt8195_of_data = { .host_to_scp_int_bit = MT8192_HOST_IPC_INT_BIT, }; +static const struct 
mtk_scp_of_data mt8195_of_data_c1 = { + .scp_clk_get = mt8195_scp_clk_get, + .scp_before_load = mt8195_scp_c1_before_load, + .scp_irq_handler = mt8195_scp_c1_irq_handler, + .scp_reset_assert = mt8195_scp_c1_reset_assert, + .scp_reset_deassert = mt8195_scp_c1_reset_deassert, + .scp_stop = mt8195_scp_c1_stop, + .scp_da_to_va = mt8192_scp_da_to_va, + .host_to_scp_reg = MT8192_GIPC_IN_SET, + .host_to_scp_int_bit = MT8195_CORE1_HOST_IPC_INT_BIT, +}; + +static const struct mtk_scp_of_data *mt8195_of_data_cores[] = { + &mt8195_of_data, + &mt8195_of_data_c1, + NULL +}; + static const struct of_device_id mtk_scp_of_match[] = { { .compatible = "mediatek,mt8183-scp", .data = &mt8183_of_data }, { .compatible = "mediatek,mt8186-scp", .data = &mt8186_of_data }, { .compatible = "mediatek,mt8188-scp", .data = &mt8188_of_data }, { .compatible = "mediatek,mt8192-scp", .data = &mt8192_of_data }, { .compatible = "mediatek,mt8195-scp", .data = &mt8195_of_data }, + { .compatible = "mediatek,mt8195-scp-dual", .data = &mt8195_of_data_cores }, {}, }; MODULE_DEVICE_TABLE(of, mtk_scp_of_match); diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c index 9c7c17b9d181..cd0b60106ec2 100644 --- a/drivers/remoteproc/mtk_scp_ipi.c +++ b/drivers/remoteproc/mtk_scp_ipi.c @@ -177,7 +177,7 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, mutex_lock(&scp->send_lock); /* Wait until SCP receives the last command */ - ret = readl_poll_timeout_atomic(scp->reg_base + scp->data->host_to_scp_reg, + ret = readl_poll_timeout_atomic(scp->cluster->reg_base + scp->data->host_to_scp_reg, val, !val, 0, SCP_TIMEOUT_US); if (ret) { dev_err(scp->dev, "%s: IPI timeout!\n", __func__); @@ -192,7 +192,7 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, scp->ipi_id_ack[id] = false; /* send the command to SCP */ writel(scp->data->host_to_scp_int_bit, - scp->reg_base + scp->data->host_to_scp_reg); + scp->cluster->reg_base + scp->data->host_to_scp_reg); if (wait) { /* wait for SCP's ACK */ diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index 22fe7b5f5236..394b2c1cb5e2 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -2322,7 +2322,6 @@ static const struct rproc_hexagon_res msm8996_mss = { }, .proxy_clk_names = (char*[]){ "xo", - "pnoc", "qdss", NULL }, diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index b5447dd2dd35..913a5d2068e8 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -813,6 +813,21 @@ static const struct adsp_data sm6350_adsp_resource = { .ssctl_id = 0x14, }; +static const struct adsp_data sm6375_mpss_resource = { + .crash_reason_smem = 421, + .firmware_name = "modem.mdt", + .pas_id = 4, + .minidump_id = 3, + .auto_boot = false, + .proxy_pd_names = (char*[]){ + "cx", + NULL + }, + .ssr_name = "mpss", + .sysmon_name = "modem", + .ssctl_id = 0x12, +}; + static const struct adsp_data sm8150_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", @@ -1161,6 +1176,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init }, { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init }, { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init }, + { .compatible = "qcom,sc7180-adsp-pas", .data = &sm8250_adsp_resource}, { .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init}, { .compatible = 
"qcom,sc7280-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc8180x-adsp-pas", .data = &sm8150_adsp_resource}, @@ -1180,6 +1196,9 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sm6350-adsp-pas", .data = &sm6350_adsp_resource}, { .compatible = "qcom,sm6350-cdsp-pas", .data = &sm6350_cdsp_resource}, { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init}, + { .compatible = "qcom,sm6375-adsp-pas", .data = &sm6350_adsp_resource}, + { .compatible = "qcom,sm6375-cdsp-pas", .data = &sm8150_cdsp_resource}, + { .compatible = "qcom,sm6375-mpss-pas", .data = &sm6375_mpss_resource}, { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource}, { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource}, { .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init}, diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index e3ce01d98b4c..cb163766c56d 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -16,10 +16,9 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/of_reserved_mem.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/remoteproc.h> #include <linux/reset.h> @@ -341,7 +340,6 @@ static int st_rproc_parse_dt(struct platform_device *pdev) static int st_rproc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *match; struct st_rproc *ddata; struct device_node *np = dev->of_node; struct rproc *rproc; @@ -349,19 +347,17 @@ static int st_rproc_probe(struct platform_device *pdev) int enabled; int ret, i; - match = of_match_device(st_rproc_match, dev); - if (!match || !match->data) { - dev_err(dev, "No device match found\n"); - return -ENODEV; - } - rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); if (!rproc) return -ENOMEM; rproc->has_iommu = false; ddata = rproc->priv; - ddata->config = (struct st_rproc_config *)match->data; + ddata->config = (struct st_rproc_config *)device_get_match_data(dev); + if (!ddata->config) { + ret = -ENODEV; + goto free_rproc; + } platform_set_drvdata(pdev, rproc); diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 9d9b13530f78..4f469f0bcf8b 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -712,9 +712,9 @@ static int stm32_rproc_parse_dt(struct platform_device *pdev, unsigned int tzen; int err, irq; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq == -EPROBE_DEFER) - return dev_err_probe(dev, irq, "failed to get interrupt\n"); + return irq; if (irq > 0) { err = devm_request_irq(dev, irq, stm32_rproc_wdg, 0, diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index feca6de68da2..4395edea9a64 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -39,12 +39,14 @@ enum zynqmp_r5_cluster_mode { * struct mem_bank_data - Memory Bank description * * @addr: Start address of memory bank + * @da: device address * @size: Size of Memory bank * @pm_domain_id: Power-domains id of memory bank for firmware to turn on/off * @bank_name: name of the bank for remoteproc framework */ struct mem_bank_data { phys_addr_t addr; + u32 da; size_t size; u32 pm_domain_id; char *bank_name; @@ -75,11 +77,19 @@ 
struct mbox_info { * Hardcoded TCM bank values. This will be removed once TCM bindings are * accepted for system-dt specifications and upstreamed in linux kernel */ -static const struct mem_bank_data zynqmp_tcm_banks[] = { - {0xffe00000UL, 0x10000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 64KB each */ - {0xffe20000UL, 0x10000UL, PD_R5_0_BTCM, "btcm0"}, - {0xffe90000UL, 0x10000UL, PD_R5_1_ATCM, "atcm1"}, - {0xffeb0000UL, 0x10000UL, PD_R5_1_BTCM, "btcm1"}, +static const struct mem_bank_data zynqmp_tcm_banks_split[] = { + {0xffe00000UL, 0x0, 0x10000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 64KB each */ + {0xffe20000UL, 0x20000, 0x10000UL, PD_R5_0_BTCM, "btcm0"}, + {0xffe90000UL, 0x0, 0x10000UL, PD_R5_1_ATCM, "atcm1"}, + {0xffeb0000UL, 0x20000, 0x10000UL, PD_R5_1_BTCM, "btcm1"}, +}; + +/* In lockstep mode the cluster combines the two 64KB TCMs into one 128KB TCM */ +static const struct mem_bank_data zynqmp_tcm_banks_lockstep[] = { + {0xffe00000UL, 0x0, 0x20000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 128KB each */ + {0xffe20000UL, 0x20000, 0x20000UL, PD_R5_0_BTCM, "btcm0"}, + {0, 0, 0, PD_R5_1_ATCM, ""}, + {0, 0, 0, PD_R5_1_BTCM, ""}, }; /** @@ -526,30 +536,6 @@ static int tcm_mem_map(struct rproc *rproc, /* clear TCMs */ memset_io(va, 0, mem->len); - /* - * The R5s expect their TCM banks to be at address 0x0 and 0x2000, - * while on the Linux side they are at 0xffexxxxx. - * - * Zero out the high 12 bits of the address. This will give - * expected values for TCM Banks 0A and 0B (0x0 and 0x20000). - */ - mem->da &= 0x000fffff; - - /* - * TCM Banks 1A and 1B still have to be translated. - * - * Below handle these two banks' absolute addresses (0xffe90000 and - * 0xffeb0000) and convert to the expected relative addresses - * (0x0 and 0x20000). - */ - if (mem->da == 0x90000 || mem->da == 0xB0000) - mem->da -= 0x90000; - - /* if translated TCM bank address is not valid report error */ - if (mem->da != 0x0 && mem->da != 0x20000) { - dev_err(&rproc->dev, "invalid TCM address: %x\n", mem->da); - return -EINVAL; - } return 0; } @@ -571,6 +557,7 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) u32 pm_domain_id; size_t bank_size; char *bank_name; + u32 da; r5_core = rproc->priv; dev = r5_core->dev; @@ -583,6 +570,7 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) */ for (i = 0; i < num_banks; i++) { bank_addr = r5_core->tcm_banks[i]->addr; + da = r5_core->tcm_banks[i]->da; bank_name = r5_core->tcm_banks[i]->bank_name; bank_size = r5_core->tcm_banks[i]->size; pm_domain_id = r5_core->tcm_banks[i]->pm_domain_id; @@ -595,11 +583,11 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) goto release_tcm_split; } - dev_dbg(dev, "TCM carveout split mode %s addr=%llx, size=0x%lx", - bank_name, bank_addr, bank_size); + dev_dbg(dev, "TCM carveout split mode %s addr=%llx, da=0x%x, size=0x%lx", + bank_name, bank_addr, da, bank_size); rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, - bank_size, bank_addr, + bank_size, da, tcm_mem_map, tcm_mem_unmap, bank_name); if (!rproc_mem) { @@ -640,6 +628,7 @@ static int add_tcm_carveout_lockstep_mode(struct rproc *rproc) struct device *dev; u32 pm_domain_id; char *bank_name; + u32 da; r5_core = rproc->priv; dev = r5_core->dev; @@ -650,14 +639,11 @@ static int add_tcm_carveout_lockstep_mode(struct rproc *rproc) /* * In lockstep mode, TCM is contiguous memory block * However, each TCM block still needs to be enabled individually. - * So, Enable each TCM block individually, but add their size - * to create contiguous memory region. 
+ * So, enable each TCM block individually. + * Although ATCM and BTCM form a contiguous memory block, add two + * separate carveouts for them. */ - bank_addr = r5_core->tcm_banks[0]->addr; - bank_name = r5_core->tcm_banks[0]->bank_name; - for (i = 0; i < num_banks; i++) { - bank_size += r5_core->tcm_banks[i]->size; pm_domain_id = r5_core->tcm_banks[i]->pm_domain_id; /* Turn on each TCM bank individually */ @@ -668,23 +654,32 @@ dev_err(dev, "failed to turn on TCM 0x%x", pm_domain_id); goto release_tcm_lockstep; } - } - dev_dbg(dev, "TCM add carveout lockstep mode %s addr=0x%llx, size=0x%lx", - bank_name, bank_addr, bank_size); - - /* Register TCM address range, TCM map and unmap functions */ - rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, - bank_size, bank_addr, - tcm_mem_map, tcm_mem_unmap, - bank_name); - if (!rproc_mem) { - ret = -ENOMEM; - goto release_tcm_lockstep; - } + bank_size = r5_core->tcm_banks[i]->size; + if (bank_size == 0) + continue; - /* If registration is success, add carveouts */ - rproc_add_carveout(rproc, rproc_mem); + bank_addr = r5_core->tcm_banks[i]->addr; + da = r5_core->tcm_banks[i]->da; + bank_name = r5_core->tcm_banks[i]->bank_name; + + /* Register TCM address range, TCM map and unmap functions */ + rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, + bank_size, da, + tcm_mem_map, tcm_mem_unmap, + bank_name); + if (!rproc_mem) { + ret = -ENOMEM; + zynqmp_pm_release_node(pm_domain_id); + goto release_tcm_lockstep; + } + + /* If registration succeeds, add the carveout */ + rproc_add_carveout(rproc, rproc_mem); + + dev_dbg(dev, "TCM carveout lockstep mode %s addr=0x%llx, da=0x%x, size=0x%lx", + bank_name, bank_addr, da, bank_size); + } return 0; @@ -895,12 +890,19 @@ free_rproc: */ static int zynqmp_r5_get_tcm_node(struct zynqmp_r5_cluster *cluster) { + const struct mem_bank_data *zynqmp_tcm_banks; struct device *dev = cluster->dev; struct zynqmp_r5_core *r5_core; int tcm_bank_count, tcm_node; int i, j; - tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks); + if (cluster->mode == SPLIT_MODE) { + zynqmp_tcm_banks = zynqmp_tcm_banks_split; + tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks_split); + } else { + zynqmp_tcm_banks = zynqmp_tcm_banks_lockstep; + tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks_lockstep); + } /* count per core tcm banks */ tcm_bank_count = tcm_bank_count / cluster->core_count; diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 32b550c91d9f..8abc7d022ff7 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -545,7 +545,7 @@ static int rpmsg_dev_probe(struct device *dev) goto out; if (rpdrv->callback) { - strncpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy(chinfo.name, rpdev->id.name, sizeof(chinfo.name)); chinfo.src = rpdev->src; chinfo.dst = RPMSG_ADDR_ANY; diff --git a/drivers/rpmsg/rpmsg_ns.c b/drivers/rpmsg/rpmsg_ns.c index c70ad03ff2e9..bde8c8d433e0 100644 --- a/drivers/rpmsg/rpmsg_ns.c +++ b/drivers/rpmsg/rpmsg_ns.c @@ -50,7 +50,7 @@ static int rpmsg_ns_cb(struct rpmsg_device *rpdev, void *data, int len, /* don't trust the remote processor for null terminating the name */ msg->name[RPMSG_NAME_SIZE - 1] = '\0'; - strncpy(chinfo.name, msg->name, sizeof(chinfo.name)); + strscpy_pad(chinfo.name, msg->name, sizeof(chinfo.name)); chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = rpmsg32_to_cpu(rpdev, msg->addr);
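/*
 * Why the rpmsg copies differ: strscpy() guarantees a NUL-terminated
 * destination but leaves the bytes after the terminator untouched, while
 * strscpy_pad() also zero-fills that tail. Padding matters for buffers that
 * may be handed to the remote side as part of a whole structure (such as the
 * name-service messages in virtio_rpmsg_bus.c below), so stale kernel bytes
 * never leak over the transport; the purely in-kernel copy in
 * rpmsg_dev_probe() only needs strscpy(). A simplified userspace model of
 * the two helpers (stand-in names, assumes sz > 0; the real kernel versions
 * additionally return -E2BIG on truncation):
 */
#include <string.h>

static size_t scpy(char *dst, const char *src, size_t sz)
{
	size_t len = strnlen(src, sz - 1);

	memcpy(dst, src, len);
	dst[len] = '\0';			/* always terminated */
	return len;
}

static size_t scpy_pad(char *dst, const char *src, size_t sz)
{
	size_t len = scpy(dst, src, sz);

	memset(dst + len + 1, 0, sz - len - 1);	/* zero the tail too */
	return len;
}

diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 905ac7910c98..dc87965f8164 100644 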
--- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -329,7 +329,7 @@ static int virtio_rpmsg_announce_create(struct rpmsg_device *rpdev) virtio_has_feature(vrp->vdev, VIRTIO_RPMSG_F_NS)) { struct rpmsg_ns_msg nsm; - strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy_pad(nsm.name, rpdev->id.name, sizeof(nsm.name)); nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr); nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_CREATE); @@ -353,7 +353,7 @@ static int virtio_rpmsg_announce_destroy(struct rpmsg_device *rpdev) virtio_has_feature(vrp->vdev, VIRTIO_RPMSG_F_NS)) { struct rpmsg_ns_msg nsm; - strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy_pad(nsm.name, rpdev->id.name, sizeof(nsm.name)); nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr); nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_DESTROY); @@ -424,7 +424,7 @@ static struct rpmsg_device *__rpmsg_create_channel(struct virtproc_info *vrp, */ rpdev->announce = rpdev->src != RPMSG_ADDR_ANY; - strncpy(rpdev->id.name, chinfo->name, RPMSG_NAME_SIZE); + strscpy(rpdev->id.name, chinfo->name, sizeof(rpdev->id.name)); rpdev->dev.parent = &vrp->vdev->dev; rpdev->dev.release = virtio_rpmsg_release_device; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d09e08b71cfb..5dd33155d5d5 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); /* * ap_queue_info(): Check and get AP queue info. * Returns: 1 if APQN exists and info is filled, - * 0 if APQN seems to exit but there is no info + * 0 if APQN seems to exist but there is no info * available (eg. caused by an asynch pending error) * -1 invalid APQN, TAPQ error or AP queue status which * indicates there is no APQN. @@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, /* call TAPQ on this APQN */ status = ap_test_queue(qid, ap_apft_available(), &tapq_info); - /* handle pending async error with return 'no info available' */ - if (status.async) - return 0; - switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: case AP_RESPONSE_BUSY: - /* - * According to the architecture in all these cases the - * info should be filled. All bits 0 is not possible as - * there is at least one of the mode bits set. - */ - if (WARN_ON_ONCE(!tapq_info.value)) - return 0; - *q_type = tapq_info.at; - *q_fac = tapq_info.fac; - *q_depth = tapq_info.qd; - *q_ml = tapq_info.ml; - *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; - *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; - return 1; + /* For all these RCs the tapq info should be available */ + break; default: - /* - * A response code which indicates, there is no info available. 
- */ - return -1; + /* On a pending async error the info should be available */ + if (!status.async) + return -1; + break; } + + /* There should be at least one of the mode bits set */ + if (WARN_ON_ONCE(!tapq_info.value)) + return 0; + + *q_type = tapq_info.at; + *q_fac = tapq_info.fac; + *q_depth = tapq_info.qd; + *q_ml = tapq_info.ml; + *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; + + return 1; } void ap_wait(enum ap_sm_wait wait) @@ -1022,6 +1019,10 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { + /* Only trigger AP bus scans after the initial scan is done */ + if (atomic64_read(&ap_scan_bus_count) <= 0) + return; + /* processing a asynchronous bus rescan */ del_timer(&ap_config_timer); queue_work(system_long_wq, &ap_scan_work); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 359a35f894d5..b0771ca0849b 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -206,7 +206,6 @@ struct ap_queue { bool config; /* configured state */ bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ - bool interrupt; /* indicate if interrupts are enabled */ bool se_bound; /* SE bound state */ unsigned int assoc_idx; /* SE association index */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 993240370ecf..3934a0cc13e7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_AGAIN; } aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; @@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } + /* Check and maybe enable irq support (again) on this queue */ + if (!status.irq_enabled && status.queue_empty) { + void *lsi_ptr = ap_airq_ptr(); + + if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) { + aq->sm_state = AP_SM_STATE_SETIRQ_WAIT; + return AP_SM_WAIT_AGAIN; + } + } + return AP_SM_WAIT_NONE; } /** @@ -254,7 +264,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) fallthrough; case AP_RESPONSE_Q_FULL: aq->sm_state = AP_SM_STATE_QUEUE_FULL; - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; @@ -307,7 +317,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; - aq->interrupt = false; aq->rapq_fbit = 0; aq->se_bound = false; return AP_SM_WAIT_LOW_TIMEOUT; @@ -383,7 +392,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) if (status.irq_enabled == 1) { /* Irqs are now enabled */ - aq->interrupt = true; aq->sm_state = (aq->queue_count > 0) ? 
AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; } @@ -626,16 +634,21 @@ static ssize_t interrupt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; int rc = 0; spin_lock_bh(&aq->lock); - if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) + if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) { rc = sysfs_emit(buf, "Enable Interrupt pending.\n"); - else if (aq->interrupt) - rc = sysfs_emit(buf, "Interrupts enabled.\n"); - else - rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } else { + status = ap_tapq(aq->qid, NULL); + if (status.irq_enabled) + rc = sysfs_emit(buf, "Interrupts enabled.\n"); + else + rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } spin_unlock_bh(&aq->lock); + return rc; } @@ -1032,7 +1045,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) if (ap_sb_available() && is_prot_virt_guest()) aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups; aq->qid = qid; - aq->interrupt = false; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->requestq); diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c815722d0ac8..050462d95222 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_card *zc = dev_get_drvdata(dev); struct ap_card *ac = to_ap_card(dev); - int online = ac->config && zc->online ? 1 : 0; + int online = ac->config && !ac->chkstop && zc->online ? 1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && !ac->config) + if (online && (!ac->config || ac->chkstop)) return -ENODEV; zc->online = online; diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 112a80e8e6c2..67d8e0ae0eec 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_queue *zq = dev_get_drvdata(dev); struct ap_queue *aq = to_ap_queue(dev); - int online = aq->config && zq->online ? 1 : 0; + int online = aq->config && !aq->chkstop && zq->online ? 
1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && (!aq->config || !aq->card->config)) + if (online && (!aq->config || !aq->card->config || + aq->chkstop || aq->card->chkstop)) return -ENODEV; if (online && !zc->online) return -EINVAL; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6af2511e070c..cf8506d0f185 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3675,7 +3675,7 @@ static void qeth_flush_queue(struct qeth_qdio_out_q *queue) static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) { /* - * check if weed have to switch to non-packing mode or if + * check if we have to switch to non-packing mode or if * we have to get a pci flag out on the queue */ if ((atomic_read(&queue->used_buffers) <= QETH_LOW_WATERMARK_PACK) || diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 35dbfacecf1c..70c9dd6b6a31 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -1169,6 +1169,7 @@ config SPI_XTENSA_XTFPGA config SPI_ZYNQ_QSPI tristate "Xilinx Zynq QSPI controller" depends on ARCH_ZYNQ || COMPILE_TEST + depends on SPI_MEM help This enables support for the Zynq Quad SPI controller in master mode. diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 791df0e69105..8ead7acb99f3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3317,33 +3317,52 @@ void spi_unregister_controller(struct spi_controller *ctlr) } EXPORT_SYMBOL_GPL(spi_unregister_controller); +static inline int __spi_check_suspended(const struct spi_controller *ctlr) +{ + return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? 
-ESHUTDOWN : 0; +} + +static inline void __spi_mark_suspended(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags |= SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + +static inline void __spi_mark_resumed(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + int spi_controller_suspend(struct spi_controller *ctlr) { - int ret; + int ret = 0; /* Basically no-ops for non-queued controllers */ - if (!ctlr->queued) - return 0; - - ret = spi_stop_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue stop failed\n"); + if (ctlr->queued) { + ret = spi_stop_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue stop failed\n"); + } + __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { - int ret; - - if (!ctlr->queued) - return 0; + int ret = 0; - ret = spi_start_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue restart failed\n"); + __spi_mark_resumed(ctlr); + if (ctlr->queued) { + ret = spi_start_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue restart failed\n"); + } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); @@ -4147,8 +4166,7 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) - goto out; - + dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; @@ -4164,7 +4182,6 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s spi_idle_runtime_pm(ctlr); } -out: mutex_unlock(&ctlr->io_mutex); } @@ -4187,6 +4204,11 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) int status; struct spi_controller *ctlr = spi->controller; + if (__spi_check_suspended(ctlr)) { + dev_warn_once(&spi->dev, "Attempted to sync while suspended\n"); + return -ESHUTDOWN; + } + status = __spi_validate(spi, message); if (status != 0) return status; diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c index 57cc1960d059..a3cb68cfa0f9 100644 --- a/drivers/staging/greybus/pwm.c +++ b/drivers/staging/greybus/pwm.c @@ -258,7 +258,6 @@ static const struct pwm_ops gb_pwm_ops = { .request = gb_pwm_request, .free = gb_pwm_free, .apply = gb_pwm_apply, - .owner = THIS_MODULE, }; static int gb_pwm_probe(struct gbphy_device *gbphy_dev, diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 925ee1d61afb..8ca061d3bbb9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -170,13 +170,6 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, - [PORT_AR7] = { - .name = "AR7", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, - .flags = UART_CAP_FIFO /* | UART_CAP_AFE */, - }, [PORT_U6_16550A] = { .name = "U6_16550A", .fifo_size = 64,
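/*
 * The spi.c hunks above close a race between system suspend and late
 * submitters: suspend publishes SPI_CONTROLLER_SUSPENDED under
 * bus_lock_mutex, and __spi_sync() now fails fast with -ESHUTDOWN instead
 * of touching a quiesced queue. A compact userspace model of that
 * flag-under-lock pattern (pthreads standing in for the kernel mutex, all
 * names hypothetical):
 */
#include <errno.h>
#include <pthread.h>

struct bus_ctl {
	pthread_mutex_t lock;		/* models ctlr->bus_lock_mutex */
	unsigned int flags;
};
#define BUS_SUSPENDED 0x1

static void bus_set_suspended(struct bus_ctl *b, int on)
{
	pthread_mutex_lock(&b->lock);	/* serialize against transfers */
	if (on)
		b->flags |= BUS_SUSPENDED;
	else
		b->flags &= ~BUS_SUSPENDED;
	pthread_mutex_unlock(&b->lock);
}

static int bus_sync_transfer(struct bus_ctl *b)
{
	if (b->flags & BUS_SUSPENDED)	/* reject late callers */
		return -ESHUTDOWN;
	/* ... perform the transfer ... */
	return 0;
}

diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index b18c6b4f129a..305f396c764c 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -3752,7 +3752,7 @@ release: } -static int __exit amifb_remove(struct platform_device *pdev) +static void __exit amifb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); @@ -3765,11 +3765,16 @@ static int __exit 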
amifb_remove(struct platform_device *pdev) chipfree(); framebuffer_release(info); amifb_video_off(); - return 0; } -static struct platform_driver amifb_driver = { - .remove = __exit_p(amifb_remove), +/* + * amifb_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound + * at runtime. The driver needs to be marked with __refdata, otherwise modpost + * triggers a section mismatch warning. + */ +static struct platform_driver amifb_driver __refdata = { + .remove_new = __exit_p(amifb_remove), .driver = { .name = "amiga-video", }, diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index a908db233409..9e391e5eaf9d 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -220,7 +220,7 @@ static inline void atmel_lcdfb_power_control(struct atmel_lcdfb_info *sinfo, int } } -static const struct fb_fix_screeninfo atmel_lcdfb_fix __initconst = { +static const struct fb_fix_screeninfo atmel_lcdfb_fix = { .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, .xpanstep = 0, @@ -841,7 +841,7 @@ static void atmel_lcdfb_task(struct work_struct *work) atmel_lcdfb_reset(sinfo); } -static int __init atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) +static int atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) { struct fb_info *info = sinfo->info; int ret = 0; @@ -1017,7 +1017,7 @@ put_display_node: return ret; } -static int __init atmel_lcdfb_probe(struct platform_device *pdev) +static int atmel_lcdfb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info; @@ -1223,14 +1223,14 @@ out: return ret; } -static int __exit atmel_lcdfb_remove(struct platform_device *pdev) +static void atmel_lcdfb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info = dev_get_drvdata(dev); struct atmel_lcdfb_info *sinfo; if (!info || !info->par) - return 0; + return; sinfo = info->par; cancel_work_sync(&sinfo->task); @@ -1252,8 +1252,6 @@ static int __exit atmel_lcdfb_remove(struct platform_device *pdev) } framebuffer_release(info); - - return 0; } #ifdef CONFIG_PM @@ -1301,7 +1299,8 @@ static int atmel_lcdfb_resume(struct platform_device *pdev) #endif static struct platform_driver atmel_lcdfb_driver = { - .remove = __exit_p(atmel_lcdfb_remove), + .probe = atmel_lcdfb_probe, + .remove_new = atmel_lcdfb_remove, .suspend = atmel_lcdfb_suspend, .resume = atmel_lcdfb_resume, .driver = { @@ -1309,8 +1308,7 @@ static struct platform_driver atmel_lcdfb_driver = { .of_match_table = atmel_lcdfb_dt_ids, }, }; - -module_platform_driver_probe(atmel_lcdfb_driver, atmel_lcdfb_probe); +module_platform_driver(atmel_lcdfb_driver); MODULE_DESCRIPTION("AT91 LCD Controller framebuffer driver") ; MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
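/*
 * Context for the two fbdev conversions above: with
 * module_platform_driver_probe() a driver binds exactly once at registration
 * time, so its callbacks may stay in the .init/.exit sections (the amifb
 * case, which then needs __refdata to silence modpost). Once a driver moves
 * to plain module_platform_driver(), probe can run at any time, e.g. on
 * deferred probe or sysfs bind, so atmel_lcdfb has to drop its __init,
 * __initconst and __exit annotations. A skeleton of the probe-once pattern
 * (driver name hypothetical):
 */
#include <linux/module.h>
#include <linux/platform_device.h>

static int __init foo_probe(struct platform_device *pdev)
{
	return 0;	/* bound once, at registration time */
}

static void __exit foo_remove(struct platform_device *pdev)
{
	/* only reachable on module unload, never via sysfs unbind */
}

/* __refdata: the reference into .exit.text is intentional */
static struct platform_driver foo_driver __refdata = {
	.remove_new = __exit_p(foo_remove),
	.driver = { .name = "foo" },
};
module_platform_driver_probe(foo_driver, foo_probe);

diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c index 7fbd9f069ac2..0bced82fa494 100644 --- a/drivers/video/fbdev/fsl-diu-fb.c +++ b/drivers/video/fbdev/fsl-diu-fb.c @@ -490,7 +490,7 @@ static enum fsl_diu_monitor_port fsl_diu_name_to_port(const char *s) /* * Workaround for failed writing desc register of planes. * Needed with MPC5121 DIU rev 2.0 silicon. 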
*/ -void wr_reg_wa(u32 *reg, u32 val) +static void wr_reg_wa(u32 *reg, u32 val) { do { out_be32(reg, val); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index bf59daf862fc..a80939fe2ee6 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1013,6 +1013,8 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) } else if (IS_ENABLED(CONFIG_SYSFB)) { base = screen_info.lfb_base; size = screen_info.lfb_size; + } else { + goto err1; } /* diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index e7e03e920729..660499260f46 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1421,7 +1421,6 @@ static int init_imstt(struct fb_info *info) if ((info->var.xres * info->var.yres) * (info->var.bits_per_pixel >> 3) > info->fix.smem_len || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) { printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel); - framebuffer_release(info); return -ENODEV; } @@ -1453,14 +1452,11 @@ static int init_imstt(struct fb_info *info) FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_YPAN; - if (fb_alloc_cmap(&info->cmap, 0, 0)) { - framebuffer_release(info); + if (fb_alloc_cmap(&info->cmap, 0, 0)) return -ENODEV; - } if (register_framebuffer(info) < 0) { fb_dealloc_cmap(&info->cmap); - framebuffer_release(info); return -ENODEV; } @@ -1500,8 +1496,8 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!request_mem_region(addr, size, "imsttfb")) { printk(KERN_ERR "imsttfb: Can't reserve memory region\n"); - framebuffer_release(info); - return -ENODEV; + ret = -ENODEV; + goto release_info; } switch (pdev->device) { @@ -1518,36 +1514,39 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "imsttfb: Device 0x%x unknown, " "contact maintainer.\n", pdev->device); ret = -ENODEV; - goto error; + goto release_mem_region; } info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 
0x400000 : 0x800000); if (!info->screen_base) - goto error; + goto release_mem_region; info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); if (!par->dc_regs) - goto error; + goto unmap_screen_base; par->cmap_regs_phys = addr + 0x840000; par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000); if (!par->cmap_regs) - goto error; + goto unmap_dc_regs; info->pseudo_palette = par->palette; ret = init_imstt(info); if (ret) - goto error; + goto unmap_cmap_regs; pci_set_drvdata(pdev, info); - return ret; + return 0; -error: - if (par->dc_regs) - iounmap(par->dc_regs); - if (info->screen_base) - iounmap(info->screen_base); +unmap_cmap_regs: + iounmap(par->cmap_regs); +unmap_dc_regs: + iounmap(par->dc_regs); +unmap_screen_base: + iounmap(info->screen_base); +release_mem_region: release_mem_region(addr, size); +release_info: framebuffer_release(info); return ret; } diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index dcb1b81d35db..b421b46d88ef 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -423,11 +423,9 @@ static void offb_init_fb(struct platform_device *parent, const char *name, fix = &info->fix; var = &info->var; - if (name) { - strcpy(fix->id, "OFfb "); - strncat(fix->id, name, sizeof(fix->id) - sizeof("OFfb ")); - fix->id[sizeof(fix->id) - 1] = '\0'; - } else + if (name) + snprintf(fix->id, sizeof(fix->id), "OFfb %s", name); + else snprintf(fix->id, sizeof(fix->id), "OFfb %pOFn", dp); diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 42c96f1cfc93..694cf6318782 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1643,17 +1643,16 @@ static int omapfb_do_probe(struct platform_device *pdev, r = -ENOMEM; goto cleanup; } - fbdev->int_irq = platform_get_irq(pdev, 0); - if (fbdev->int_irq < 0) { - r = -ENXIO; + + r = platform_get_irq(pdev, 0); + if (r < 0) goto cleanup; - } + fbdev->int_irq = r; - fbdev->ext_irq = platform_get_irq(pdev, 1); - if (fbdev->ext_irq < 0) { - r = -ENXIO; + r = platform_get_irq(pdev, 1); + if (r < 0) goto cleanup; - } + fbdev->ext_irq = r; init_state++; @@ -1857,20 +1856,13 @@ static int __init omapfb_setup(char *options) if (!strncmp(this_opt, "accel", 5)) def_accel = 1; else if (!strncmp(this_opt, "vram:", 5)) { + unsigned long long vram; char *suffix; - unsigned long vram; - vram = (simple_strtoul(this_opt + 5, &suffix, 0)); + + vram = memparse(this_opt + 5, &suffix); switch (suffix[0]) { case '\0': break; - case 'm': - case 'M': - vram *= 1024; - fallthrough; - case 'k': - case 'K': - vram *= 1024; - break; default: pr_debug("omapfb: invalid vram suffix %c\n", suffix[0]); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c index 0daaf9f89bab..c6786726a1af 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c @@ -221,7 +221,7 @@ err_reg: return r; } -static int __exit tvc_remove(struct platform_device *pdev) +static void tvc_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -233,8 +233,6 @@ static int __exit tvc_remove(struct platform_device *pdev) tvc_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tvc_of_match[] = { @@ -247,11 +245,10 @@ MODULE_DEVICE_TABLE(of, 
tvc_of_match); static struct platform_driver tvc_connector_driver = { .probe = tvc_probe, - .remove = __exit_p(tvc_remove), + .remove_new = tvc_remove, .driver = { .name = "connector-analog-tv", .of_match_table = tvc_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index c8ad3ef42bd3..0cc9294f89b4 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -303,7 +303,7 @@ err_reg: return r; } -static int __exit dvic_remove(struct platform_device *pdev) +static void dvic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -317,8 +317,6 @@ static int __exit dvic_remove(struct platform_device *pdev) omap_dss_put_device(in); i2c_put_adapter(ddata->i2c_adapter); - - return 0; } static const struct of_device_id dvic_of_match[] = { @@ -330,11 +328,10 @@ MODULE_DEVICE_TABLE(of, dvic_of_match); static struct platform_driver dvi_connector_driver = { .probe = dvic_probe, - .remove = __exit_p(dvic_remove), + .remove_new = dvic_remove, .driver = { .name = "connector-dvi", .of_match_table = dvic_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c index 8f9ff9fb4ca4..b862a32670ae 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c @@ -249,7 +249,7 @@ err_reg: return r; } -static int __exit hdmic_remove(struct platform_device *pdev) +static void hdmic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -261,8 +261,6 @@ static int __exit hdmic_remove(struct platform_device *pdev) hdmic_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id hdmic_of_match[] = { @@ -274,11 +272,10 @@ MODULE_DEVICE_TABLE(of, hdmic_of_match); static struct platform_driver hdmi_connector_driver = { .probe = hdmic_probe, - .remove = __exit_p(hdmic_remove), + .remove_new = hdmic_remove, .driver = { .name = "connector-hdmi", .of_match_table = hdmic_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c index dd29dc5c77ec..f0d3eb581166 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c @@ -231,7 +231,7 @@ err_reg: return r; } -static int __exit opa362_remove(struct platform_device *pdev) +static void opa362_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -248,8 +248,6 @@ static int __exit opa362_remove(struct platform_device *pdev) opa362_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id opa362_of_match[] = { @@ -260,11 +258,10 @@ MODULE_DEVICE_TABLE(of, opa362_of_match); static struct platform_driver opa362_driver = { .probe = opa362_probe, - .remove = __exit_p(opa362_remove), + .remove_new = opa362_remove, .driver = { .name = "amplifier-opa362", .of_match_table = opa362_of_match, - .suppress_bind_attrs = true, }, }; diff 
--git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c index 7bac420169a6..c8aca4592949 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c @@ -217,7 +217,7 @@ err_reg: return r; } -static int __exit tfp410_remove(struct platform_device *pdev) +static void tfp410_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -234,8 +234,6 @@ static int __exit tfp410_remove(struct platform_device *pdev) tfp410_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tfp410_of_match[] = { @@ -247,11 +245,10 @@ MODULE_DEVICE_TABLE(of, tfp410_of_match); static struct platform_driver tfp410_driver = { .probe = tfp410_probe, - .remove = __exit_p(tfp410_remove), + .remove_new = tfp410_remove, .driver = { .name = "tfp410", .of_match_table = tfp410_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c index 67f0c9250e9e..eb3926d0361b 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c @@ -283,7 +283,7 @@ err_gpio: return r; } -static int __exit tpd_remove(struct platform_device *pdev) +static void tpd_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -300,8 +300,6 @@ static int __exit tpd_remove(struct platform_device *pdev) tpd_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tpd_of_match[] = { @@ -313,11 +311,10 @@ MODULE_DEVICE_TABLE(of, tpd_of_match); static struct platform_driver tpd_driver = { .probe = tpd_probe, - .remove = __exit_p(tpd_remove), + .remove_new = tpd_remove, .driver = { .name = "tpd12s015", .of_match_table = tpd_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c index 9790053c5877..937f9091274f 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c @@ -211,7 +211,7 @@ err_reg: return r; } -static int __exit panel_dpi_remove(struct platform_device *pdev) +static void panel_dpi_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -223,8 +223,6 @@ static int __exit panel_dpi_remove(struct platform_device *pdev) panel_dpi_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id panel_dpi_of_match[] = { @@ -236,11 +234,10 @@ MODULE_DEVICE_TABLE(of, panel_dpi_of_match); static struct platform_driver panel_dpi_driver = { .probe = panel_dpi_probe, - .remove = __exit_p(panel_dpi_remove), + .remove_new = panel_dpi_remove, .driver = { .name = "panel-dpi", .of_match_table = panel_dpi_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 77fce1223a64..adb8881bac28 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ 
b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -1241,7 +1241,7 @@ err_reg: return r; } -static int __exit dsicm_remove(struct platform_device *pdev) +static void dsicm_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -1269,8 +1269,6 @@ static int __exit dsicm_remove(struct platform_device *pdev) /* reset, to be sure that the panel is in a valid state */ dsicm_hw_reset(ddata); - - return 0; } static const struct of_device_id dsicm_of_match[] = { @@ -1282,11 +1280,10 @@ MODULE_DEVICE_TABLE(of, dsicm_of_match); static struct platform_driver dsicm_driver = { .probe = dsicm_probe, - .remove = __exit_p(dsicm_remove), + .remove_new = dsicm_remove, .driver = { .name = "panel-dsi-cm", .of_match_table = dsicm_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c index cc30758300e2..e37268cf8dca 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c @@ -292,7 +292,7 @@ err_reg: return r; } -static int __exit sharp_ls_remove(struct platform_device *pdev) +static void sharp_ls_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -304,8 +304,6 @@ static int __exit sharp_ls_remove(struct platform_device *pdev) sharp_ls_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id sharp_ls_of_match[] = { @@ -317,11 +315,10 @@ MODULE_DEVICE_TABLE(of, sharp_ls_of_match); static struct platform_driver sharp_ls_driver = { .probe = sharp_ls_probe, - .remove = __exit_p(sharp_ls_remove), + .remove_new = sharp_ls_remove, .driver = { .name = "panel-sharp-ls037v7dw01", .of_match_table = sharp_ls_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/vrfb.c b/drivers/video/fbdev/omap2/omapfb/vrfb.c index ee0dd4c6a646..568e6e1eca62 100644 --- a/drivers/video/fbdev/omap2/omapfb/vrfb.c +++ b/drivers/video/fbdev/omap2/omapfb/vrfb.c @@ -368,17 +368,10 @@ static int __init vrfb_probe(struct platform_device *pdev) return 0; } -static void __exit vrfb_remove(struct platform_device *pdev) -{ - vrfb_loaded = false; -} - static struct platform_driver vrfb_driver = { .driver.name = "omapvrfb", - .remove = __exit_p(vrfb_remove), }; - -module_platform_driver_probe(vrfb_driver, vrfb_probe); +builtin_platform_driver_probe(vrfb_driver, vrfb_probe); MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>"); MODULE_DESCRIPTION("OMAP VRFB"); diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index 58868f8880d6..a52b1ba43a48 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -574,7 +574,7 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg) break; case VIAFB_SET_GAMMA_LUT: - viafb_gamma_table = memdup_user(argp, 256 * sizeof(u32)); + viafb_gamma_table = memdup_array_user(argp, 256, sizeof(u32)); if (IS_ERR(viafb_gamma_table)) return PTR_ERR(viafb_gamma_table); viafb_set_gamma_table(viafb_bpp, viafb_gamma_table); diff --git a/drivers/vlynq/Kconfig b/drivers/vlynq/Kconfig deleted file mode 100644 index e7f9492a0b04..000000000000 --- a/drivers/vlynq/Kconfig +++ /dev/null @@ -1,21 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menu 
"TI VLYNQ" - depends on AR7 - -config VLYNQ - bool "TI VLYNQ bus support" - help - Support for Texas Instruments(R) VLYNQ bus. - The VLYNQ bus is a high-speed, serial and packetized - data bus which allows external peripherals of a SoC - to appear into the system's main memory. - - If unsure, say N - -config VLYNQ_DEBUG - bool "VLYNQ bus debug" - depends on VLYNQ && DEBUG_KERNEL - help - Turn on VLYNQ bus debugging. - -endmenu diff --git a/drivers/vlynq/Makefile b/drivers/vlynq/Makefile deleted file mode 100644 index d9ce5b2b5ce0..000000000000 --- a/drivers/vlynq/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for kernel vlynq drivers -# - -obj-$(CONFIG_VLYNQ) += vlynq.o diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c deleted file mode 100644 index 4af6615808cc..000000000000 --- a/drivers/vlynq/vlynq.c +++ /dev/null @@ -1,799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org> - * - * Parts of the VLYNQ specification can be found here: - * http://www.ti.com/litv/pdf/sprue36a - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/slab.h> -#include <linux/irq.h> - -#include <linux/vlynq.h> - -#define VLYNQ_CTRL_PM_ENABLE 0x80000000 -#define VLYNQ_CTRL_CLOCK_INT 0x00008000 -#define VLYNQ_CTRL_CLOCK_DIV(x) (((x) & 7) << 16) -#define VLYNQ_CTRL_INT_LOCAL 0x00004000 -#define VLYNQ_CTRL_INT_ENABLE 0x00002000 -#define VLYNQ_CTRL_INT_VECTOR(x) (((x) & 0x1f) << 8) -#define VLYNQ_CTRL_INT2CFG 0x00000080 -#define VLYNQ_CTRL_RESET 0x00000001 - -#define VLYNQ_CTRL_CLOCK_MASK (0x7 << 16) - -#define VLYNQ_INT_OFFSET 0x00000014 -#define VLYNQ_REMOTE_OFFSET 0x00000080 - -#define VLYNQ_STATUS_LINK 0x00000001 -#define VLYNQ_STATUS_LERROR 0x00000080 -#define VLYNQ_STATUS_RERROR 0x00000100 - -#define VINT_ENABLE 0x00000100 -#define VINT_TYPE_EDGE 0x00000080 -#define VINT_LEVEL_LOW 0x00000040 -#define VINT_VECTOR(x) ((x) & 0x1f) -#define VINT_OFFSET(irq) (8 * ((irq) % 4)) - -#define VLYNQ_AUTONEGO_V2 0x00010000 - -struct vlynq_regs { - u32 revision; - u32 control; - u32 status; - u32 int_prio; - u32 int_status; - u32 int_pending; - u32 int_ptr; - u32 tx_offset; - struct vlynq_mapping rx_mapping[4]; - u32 chip; - u32 autonego; - u32 unused[6]; - u32 int_device[8]; -}; - -#ifdef CONFIG_VLYNQ_DEBUG -static void vlynq_dump_regs(struct vlynq_device *dev) -{ - int i; - - printk(KERN_DEBUG "VLYNQ local=%p remote=%p\n", - dev->local, dev->remote); - for (i = 0; i < 32; i++) { - printk(KERN_DEBUG "VLYNQ: local %d: %08x\n", - i + 1, ((u32 *)dev->local)[i]); - printk(KERN_DEBUG "VLYNQ: remote %d: %08x\n", - i + 1, ((u32 *)dev->remote)[i]); - } -} - -static void vlynq_dump_mem(u32 *base, int count) -{ - int i; - - for (i = 0; i < (count + 3) / 4; i++) { - if (i % 4 == 0) - printk(KERN_DEBUG "\nMEM[0x%04x]:", i * 4); - printk(KERN_DEBUG " 0x%08x", *(base + i)); - } - printk(KERN_DEBUG "\n"); -} -#endif - -/* Check the VLYNQ link status with a given device */ -static int vlynq_linked(struct vlynq_device *dev) -{ - int i; - - for (i = 0; i < 100; i++) - if (readl(&dev->local->status) & VLYNQ_STATUS_LINK) - return 1; - else - cpu_relax(); - - return 0; -} - -static void vlynq_reset(struct vlynq_device *dev) -{ - 
writel(readl(&dev->local->control) | VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Wait for the devices to finish resetting */ - msleep(5); - - /* Remove reset bit */ - writel(readl(&dev->local->control) & ~VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Give some time for the devices to settle */ - msleep(5); -} - -static void vlynq_irq_unmask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val |= (VINT_ENABLE | virq) << VINT_OFFSET(virq); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static void vlynq_irq_mask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val &= ~(VINT_ENABLE << VINT_OFFSET(virq)); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static int vlynq_irq_type(struct irq_data *d, unsigned int flow_type) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - switch (flow_type & IRQ_TYPE_SENSE_MASK) { - case IRQ_TYPE_EDGE_RISING: - case IRQ_TYPE_EDGE_FALLING: - case IRQ_TYPE_EDGE_BOTH: - val |= VINT_TYPE_EDGE << VINT_OFFSET(virq); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_HIGH: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_LOW: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val |= VINT_LEVEL_LOW << VINT_OFFSET(virq); - break; - default: - return -EINVAL; - } - writel(val, &dev->remote->int_device[virq >> 2]); - return 0; -} - -static void vlynq_local_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->local->status); - - pr_debug("%s: local status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->local->status); -} - -static void vlynq_remote_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->remote->status); - - pr_debug("%s: remote status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->remote->status); -} - -static irqreturn_t vlynq_irq(int irq, void *dev_id) -{ - struct vlynq_device *dev = dev_id; - u32 status; - int virq = 0; - - status = readl(&dev->local->int_status); - writel(status, &dev->local->int_status); - - if (unlikely(!status)) - spurious_interrupt(); - - while (status) { - if (status & 1) - do_IRQ(dev->irq_start + virq); - status >>= 1; - virq++; - } - - return IRQ_HANDLED; -} - -static struct irq_chip vlynq_irq_chip = { - .name = "vlynq", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_set_type = vlynq_irq_type, -}; - -static struct irq_chip vlynq_local_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_local_ack, -}; - -static struct irq_chip vlynq_remote_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_remote_ack, -}; - -static int vlynq_setup_irq(struct vlynq_device *dev) -{ - u32 val; - int i, virq; - - if (dev->local_irq == dev->remote_irq) { - printk(KERN_ERR - "%s: local vlynq irq should be different from remote\n", - 
dev_name(&dev->dev)); - return -EINVAL; - } - - /* Clear local and remote error bits */ - writel(readl(&dev->local->status), &dev->local->status); - writel(readl(&dev->remote->status), &dev->remote->status); - - /* Now setup interrupts */ - val = VLYNQ_CTRL_INT_VECTOR(dev->local_irq); - val |= VLYNQ_CTRL_INT_ENABLE | VLYNQ_CTRL_INT_LOCAL | - VLYNQ_CTRL_INT2CFG; - val |= readl(&dev->local->control); - writel(VLYNQ_INT_OFFSET, &dev->local->int_ptr); - writel(val, &dev->local->control); - - val = VLYNQ_CTRL_INT_VECTOR(dev->remote_irq); - val |= VLYNQ_CTRL_INT_ENABLE; - val |= readl(&dev->remote->control); - writel(VLYNQ_INT_OFFSET, &dev->remote->int_ptr); - writel(val, &dev->remote->int_ptr); - writel(val, &dev->remote->control); - - for (i = dev->irq_start; i <= dev->irq_end; i++) { - virq = i - dev->irq_start; - if (virq == dev->local_irq) { - irq_set_chip_and_handler(i, &vlynq_local_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else if (virq == dev->remote_irq) { - irq_set_chip_and_handler(i, &vlynq_remote_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else { - irq_set_chip_and_handler(i, &vlynq_irq_chip, - handle_simple_irq); - irq_set_chip_data(i, dev); - writel(0, &dev->remote->int_device[virq >> 2]); - } - } - - if (request_irq(dev->irq, vlynq_irq, IRQF_SHARED, "vlynq", dev)) { - printk(KERN_ERR "%s: request_irq failed\n", - dev_name(&dev->dev)); - return -EAGAIN; - } - - return 0; -} - -static void vlynq_device_release(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - kfree(vdev); -} - -static int vlynq_device_match(struct device *dev, - struct device_driver *drv) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *vdrv = to_vlynq_driver(drv); - struct vlynq_device_id *ids = vdrv->id_table; - - while (ids->id) { - if (ids->id == vdev->dev_id) { - vdev->divisor = ids->divisor; - vlynq_set_drvdata(vdev, ids); - printk(KERN_INFO "Driver found for VLYNQ " - "device: %08x\n", vdev->dev_id); - return 1; - } - printk(KERN_DEBUG "Not using the %08x VLYNQ device's driver" - " for VLYNQ device: %08x\n", ids->id, vdev->dev_id); - ids++; - } - return 0; -} - -static int vlynq_device_probe(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - struct vlynq_device_id *id = vlynq_get_drvdata(vdev); - int result = -ENODEV; - - if (drv->probe) - result = drv->probe(vdev, id); - if (result) - put_device(dev); - return result; -} - -static void vlynq_device_remove(struct device *dev) -{ - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - - if (drv->remove) - drv->remove(to_vlynq_device(dev)); -} - -int __vlynq_register_driver(struct vlynq_driver *driver, struct module *owner) -{ - driver->driver.name = driver->name; - driver->driver.bus = &vlynq_bus_type; - return driver_register(&driver->driver); -} -EXPORT_SYMBOL(__vlynq_register_driver); - -void vlynq_unregister_driver(struct vlynq_driver *driver) -{ - driver_unregister(&driver->driver); -} -EXPORT_SYMBOL(vlynq_unregister_driver); - -/* - * A VLYNQ remote device can clock the VLYNQ bus master - * using a dedicated clock line. In that case, both the - * remove device and the bus master should have the same - * serial clock dividers configured. Iterate through the - * 8 possible dividers until we actually link with the - * device. - */ -static int __vlynq_try_remote(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - for (i = dev->dev_id ? 
vlynq_rdiv2 : vlynq_rdiv8; dev->dev_id ? - i <= vlynq_rdiv8 : i >= vlynq_rdiv2; - dev->dev_id ? i++ : i--) { - - if (!vlynq_linked(dev)) - break; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->remote->control); - writel((readl(&dev->local->control) - & ~(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_MASK)) | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_rdiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * A VLYNQ remote device can be clocked by the VLYNQ bus - * master using a dedicated clock line. In that case, only - * the bus master configures the serial clock divider. - * Iterate through the 8 possible dividers until we - * actually get a link with the device. - */ -static int __vlynq_try_local(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - - for (i = dev->dev_id ? vlynq_ldiv2 : vlynq_ldiv8; dev->dev_id ? - i <= vlynq_ldiv8 : i >= vlynq_ldiv2; - dev->dev_id ? i++ : i--) { - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_ldiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_ldiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * When using external clocking method, serial clock - * is supplied by an external oscillator, therefore we - * should mask the local clock bit in the clock control - * register for both the bus master and the remote device. - */ -static int __vlynq_try_external(struct vlynq_device *dev) -{ - vlynq_reset(dev); - if (!vlynq_linked(dev)) - return -ENODEV; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->remote->control); - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG "%s: using external clock\n", - dev_name(&dev->dev)); - dev->divisor = vlynq_div_external; - return 0; - } - - return -ENODEV; -} - -static int __vlynq_enable_device(struct vlynq_device *dev) -{ - int result; - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - result = ops->on(dev); - if (result) - return result; - - switch (dev->divisor) { - case vlynq_div_external: - case vlynq_div_auto: - /* When the device is brought from reset it should have clock - * generation negotiated by hardware. 
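- * (The remote end advertises that it is sourcing the clock by setting - * VLYNQ_CTRL_CLOCK_INT in its control register; in that case the remote - * dividers are probed first, otherwise they are probed last.)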
- * Check which device is generating clocks and perform setup - * accordingly */ - if (vlynq_linked(dev) && readl(&dev->remote->control) & - VLYNQ_CTRL_CLOCK_INT) { - if (!__vlynq_try_remote(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_external(dev)) - return 0; - } else { - if (!__vlynq_try_external(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_remote(dev)) - return 0; - } - break; - case vlynq_ldiv1: - case vlynq_ldiv2: - case vlynq_ldiv3: - case vlynq_ldiv4: - case vlynq_ldiv5: - case vlynq_ldiv6: - case vlynq_ldiv7: - case vlynq_ldiv8: - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_ldiv1), &dev->local->control); - writel(0, &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_ldiv1 + 1); - return 0; - } - break; - case vlynq_rdiv1: - case vlynq_rdiv2: - case vlynq_rdiv3: - case vlynq_rdiv4: - case vlynq_rdiv5: - case vlynq_rdiv6: - case vlynq_rdiv7: - case vlynq_rdiv8: - writel(0, &dev->local->control); - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_rdiv1), &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_rdiv1 + 1); - return 0; - } - break; - } - - ops->off(dev); - return -ENODEV; -} - -int vlynq_enable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - int result = -ENODEV; - - result = __vlynq_enable_device(dev); - if (result) - return result; - - result = vlynq_setup_irq(dev); - if (result) - ops->off(dev); - - dev->enabled = !result; - return result; -} -EXPORT_SYMBOL(vlynq_enable_device); - - -void vlynq_disable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - dev->enabled = 0; - free_irq(dev->irq, dev); - ops->off(dev); -} -EXPORT_SYMBOL(vlynq_disable_device); - -int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->local->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->local->rx_mapping[i].offset); - writel(mapping[i].size, &dev->local->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_mapping); - -int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->remote->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->remote->rx_mapping[i].offset); - writel(mapping[i].size, &dev->remote->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_mapping); - -int vlynq_set_local_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->remote_irq) - return -EINVAL; - - dev->local_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_irq); - -int vlynq_set_remote_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->local_irq) - return -EINVAL; - - dev->remote_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_irq); - -static int vlynq_probe(struct platform_device 
*pdev) -{ - struct vlynq_device *dev; - struct resource *regs_res, *mem_res, *irq_res; - int len, result; - - regs_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!regs_res) - return -ENODEV; - - mem_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mem"); - if (!mem_res) - return -ENODEV; - - irq_res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "devirq"); - if (!irq_res) - return -ENODEV; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - printk(KERN_ERR - "vlynq: failed to allocate device structure\n"); - return -ENOMEM; - } - - dev->id = pdev->id; - dev->dev.bus = &vlynq_bus_type; - dev->dev.parent = &pdev->dev; - dev_set_name(&dev->dev, "vlynq%d", dev->id); - dev->dev.platform_data = pdev->dev.platform_data; - dev->dev.release = vlynq_device_release; - - dev->regs_start = regs_res->start; - dev->regs_end = regs_res->end; - dev->mem_start = mem_res->start; - dev->mem_end = mem_res->end; - - len = resource_size(regs_res); - if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { - printk(KERN_ERR "%s: Can't request vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_request; - } - - dev->local = ioremap(regs_res->start, len); - if (!dev->local) { - printk(KERN_ERR "%s: Can't remap vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_remap; - } - - dev->remote = (struct vlynq_regs *)((void *)dev->local + - VLYNQ_REMOTE_OFFSET); - - dev->irq = platform_get_irq_byname(pdev, "irq"); - dev->irq_start = irq_res->start; - dev->irq_end = irq_res->end; - dev->local_irq = dev->irq_end - dev->irq_start; - dev->remote_irq = dev->local_irq - 1; - - if (device_register(&dev->dev)) - goto fail_register; - platform_set_drvdata(pdev, dev); - - printk(KERN_INFO "%s: regs 0x%p, irq %d, mem 0x%p\n", - dev_name(&dev->dev), (void *)dev->regs_start, dev->irq, - (void *)dev->mem_start); - - dev->dev_id = 0; - dev->divisor = vlynq_div_auto; - result = __vlynq_enable_device(dev); - if (result == 0) { - dev->dev_id = readl(&dev->remote->chip); - ((struct plat_vlynq_ops *)(dev->dev.platform_data))->off(dev); - } - if (dev->dev_id) - printk(KERN_INFO "Found a VLYNQ device: %08x\n", dev->dev_id); - - return 0; - -fail_register: - iounmap(dev->local); -fail_remap: -fail_request: - release_mem_region(regs_res->start, len); - kfree(dev); - return result; -} - -static int vlynq_remove(struct platform_device *pdev) -{ - struct vlynq_device *dev = platform_get_drvdata(pdev); - - device_unregister(&dev->dev); - iounmap(dev->local); - release_mem_region(dev->regs_start, - dev->regs_end - dev->regs_start + 1); - - kfree(dev); - - return 0; -} - -static struct platform_driver vlynq_platform_driver = { - .driver.name = "vlynq", - .probe = vlynq_probe, - .remove = vlynq_remove, -}; - -struct bus_type vlynq_bus_type = { - .name = "vlynq", - .match = vlynq_device_match, - .probe = vlynq_device_probe, - .remove = vlynq_device_remove, -}; -EXPORT_SYMBOL(vlynq_bus_type); - -static int vlynq_init(void) -{ - int res = 0; - - res = bus_register(&vlynq_bus_type); - if (res) - goto fail_bus; - - res = platform_driver_register(&vlynq_platform_driver); - if (res) - goto fail_platform; - - return 0; - -fail_platform: - bus_unregister(&vlynq_bus_type); -fail_bus: - return res; -} - -static void vlynq_exit(void) -{ - platform_driver_unregister(&vlynq_platform_driver); - bus_unregister(&vlynq_bus_type); -} - -module_init(vlynq_init); -module_exit(vlynq_exit); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 
8cb6fa45d599..7d22051b15a2 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1768,12 +1768,6 @@ config SIBYTE_WDOG To compile this driver as a loadable module, choose M here. The module will be called sb_wdog. -config AR7_WDT - tristate "TI AR7 Watchdog Timer" - depends on AR7 || (MIPS && 32BIT && COMPILE_TEST) - help - Hardware driver for the TI AR7 Watchdog Timer. - config TXX9_WDT tristate "Toshiba TXx9 Watchdog Timer" depends on CPU_TX49XX || (MIPS && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 7eab9de311cb..7cbc34514ec1 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -168,7 +168,6 @@ obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o -obj-$(CONFIG_AR7_WDT) += ar7_wdt.o obj-$(CONFIG_TXX9_WDT) += txx9wdt.o obj-$(CONFIG_OCTEON_WDT) += octeon-wdt.o octeon-wdt-y := octeon-wdt-main.o octeon-wdt-nmi.o diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c index eddeb0fede89..d4f739932f0b 100644 --- a/drivers/watchdog/apple_wdt.c +++ b/drivers/watchdog/apple_wdt.c @@ -173,6 +173,8 @@ static int apple_wdt_probe(struct platform_device *pdev) if (!wdt->clk_rate) return -EINVAL; + platform_set_drvdata(pdev, wdt); + wdt->wdd.ops = &apple_wdt_ops; wdt->wdd.info = &apple_wdt_info; wdt->wdd.max_timeout = U32_MAX / wdt->clk_rate; @@ -190,6 +192,28 @@ static int apple_wdt_probe(struct platform_device *pdev) return devm_watchdog_register_device(dev, &wdt->wdd); } +static int apple_wdt_resume(struct device *dev) +{ + struct apple_wdt *wdt = dev_get_drvdata(dev); + + if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd)) + apple_wdt_start(&wdt->wdd); + + return 0; +} + +static int apple_wdt_suspend(struct device *dev) +{ + struct apple_wdt *wdt = dev_get_drvdata(dev); + + if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd)) + apple_wdt_stop(&wdt->wdd); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(apple_wdt_pm_ops, apple_wdt_suspend, apple_wdt_resume); + static const struct of_device_id apple_wdt_of_match[] = { { .compatible = "apple,wdt" }, {}, @@ -200,6 +224,7 @@ static struct platform_driver apple_wdt_driver = { .driver = { .name = "apple-watchdog", .of_match_table = apple_wdt_of_match, + .pm = pm_sleep_ptr(&apple_wdt_pm_ops), }, .probe = apple_wdt_probe, }; diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c deleted file mode 100644 index cdcaeb0961ac..000000000000 --- a/drivers/watchdog/ar7_wdt.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/watchdog/ar7_wdt.c - * - * Copyright (C) 2007 Nicolas Thill <nico@openwrt.org> - * Copyright (c) 2005 Enrik Berkhan <Enrik.Berkhan@akk.org> - * - * Some code taken from: - * National Semiconductor SCx200 Watchdog support - * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/platform_device.h> -#include <linux/watchdog.h> -#include <linux/fs.h> -#include <linux/ioport.h> -#include <linux/io.h> -#include <linux/uaccess.h> -#include <linux/clk.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> - -#define LONGNAME "TI AR7 Watchdog Timer" - -MODULE_AUTHOR("Nicolas Thill <nico@openwrt.org>"); -MODULE_DESCRIPTION(LONGNAME); -MODULE_LICENSE("GPL"); - -static 
int margin = 60; -module_param(margin, int, 0); -MODULE_PARM_DESC(margin, "Watchdog margin in seconds"); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close"); - -#define READ_REG(x) readl((void __iomem *)&(x)) -#define WRITE_REG(x, v) writel((v), (void __iomem *)&(x)) - -struct ar7_wdt { - u32 kick_lock; - u32 kick; - u32 change_lock; - u32 change; - u32 disable_lock; - u32 disable; - u32 prescale_lock; - u32 prescale; -}; - -static unsigned long wdt_is_open; -static unsigned expect_close; -static DEFINE_SPINLOCK(wdt_lock); - -/* XXX currently fixed, allows max margin ~68.72 secs */ -#define prescale_value 0xffff - -/* Pointer to the remapped WDT IO space */ -static struct ar7_wdt *ar7_wdt; - -static struct clk *vbus_clk; - -static void ar7_wdt_kick(u32 value) -{ - WRITE_REG(ar7_wdt->kick_lock, 0x5555); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->kick_lock, 0xaaaa); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->kick, value); - return; - } - } - pr_err("failed to unlock WDT kick reg\n"); -} - -static void ar7_wdt_prescale(u32 value) -{ - WRITE_REG(ar7_wdt->prescale_lock, 0x5a5a); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->prescale_lock, 0xa5a5); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->prescale, value); - return; - } - } - pr_err("failed to unlock WDT prescale reg\n"); -} - -static void ar7_wdt_change(u32 value) -{ - WRITE_REG(ar7_wdt->change_lock, 0x6666); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->change_lock, 0xbbbb); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->change, value); - return; - } - } - pr_err("failed to unlock WDT change reg\n"); -} - -static void ar7_wdt_disable(u32 value) -{ - WRITE_REG(ar7_wdt->disable_lock, 0x7777); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->disable_lock, 0xcccc); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 2) { - WRITE_REG(ar7_wdt->disable_lock, 0xdddd); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->disable, value); - return; - } - } - } - pr_err("failed to unlock WDT disable reg\n"); -} - -static void ar7_wdt_update_margin(int new_margin) -{ - u32 change; - u32 vbus_rate; - - vbus_rate = clk_get_rate(vbus_clk); - change = new_margin * (vbus_rate / prescale_value); - if (change < 1) - change = 1; - if (change > 0xffff) - change = 0xffff; - ar7_wdt_change(change); - margin = change * prescale_value / vbus_rate; - pr_info("timer margin %d seconds (prescale %d, change %d, freq %d)\n", - margin, prescale_value, change, vbus_rate); -} - -static void ar7_wdt_enable_wdt(void) -{ - pr_debug("enabling watchdog timer\n"); - ar7_wdt_disable(1); - ar7_wdt_kick(1); -} - -static void ar7_wdt_disable_wdt(void) -{ - pr_debug("disabling watchdog timer\n"); - ar7_wdt_disable(0); -} - -static int ar7_wdt_open(struct inode *inode, struct file *file) -{ - /* only allow one at a time */ - if (test_and_set_bit(0, &wdt_is_open)) - return -EBUSY; - ar7_wdt_enable_wdt(); - expect_close = 0; - - return stream_open(inode, file); -} - -static int ar7_wdt_release(struct inode *inode, struct file *file) -{ - if (!expect_close) - pr_warn("watchdog device closed unexpectedly, will not disable the watchdog timer\n"); - else if (!nowayout) - ar7_wdt_disable_wdt(); - clear_bit(0, &wdt_is_open); - return 0; -} - -static ssize_t ar7_wdt_write(struct file *file, const char 
*data, - size_t len, loff_t *ppos) -{ - /* check for a magic close character */ - if (len) { - size_t i; - - spin_lock(&wdt_lock); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - - expect_close = 0; - for (i = 0; i < len; ++i) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_close = 1; - } - - } - return len; -} - -static long ar7_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - static const struct watchdog_info ident = { - .identity = LONGNAME, - .firmware_version = 1, - .options = (WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE), - }; - int new_margin; - - switch (cmd) { - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info *)arg, &ident, - sizeof(ident))) - return -EFAULT; - return 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - if (put_user(0, (int *)arg)) - return -EFAULT; - return 0; - case WDIOC_KEEPALIVE: - ar7_wdt_kick(1); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int *)arg)) - return -EFAULT; - if (new_margin < 1) - return -EINVAL; - - spin_lock(&wdt_lock); - ar7_wdt_update_margin(new_margin); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - fallthrough; - case WDIOC_GETTIMEOUT: - if (put_user(margin, (int *)arg)) - return -EFAULT; - return 0; - default: - return -ENOTTY; - } -} - -static const struct file_operations ar7_wdt_fops = { - .owner = THIS_MODULE, - .write = ar7_wdt_write, - .unlocked_ioctl = ar7_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = ar7_wdt_open, - .release = ar7_wdt_release, - .llseek = no_llseek, -}; - -static struct miscdevice ar7_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ar7_wdt_fops, -}; - -static int ar7_wdt_probe(struct platform_device *pdev) -{ - int rc; - - ar7_wdt = devm_platform_ioremap_resource_byname(pdev, "regs"); - if (IS_ERR(ar7_wdt)) - return PTR_ERR(ar7_wdt); - - vbus_clk = clk_get(NULL, "vbus"); - if (IS_ERR(vbus_clk)) { - pr_err("could not get vbus clock\n"); - return PTR_ERR(vbus_clk); - } - - ar7_wdt_disable_wdt(); - ar7_wdt_prescale(prescale_value); - ar7_wdt_update_margin(margin); - - rc = misc_register(&ar7_wdt_miscdev); - if (rc) { - pr_err("unable to register misc device\n"); - goto out; - } - return 0; - -out: - clk_put(vbus_clk); - vbus_clk = NULL; - return rc; -} - -static void ar7_wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&ar7_wdt_miscdev); - clk_put(vbus_clk); - vbus_clk = NULL; -} - -static void ar7_wdt_shutdown(struct platform_device *pdev) -{ - if (!nowayout) - ar7_wdt_disable_wdt(); -} - -static struct platform_driver ar7_wdt_driver = { - .probe = ar7_wdt_probe, - .remove_new = ar7_wdt_remove, - .shutdown = ar7_wdt_shutdown, - .driver = { - .name = "ar7_wdt", - }, -}; - -module_platform_driver(ar7_wdt_driver); diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index b72a858bbac7..b4773a6aaf8c 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -79,6 +79,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) #define WDT_CLEAR_TIMEOUT_STATUS 0x14 #define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0) +#define WDT_RESET_MASK1 0x1c +#define WDT_RESET_MASK2 0x20 /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -402,6 +404,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev) if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) || (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { + u32 reset_mask[2]; + size_t 
nrstmask = of_device_is_compatible(np, "aspeed,ast2600-wdt") ? 2 : 1; u32 reg = readl(wdt->base + WDT_RESET_WIDTH); reg &= wdt->cfg->ext_pulse_width_mask; @@ -419,6 +423,13 @@ static int aspeed_wdt_probe(struct platform_device *pdev) reg |= WDT_OPEN_DRAIN_MAGIC; writel(reg, wdt->base + WDT_RESET_WIDTH); + + ret = of_property_read_u32_array(np, "aspeed,reset-mask", reset_mask, nrstmask); + if (!ret) { + writel(reset_mask[0], wdt->base + WDT_RESET_MASK1); + if (nrstmask > 1) + writel(reset_mask[1], wdt->base + WDT_RESET_MASK2); + } } if (!of_property_read_u32(np, "aspeed,ext-pulse-duration", &duration)) { diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index fed7be246442..b111b28acb94 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -348,25 +348,21 @@ static int __init at91wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - wdt->sclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(wdt->sclk)) - return PTR_ERR(wdt->sclk); - - err = clk_prepare_enable(wdt->sclk); - if (err) { + wdt->sclk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(wdt->sclk)) { dev_err(&pdev->dev, "Could not enable slow clock\n"); - return err; + return PTR_ERR(wdt->sclk); } if (pdev->dev.of_node) { err = of_at91wdt_init(pdev->dev.of_node, wdt); if (err) - goto err_clk; + return err; } err = at91_wdt_init(pdev, wdt); if (err) - goto err_clk; + return err; platform_set_drvdata(pdev, wdt); @@ -374,11 +370,6 @@ static int __init at91wdt_probe(struct platform_device *pdev) wdt->wdd.timeout, wdt->nowayout); return 0; - -err_clk: - clk_disable_unprepare(wdt->sclk); - - return err; } static int __exit at91wdt_remove(struct platform_device *pdev) @@ -388,7 +379,6 @@ static int __exit at91wdt_remove(struct platform_device *pdev) pr_warn("I quit now, hardware will probably reboot!\n"); del_timer(&wdt->timer); - clk_disable_unprepare(wdt->sclk); return 0; } diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index b7b705060438..e5cc30622b12 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -257,19 +257,13 @@ static int ath79_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt_base)) return PTR_ERR(wdt_base); - wdt_clk = devm_clk_get(&pdev->dev, "wdt"); + wdt_clk = devm_clk_get_enabled(&pdev->dev, "wdt"); if (IS_ERR(wdt_clk)) return PTR_ERR(wdt_clk); - err = clk_prepare_enable(wdt_clk); - if (err) - return err; - wdt_freq = clk_get_rate(wdt_clk); - if (!wdt_freq) { - err = -EINVAL; - goto err_clk_disable; - } + if (!wdt_freq) + return -EINVAL; max_timeout = (0xfffffffful / wdt_freq); if (timeout < 1 || timeout > max_timeout) { @@ -286,20 +280,15 @@ static int ath79_wdt_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "unable to register misc device, err=%d\n", err); - goto err_clk_disable; + return err; } return 0; - -err_clk_disable: - clk_disable_unprepare(wdt_clk); - return err; } static void ath79_wdt_remove(struct platform_device *pdev) { misc_deregister(&ath79_wdt_miscdev); - clk_disable_unprepare(wdt_clk); } static void ath79_wdt_shutdown(struct platform_device *pdev) diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 0923201ce874..a7b814ea740b 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -5,12 +5,13 @@ * Author: 2013, Alexander Shiyan <shc_work@mail.ru> */ -#include <linux/err.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/err.h> #include <linux/gpio/consumer.h> 
-#include <linux/of.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/watchdog.h> static bool nowayout = WATCHDOG_NOWAYOUT; @@ -106,7 +107,6 @@ static const struct watchdog_ops gpio_wdt_ops = { static int gpio_wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct gpio_wdt_priv *priv; enum gpiod_flags gflags; unsigned int hw_margin; @@ -119,7 +119,7 @@ static int gpio_wdt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - ret = of_property_read_string(np, "hw_algo", &algo); + ret = device_property_read_string(dev, "hw_algo", &algo); if (ret) return ret; if (!strcmp(algo, "toggle")) { @@ -136,16 +136,14 @@ static int gpio_wdt_probe(struct platform_device *pdev) if (IS_ERR(priv->gpiod)) return PTR_ERR(priv->gpiod); - ret = of_property_read_u32(np, - "hw_margin_ms", &hw_margin); + ret = device_property_read_u32(dev, "hw_margin_ms", &hw_margin); if (ret) return ret; /* Disallow values lower than 2 and higher than 65535 ms */ if (hw_margin < 2 || hw_margin > 65535) return -EINVAL; - priv->always_running = of_property_read_bool(np, - "always-running"); + priv->always_running = device_property_read_bool(dev, "always-running"); watchdog_set_drvdata(&priv->wdd, priv); diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c index c703586c6e5f..b21d7a74a42d 100644 --- a/drivers/watchdog/imx7ulp_wdt.c +++ b/drivers/watchdog/imx7ulp_wdt.c @@ -23,6 +23,7 @@ #define LPO_CLK_SHIFT 8 #define WDOG_CS_CLK (LPO_CLK << LPO_CLK_SHIFT) #define WDOG_CS_EN BIT(7) +#define WDOG_CS_INT_EN BIT(6) #define WDOG_CS_UPDATE BIT(5) #define WDOG_CS_WAIT BIT(1) #define WDOG_CS_STOP BIT(0) @@ -62,6 +63,7 @@ struct imx7ulp_wdt_device { void __iomem *base; struct clk *clk; bool post_rcs_wait; + bool ext_reset; const struct imx_wdt_hw_feature *hw; }; @@ -285,6 +287,9 @@ static int imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout if (wdt->hw->prescaler_enable) val |= WDOG_CS_PRES; + if (wdt->ext_reset) + val |= WDOG_CS_INT_EN; + do { ret = _imx7ulp_wdt_init(wdt, timeout, val); toval = readl(wdt->base + WDOG_TOVAL); @@ -321,6 +326,9 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev) return PTR_ERR(imx7ulp_wdt->clk); } + /* The WDOG may need to do external reset through dedicated pin */ + imx7ulp_wdt->ext_reset = of_property_read_bool(dev->of_node, "fsl,ext-reset-output"); + imx7ulp_wdt->post_rcs_wait = true; if (of_device_is_compatible(dev->of_node, "fsl,imx8ulp-wdt")) { diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 8ac021748d16..e51fe1b78518 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -34,6 +34,7 @@ #define SC_IRQ_WDOG 1 #define SC_IRQ_GROUP_WDOG 1 +#define SC_TIMER_ERR_BUSY 10 static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0000); @@ -61,7 +62,9 @@ static int imx_sc_wdt_start(struct watchdog_device *wdog) arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_START_WDOG, 0, 0, 0, 0, 0, 0, &res); - if (res.a0) + + /* Ignore if already enabled(SC_TIMER_ERR_BUSY) */ + if (res.a0 && res.a0 != SC_TIMER_ERR_BUSY) return -EACCES; arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_WDOG_ACT, diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index bb1122909396..e888b1bdd1f2 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -13,9 +13,9 @@ * http://www.ite.com.tw/ * * Support of 
the watchdog timers, which are available on - * IT8607, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686, - * IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728, - * IT8772, IT8783 and IT8784. + * IT8607, IT8613, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, + * IT8686, IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, + * IT8728, IT8772, IT8783 and IT8784. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -50,6 +50,7 @@ /* Chip Id numbers */ #define NO_DEV_ID 0xffff #define IT8607_ID 0x8607 +#define IT8613_ID 0x8613 #define IT8620_ID 0x8620 #define IT8622_ID 0x8622 #define IT8625_ID 0x8625 @@ -277,6 +278,7 @@ static int __init it87_wdt_init(void) max_units = 65535; break; case IT8607_ID: + case IT8613_ID: case IT8620_ID: case IT8622_ID: case IT8625_ID: diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 607ce4b8df57..ec0c08652ec2 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -105,6 +105,25 @@ static const struct watchdog_ops ixp4xx_wdt_ops = { .owner = THIS_MODULE, }; +/* + * The A0 version of the IXP422 had a bug in the watchdog making + * it useless, but we still need to use it to restart the system + * as it is the only way, so in this special case we register a + * "dummy" watchdog that doesn't really work, but will support + * the restart operation. + */ +static int ixp4xx_wdt_dummy(struct watchdog_device *wdd) +{ + return 0; +} + +static const struct watchdog_ops ixp4xx_wdt_restart_only_ops = { + .start = ixp4xx_wdt_dummy, + .stop = ixp4xx_wdt_dummy, + .restart = ixp4xx_wdt_restart, + .owner = THIS_MODULE, +}; + static const struct watchdog_info ixp4xx_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE @@ -114,14 +133,17 @@ static const struct watchdog_info ixp4xx_wdt_info = { static int ixp4xx_wdt_probe(struct platform_device *pdev) { + static const struct watchdog_ops *iwdt_ops; struct device *dev = &pdev->dev; struct ixp4xx_wdt *iwdt; struct clk *clk; int ret; if (!(read_cpuid_id() & 0xf) && !cpu_is_ixp46x()) { - dev_err(dev, "Rev. A0 IXP42x CPU detected - watchdog disabled\n"); - return -ENODEV; + dev_info(dev, "Rev.
A0 IXP42x CPU detected - only restart supported\n"); + iwdt_ops = &ixp4xx_wdt_restart_only_ops; + } else { + iwdt_ops = &ixp4xx_wdt_ops; } iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); @@ -141,7 +163,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) iwdt->rate = IXP4XX_TIMER_FREQ; iwdt->wdd.info = &ixp4xx_wdt_info; - iwdt->wdd.ops = &ixp4xx_wdt_ops; + iwdt->wdd.ops = iwdt_ops; iwdt->wdd.min_timeout = 1; iwdt->wdd.max_timeout = U32_MAX / iwdt->rate; iwdt->wdd.parent = dev; diff --git a/drivers/watchdog/marvell_gti_wdt.c b/drivers/watchdog/marvell_gti_wdt.c index d7eb8286e11e..098bb141a521 100644 --- a/drivers/watchdog/marvell_gti_wdt.c +++ b/drivers/watchdog/marvell_gti_wdt.c @@ -8,8 +8,8 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> -#include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/of.h> #include <linux/watchdog.h> /* @@ -190,6 +190,13 @@ static int gti_wdt_set_pretimeout(struct watchdog_device *wdev, struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev); struct watchdog_device *wdog_dev = &priv->wdev; + if (!timeout) { + /* Disable Interrupt */ + writeq(GTI_CWD_INT_ENA_CLR_VAL(priv->wdt_timer_idx), + priv->base + GTI_CWD_INT_ENA_CLR); + return 0; + } + /* pretimeout should 1/3 of max_timeout */ if (timeout * 3 <= wdog_dev->max_timeout) return gti_wdt_settimeout(wdev, timeout * 3); @@ -271,7 +278,7 @@ static int gti_wdt_probe(struct platform_device *pdev) &wdt_idx); if (!err) { if (wdt_idx >= priv->data->gti_num_timers) - return dev_err_probe(&pdev->dev, err, + return dev_err_probe(&pdev->dev, -EINVAL, "GTI wdog timer index not valid"); priv->wdt_timer_idx = wdt_idx; @@ -292,6 +299,7 @@ static int gti_wdt_probe(struct platform_device *pdev) /* Maximum timeout is 3 times the pretimeout */ wdog_dev->max_timeout = max_pretimeout * 3; + wdog_dev->max_hw_heartbeat_ms = max_pretimeout * 1000; /* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */ wdog_dev->min_timeout = 3; wdog_dev->timeout = wdog_dev->pretimeout; @@ -308,7 +316,7 @@ static int gti_wdt_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return dev_err_probe(&pdev->dev, irq, "IRQ resource not found\n"); + return irq; err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0, pdev->name, &priv->wdev); diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c index 9c5b6616fc87..667e2c5b3431 100644 --- a/drivers/watchdog/mlx_wdt.c +++ b/drivers/watchdog/mlx_wdt.c @@ -39,6 +39,7 @@ * @tleft_idx: index for direct access to time left register; * @ping_idx: index for direct access to ping register; * @reset_idx: index for direct access to reset cause register; + * @regmap_val_sz: size of value in register map; * @wd_type: watchdog HW type; */ struct mlxreg_wdt { diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c index 05657dc1d36a..352853e6fe71 100644 --- a/drivers/watchdog/of_xilinx_wdt.c +++ b/drivers/watchdog/of_xilinx_wdt.c @@ -187,7 +187,7 @@ static int xwdt_probe(struct platform_device *pdev) watchdog_set_nowayout(xilinx_wdt_wdd, enable_once); - xdev->clk = devm_clk_get_enabled(dev, NULL); + xdev->clk = devm_clk_get_prepared(dev, NULL); if (IS_ERR(xdev->clk)) { if (PTR_ERR(xdev->clk) != -ENOENT) return PTR_ERR(xdev->clk); @@ -218,18 +218,25 @@ static int xwdt_probe(struct platform_device *pdev) spin_lock_init(&xdev->spinlock); watchdog_set_drvdata(xilinx_wdt_wdd, xdev); + rc = clk_enable(xdev->clk); + if (rc) { + dev_err(dev, "unable to enable clock\n"); + return 
rc; + } + rc = xwdt_selftest(xdev); if (rc == XWT_TIMER_FAILED) { dev_err(dev, "SelfTest routine error\n"); + clk_disable(xdev->clk); return rc; } + clk_disable(xdev->clk); + rc = devm_watchdog_register_device(dev, xilinx_wdt_wdd); if (rc) return rc; - clk_disable(xdev->clk); - dev_info(dev, "Xilinx Watchdog Timer with timeout %ds\n", xilinx_wdt_wdd->timeout); diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 421ebcda62e6..5f23913ce3b4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -152,14 +152,14 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd, timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000); if (action) - sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt); + sbsa_gwdt_reg_write((u64)gwdt->clk * timeout, gwdt); else /* * In the single stage mode, The first signal (WS0) is ignored, * the timeout is (WOR * 2), so the WOR should be configured * to half value of timeout. */ - sbsa_gwdt_reg_write(gwdt->clk / 2 * timeout, gwdt); + sbsa_gwdt_reg_write(((u64)gwdt->clk / 2) * timeout, gwdt); return 0; } diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c index d2aa43c00221..4c5b8d98a4f3 100644 --- a/drivers/watchdog/st_lpc_wdt.c +++ b/drivers/watchdog/st_lpc_wdt.c @@ -15,7 +15,6 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/watchdog.h> @@ -42,7 +41,7 @@ struct st_wdog { void __iomem *base; struct device *dev; struct regmap *regmap; - struct st_wdog_syscfg *syscfg; + const struct st_wdog_syscfg *syscfg; struct clk *clk; unsigned long clkrate; bool warm_reset; @@ -150,7 +149,6 @@ static void st_clk_disable_unprepare(void *data) static int st_wdog_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *match; struct device_node *np = dev->of_node; struct st_wdog *st_wdog; struct regmap *regmap; @@ -173,12 +171,7 @@ static int st_wdog_probe(struct platform_device *pdev) if (!st_wdog) return -ENOMEM; - match = of_match_device(st_wdog_match, dev); - if (!match) { - dev_err(dev, "Couldn't match device\n"); - return -ENODEV; - } - st_wdog->syscfg = (struct st_wdog_syscfg *)match->data; + st_wdog->syscfg = (struct st_wdog_syscfg *)device_get_match_data(dev); base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) diff --git a/drivers/watchdog/sunplus_wdt.c b/drivers/watchdog/sunplus_wdt.c index e2d8c532bcb1..9d3ca848e8b6 100644 --- a/drivers/watchdog/sunplus_wdt.c +++ b/drivers/watchdog/sunplus_wdt.c @@ -136,11 +136,6 @@ static const struct watchdog_ops sp_wdt_ops = { .restart = sp_wdt_restart, }; -static void sp_clk_disable_unprepare(void *data) -{ - clk_disable_unprepare(data); -} - static void sp_reset_control_assert(void *data) { reset_control_assert(data); @@ -156,17 +151,9 @@ static int sp_wdt_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->clk = devm_clk_get(dev, NULL); + priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) - return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to get clock\n"); - - ret = clk_prepare_enable(priv->clk); - if (ret) - return dev_err_probe(dev, ret, "Failed to enable clock\n"); - - ret = devm_add_action_or_reset(dev, sp_clk_disable_unprepare, priv->clk); - if (ret) - return ret; + return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to enable clock\n"); /* The timer and watchdog shared the STC reset */ priv->rstc 
= devm_reset_control_get_shared(dev, NULL); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0ba99bed59fc..650fdc7996e1 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -269,7 +269,7 @@ static int wdat_wdt_stop(struct watchdog_device *wdd) static int wdat_wdt_ping(struct watchdog_device *wdd) { - return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, 0, NULL); + return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, wdd->timeout, NULL); } static int wdat_wdt_set_timeout(struct watchdog_device *wdd, diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 2fe4a5832fcf..d6b9758ee23d 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -568,6 +568,7 @@ static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid, } const struct export_operations affs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = affs_fh_to_dentry, .fh_to_parent = affs_fh_to_parent, .get_parent = affs_get_parent, diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index df13a4f9a6e3..c08c2c7d6fbb 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -24,7 +24,6 @@ config BCACHEFS_FS select XXHASH select SRCU select SYMBOLIC_ERRNAME - select MEAN_AND_VARIANCE help The bcachefs filesystem - a modern, copy on write filesystem, with support for multiple devices, compression, checksumming, etc. @@ -42,7 +41,6 @@ config BCACHEFS_POSIX_ACL config BCACHEFS_DEBUG_TRANSACTIONS bool "bcachefs runtime info" depends on BCACHEFS_FS - default y help This makes the list of running btree transactions available in debugfs. @@ -78,7 +76,7 @@ config BCACHEFS_NO_LATENCY_ACCT config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT - select MEAN_AND_VARIANCE + depends on BCACHEFS_FS default KUNIT_ALL_TESTS help This option enables the kunit tests for mean_and_variance module. 
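The watchdog clock conversions above (at91sam9_wdt, ath79_wdt, sunplus_wdt) all apply the same devres pattern: the devm_clk_get() + clk_prepare_enable() pair and its manual error-path/remove-path cleanup collapse into a single devm_clk_get_enabled() call, which keeps the clock prepared and enabled for as long as the device is bound. A minimal probe sketch of the pattern, using a hypothetical foo_wdt driver rather than any of the drivers touched here:

#include <linux/clk.h>
#include <linux/dev_printk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int foo_wdt_probe(struct platform_device *pdev)
{
	struct clk *clk;
	unsigned long rate;

	/*
	 * Get, prepare and enable the clock in one call; devres disables,
	 * unprepares and puts it automatically on probe failure and on
	 * unbind, so no error labels and no clk_disable_unprepare() in a
	 * remove callback are needed.
	 */
	clk = devm_clk_get_enabled(&pdev->dev, NULL);
	if (IS_ERR(clk))
		return dev_err_probe(&pdev->dev, PTR_ERR(clk),
				     "Failed to enable clock\n");

	rate = clk_get_rate(clk);
	if (!rate)	/* the rate is needed to compute timeouts */
		return -EINVAL;

	return 0;
}

of_xilinx_wdt above uses the related devm_clk_get_prepared() variant instead, since it only needs the clock enabled around register access (the self-test), not for the whole time the driver is bound.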
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0749731b9072..45b64f89258c 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -70,6 +70,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-errors.o \ sb-members.o \ siphash.o \ six.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 2d516207e223..1fec0e67891f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -192,123 +192,109 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + int ret = 0; /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, + alloc_v1_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); +fsck_err: + return ret; } -int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v2(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v3(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + int ret = 0; - if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) { - prt_printf(err, "bad val size (%u > %zu)", - alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, + alloc_v4_val_size_bad, + "bad val size (%u > %zu)", + alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) { - prt_printf(err, "invalid backpointers_start"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, + alloc_v4_backpointers_start_bad, + "invalid backpointers_start"); - if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { - prt_printf(err, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, + alloc_key_data_type_bad, + "invalid 
data type (got %u should be %u)", + a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); switch (a.v->data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: - if (a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe) { - prt_printf(err, "empty data type free but have data"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(a.v->dirty_sectors || + a.v->cached_sectors || + a.v->stripe, c, err, + alloc_key_empty_but_have_data, + "empty data type free but have data"); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - if (!a.v->dirty_sectors) { - prt_printf(err, "data_type %s but dirty_sectors==0", - bch2_data_types[a.v->data_type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->dirty_sectors, c, err, + alloc_key_dirty_sectors_0, + "data_type %s but dirty_sectors==0", + bch2_data_types[a.v->data_type]); break; case BCH_DATA_cached: - if (!a.v->cached_sectors || - a.v->dirty_sectors || - a.v->stripe) { - prt_printf(err, "data type inconsistency"); - return -BCH_ERR_invalid_bkey; - } - - if (!a.v->io_time[READ] && - c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs) { - prt_printf(err, "cached bucket with read_time == 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->cached_sectors || + a.v->dirty_sectors || + a.v->stripe, c, err, + alloc_key_cached_inconsistency, + "data type inconsistency"); + + bkey_fsck_err_on(!a.v->io_time[READ] && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, + c, err, + alloc_key_cached_but_read_time_zero, + "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: break; } - - return 0; -} - -static inline u64 swab40(u64 x) -{ - return (((x & 0x00000000ffULL) << 32)| - ((x & 0x000000ff00ULL) << 16)| - ((x & 0x0000ff0000ULL) >> 0)| - ((x & 0x00ff000000ULL) >> 16)| - ((x & 0xff00000000ULL) >> 32)); +fsck_err: + return ret; } void bch2_alloc_v4_swab(struct bkey_s k) @@ -324,6 +310,7 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); + a->fragmentation_lru = swab64(a->fragmentation_lru); bps = alloc_v4_backpointers(a); for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { @@ -521,17 +508,18 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, + bucket_gens_val_size_bad, + "bad val size (%zu != %zu)", + bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); +fsck_err: + return ret; } void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) @@ -727,7 +715,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" " for %s", set ? 
"setting" : "clearing", - bch2_btree_ids[btree], + bch2_btree_id_str(btree), iter.pos.inode, iter.pos.offset, bch2_bkey_types[old.k->type], @@ -986,6 +974,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c, + alloc_key_to_missing_dev_bucket, "alloc key for invalid device:bucket %llu:%llu", alloc_k.k->p.inode, alloc_k.k->p.offset)) return bch2_btree_delete_at(trans, alloc_iter, 0); @@ -1005,7 +994,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != discard_key_type && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect key in need_discard btree (got %s should be %s)\n" + fsck_err(c, need_discard_key_wrong, + "incorrect key in need_discard btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[discard_key_type], @@ -1035,7 +1025,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != freespace_key_type && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect key in freespace btree (got %s should be %s)\n" + fsck_err(c, freespace_key_wrong, + "incorrect key in freespace btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[freespace_key_type], @@ -1066,7 +1057,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (a->gen != alloc_gen(k, gens_offset) && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect gen in bucket_gens btree (got %u should be %u)\n" + fsck_err(c, bucket_gens_key_wrong, + "incorrect gen in bucket_gens btree (got %u should be %u)\n" " %s", alloc_gen(k, gens_offset), a->gen, (printbuf_reset(&buf), @@ -1124,7 +1116,8 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, if (k.k->type != KEY_TYPE_set && (c->opts.reconstruct_alloc || - fsck_err(c, "hole in alloc btree missing in freespace btree\n" + fsck_err(c, freespace_hole_missing, + "hole in alloc btree missing in freespace btree\n" " device %llu buckets %llu-%llu", freespace_iter->pos.inode, freespace_iter->pos.offset, @@ -1187,6 +1180,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, for (i = gens_offset; i < gens_end_offset; i++) { if (fsck_err_on(g.v.gens[i], c, + bucket_gens_hole_wrong, "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", bucket_gens_pos_to_alloc(k.k->p, i).inode, bucket_gens_pos_to_alloc(k.k->p, i).offset, @@ -1244,8 +1238,9 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr return ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, + need_discard_freespace_key_to_invalid_dev_bucket, "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_ids[iter->btree_id], pos.inode, pos.offset)) + bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) goto delete; a = bch2_alloc_to_v4(alloc_k, &a_convert); @@ -1253,9 +1248,10 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr if (fsck_err_on(a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a)), c, + need_discard_freespace_key_bad, "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), - bch2_btree_ids[iter->btree_id], + bch2_btree_id_str(iter->btree_id), iter->pos.inode, iter->pos.offset, a->data_type == state, @@ -1320,6 +1316,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, dev_exists = bch2_dev_exists2(c, k.k->p.inode); if (!dev_exists) { if (fsck_err_on(!dev_exists, c, + bucket_gens_to_invalid_dev, 
"bucket_gens key for invalid device:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1330,6 +1327,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, ca = bch_dev_bkey_exists(c, k.k->p.inode); if (fsck_err_on(end <= ca->mi.first_bucket || start >= ca->mi.nbuckets, c, + bucket_gens_to_invalid_buckets, "bucket_gens key for invalid buckets:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1338,6 +1336,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = start; b < ca->mi.first_bucket; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1345,6 +1344,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = ca->mi.nbuckets; b < end; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1495,11 +1495,13 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, return ret; if (fsck_err_on(!a->io_time[READ], c, + alloc_key_cached_but_read_time_zero, "cached bucket with read_time 0\n" " %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, + alloc_key_to_missing_lru_entry, "missing lru entry\n" " %s", (printbuf_reset(&buf), @@ -2075,6 +2077,17 @@ void bch2_recalc_capacity(struct bch_fs *c) closure_wake_up(&c->freelist_wait); } +u64 bch2_min_rw_member_capacity(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + u64 ret = U64_MAX; + + for_each_rw_member(ca, c, i) + ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); + return ret; +} + static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) { struct open_bucket *ob; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 97042067d2a9..73faf99a222a 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -149,13 +149,13 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -193,7 +193,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void 
bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); +u64 bch2_min_rw_member_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3bc4abd3d7d5..b85c7765272f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans, struct bucket_alloc_state *s, struct closure *cl) { - struct btree_iter iter; - struct bkey_s_c k; + struct btree_iter iter, citer; + struct bkey_s_c k, ck; struct open_bucket *ob = NULL; - u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); - u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor)); + u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); + u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor)); + u64 alloc_cursor = alloc_start; int ret; + + /* + * Scan with an uncached iterator to avoid polluting the key cache. An + * uncached iter will return a cached key if one exists, but if not + * there is no other underlying protection for the associated key cache + * slot. To avoid racing bucket allocations, look up the cached key slot + * of any likely allocation candidate before attempting to proceed with + * the allocation. This provides proper exclusion on the associated + * bucket. + */ again: for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), BTREE_ITER_SLOTS, k, ret) { @@ -419,25 +430,38 @@ again: continue; a = bch2_alloc_to_v4(k, &a_convert); - if (a->data_type != BCH_DATA_free) continue; + /* now check the cached key to serialize concurrent allocs of the bucket */ + ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED); + ret = bkey_err(ck); + if (ret) + break; + + a = bch2_alloc_to_v4(ck, &a_convert); + if (a->data_type != BCH_DATA_free) + goto next; + s->buckets_seen++; ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); +next: + citer.path->preserve = false; + bch2_trans_iter_exit(trans, &citer); if (ob) break; } bch2_trans_iter_exit(trans, &iter); + alloc_cursor = iter.pos.offset; ca->alloc_cursor = alloc_cursor; if (!ob && ret) ob = ERR_PTR(ret); - if (!ob && alloc_cursor > alloc_start) { - alloc_cursor = alloc_start; + if (!ob && alloc_start > first_bucket) { + alloc_cursor = alloc_start = first_bucket; goto again; } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index cc856150a948..ef02c9bb0354 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -5,6 +5,7 @@ #include "backpointers.h" #include "btree_cache.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" @@ -37,25 +38,26 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); + int ret = 0; - if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, 
bp.v->bucket_offset))) { - prt_str(err, "backpointer at wrong pos"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + c, err, + backpointer_pos_wrong, + "backpointer at wrong pos"); +fsck_err: + return ret; } void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) { prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", - bch2_btree_ids[bp->btree_id], + bch2_btree_id_str(bp->btree_id), bp->level, (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), @@ -76,7 +78,7 @@ void bch2_backpointer_swab(struct bkey_s k) { struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); - bp.v->bucket_offset = swab32(bp.v->bucket_offset); + bp.v->bucket_offset = swab40(bp.v->bucket_offset); bp.v->bucket_len = swab32(bp.v->bucket_len); bch2_bpos_swab(&bp.v->pos); } @@ -219,18 +221,22 @@ out: static void backpointer_not_found(struct btree_trans *trans, struct bpos bp_pos, struct bch_backpointer bp, - struct bkey_s_c k, - const char *thing_it_points_to) + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + /* + * If we're using the btree write buffer, the backpointer we were + * looking at may have already been deleted - failure to find what it + * pointed to is not an error: + */ if (likely(!bch2_backpointers_no_use_write_buffer)) return; prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - thing_it_points_to); + bp.level ? "btree node" : "extent"); prt_printf(&buf, "bucket: "); bch2_bpos_to_text(&buf, bucket); prt_printf(&buf, "\n "); @@ -256,56 +262,37 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, struct bch_backpointer bp, unsigned iter_flags) { - struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, bp.btree_id); - struct bpos bucket = bp_pos_to_bucket(c, bp_pos); - struct bkey_s_c k; - - bch2_trans_node_iter_init(trans, iter, - bp.btree_id, - bp.pos, - 0, - min(bp.level, r->level), - iter_flags); - k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { - bch2_trans_iter_exit(trans, iter); - return k; - } - - if (bp.level == r->level + 1) - k = bkey_i_to_s_c(&r->key); - - if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); + if (likely(!bp.level)) { + struct bch_fs *c = trans->c; + struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + struct bkey_s_c k; + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, 0, + iter_flags); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } - if (unlikely(bch2_backpointers_no_use_write_buffer)) { - if (bp.level) { - struct btree *b; + if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) + return k; - /* - * If a backpointer for a btree node wasn't found, it may be - * because it was overwritten by a new btree node that hasn't - * been written out yet - backpointer_get_node() checks for - * this: - */ - b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); - if (!IS_ERR_OR_NULL(b)) - return bkey_i_to_s_c(&b->key); + bch2_trans_iter_exit(trans, iter); + backpointer_not_found(trans, bp_pos, bp, k); + return bkey_s_c_null; + } else { + struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); + if (IS_ERR_OR_NULL(b)) { bch2_trans_iter_exit(trans, iter); - 
- if (IS_ERR(b)) - return bkey_s_c_err(PTR_ERR(b)); - return bkey_s_c_null; + return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; } - - backpointer_not_found(trans, bp_pos, bp, k, "extent"); + return bkey_i_to_s_c(&b->key); } - - return bkey_s_c_null; } struct btree *bch2_backpointer_get_node(struct btree_trans *trans, @@ -329,6 +316,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (IS_ERR(b)) goto err; + BUG_ON(b->c.level != bp.level - 1); + if (b && extent_matches_bp(c, bp.btree_id, bp.level, bkey_i_to_s_c(&b->key), bucket, bp)) @@ -337,8 +326,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (b && btree_node_will_make_reachable(b)) { b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); } else { - backpointer_not_found(trans, bp_pos, bp, - bkey_i_to_s_c(&b->key), "btree node"); + backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); b = NULL; } err: @@ -356,6 +344,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ int ret = 0; if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, + backpointer_to_missing_device, "backpointer for missing device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, bp_iter, 0); @@ -369,6 +358,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ goto out; if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, + backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", alloc_iter.pos.inode, alloc_iter.pos.offset, (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { @@ -453,14 +443,14 @@ fsck_err: return ret; missing: prt_printf(&buf, "missing backpointer for btree=%s l=%u ", - bch2_btree_ids[bp.btree_id], bp.level); + bch2_btree_id_str(bp.btree_id), bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); prt_printf(&buf, "\nbp pos "); bch2_bpos_to_text(&buf, bp_iter.pos); if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || c->opts.reconstruct_alloc || - fsck_err(c, "%s", buf.buf)) + fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); goto out; @@ -793,7 +783,9 @@ static int check_one_backpointer(struct btree_trans *trans, } if (fsck_err_on(!k.k, c, - "backpointer for missing extent\n %s", + backpointer_to_missing_ptr, + "backpointer for missing %s\n %s", + bp.v->level ? 
"btree node" : "extent", (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); goto out; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 547e0617602a..ab866feeaf66 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -7,7 +7,16 @@ #include "buckets.h" #include "super.h" -int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k, +static inline u64 swab40(u64 x) +{ + return (((x & 0x00000000ffULL) << 32)| + ((x & 0x000000ff00ULL) << 16)| + ((x & 0x0000ff0000ULL) >> 0)| + ((x & 0x00ff000000ULL) >> 16)| + ((x & 0xff00000000ULL) >> 32)); +} + +int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, enum bkey_invalid_flags, struct printbuf *); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h index 1fbed1f8378d..be2edced5213 100644 --- a/fs/bcachefs/bbpos.h +++ b/fs/bcachefs/bbpos.h @@ -2,20 +2,9 @@ #ifndef _BCACHEFS_BBPOS_H #define _BCACHEFS_BBPOS_H +#include "bbpos_types.h" #include "bkey_methods.h" - -struct bbpos { - enum btree_id btree; - struct bpos pos; -}; - -static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) -{ - return (struct bbpos) { btree, pos }; -} - -#define BBPOS_MIN BBPOS(0, POS_MIN) -#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) +#include "btree_cache.h" static inline int bbpos_cmp(struct bbpos l, struct bbpos r) { @@ -40,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) { - prt_str(out, bch2_btree_ids[pos.btree]); + prt_str(out, bch2_btree_id_str(pos.btree)); prt_char(out, ':'); bch2_bpos_to_text(out, pos.pos); } diff --git a/fs/bcachefs/bbpos_types.h b/fs/bcachefs/bbpos_types.h new file mode 100644 index 000000000000..5198e94cf3b8 --- /dev/null +++ b/fs/bcachefs/bbpos_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BBPOS_TYPES_H +#define _BCACHEFS_BBPOS_TYPES_H + +struct bbpos { + enum btree_id btree; + struct bpos pos; +}; + +static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) +{ + return (struct bbpos) { btree, pos }; +} + +#define BBPOS_MIN BBPOS(0, POS_MIN) +#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) + +#endif /* _BCACHEFS_BBPOS_TYPES_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 53ffa88cae16..9cb8684959ee 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,6 +209,7 @@ #include "nocow_locking_types.h" #include "opts.h" #include "recovery_types.h" +#include "sb-errors_types.h" #include "seqmutex.h" #include "util.h" @@ -418,6 +419,7 @@ enum bch_time_stats { #include "buckets_types.h" #include "buckets_waiting_for_journal_types.h" #include "clock_types.h" +#include "disk_groups_types.h" #include "ec_types.h" #include "journal_types.h" #include "keylist_types.h" @@ -463,6 +465,7 @@ enum gc_phase { GC_PHASE_BTREE_snapshot_trees, GC_PHASE_BTREE_deleted_inodes, GC_PHASE_BTREE_logged_ops, + GC_PHASE_BTREE_rebalance_work, GC_PHASE_PENDING_DELETE, }; @@ -500,6 +503,8 @@ struct bch_dev { * Committed by bch2_write_super() -> bch_fs_mi_update() */ struct bch_member_cpu mi; + atomic64_t errors[BCH_MEMBER_ERROR_NR]; + __uuid_t uuid; char name[BDEVNAME_SIZE]; @@ -578,7 +583,7 @@ enum { BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck 
errors */ BCH_FS_NEED_ANOTHER_GC, - BCH_FS_HAVE_DELETED_SNAPSHOTS, + BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, /* errors: */ BCH_FS_ERROR, @@ -938,9 +943,6 @@ struct bch_fs { struct list_head moving_context_list; struct mutex moving_context_lock; - struct list_head data_progress_list; - struct mutex data_progress_lock; - /* REBALANCE */ struct bch_fs_rebalance rebalance; @@ -991,11 +993,6 @@ struct bch_fs { struct bio_set dio_read_bioset; struct bio_set nocow_flush_bioset; - /* ERRORS */ - struct list_head fsck_errors; - struct mutex fsck_error_lock; - bool fsck_alloc_err; - /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; @@ -1044,6 +1041,14 @@ struct bch_fs { struct bch2_time_stats times[BCH_TIME_STAT_NR]; struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; + + /* ERRORS */ + struct list_head fsck_error_msgs; + struct mutex fsck_error_msgs_lock; + bool fsck_alloc_msgs_err; + + bch_sb_errors_cpu fsck_error_counts; + struct mutex fsck_error_counts_lock; }; extern struct wait_queue_head bch2_read_only_wait; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 99749f3315fe..0a750953ff92 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -613,31 +613,17 @@ struct bch_extent_stripe_ptr { #endif }; -struct bch_extent_reservation { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, - unused:22, - replicas:4, - generation:32; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 generation:32, - replicas:4, - unused:22, - type:6; -#endif -}; - struct bch_extent_rebalance { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:7, - unused:33, - compression:8, + __u64 type:6, + unused:34, + compression:8, /* enum bch_compression_opt */ target:16; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 target:16, compression:8, - unused:33, - type:7; + unused:34, + type:6; #endif }; @@ -838,34 +824,30 @@ enum inode_opt_id { Inode_opt_nr, }; -enum { - /* - * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL - * flags) - */ - __BCH_INODE_SYNC = 0, - __BCH_INODE_IMMUTABLE = 1, - __BCH_INODE_APPEND = 2, - __BCH_INODE_NODUMP = 3, - __BCH_INODE_NOATIME = 4, - - __BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */ - __BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */ - __BCH_INODE_UNLINKED = 7, - __BCH_INODE_BACKPTR_UNTRUSTED = 8, - - /* bits 20+ reserved for packed fields below: */ -}; - -#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) -#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED) +#define BCH_INODE_FLAGS() \ + x(sync, 0) \ + x(immutable, 1) \ + x(append, 2) \ + x(nodump, 3) \ + x(noatime, 4) \ + x(i_size_dirty, 5) \ + x(i_sectors_dirty, 6) \ + x(unlinked, 7) \ + x(backptr_untrusted, 8) + +/* bits 20+ reserved for packed fields below: */ + +enum bch_inode_flags { +#define x(t, n) BCH_INODE_##t = 1U << n, + BCH_INODE_FLAGS() +#undef x +}; + +enum __bch_inode_flags { +#define x(t, n) __BCH_INODE_##t = n, + BCH_INODE_FLAGS() +#undef x +}; LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); @@ -1232,7 +1214,8 @@ struct bch_sb_field { 
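
The BCH_INODE_FLAGS() change above is the x-macro idiom this header already uses for btree ids and superblock field types: one authoritative flag list expands into both a mask enum and a bit-number enum, so the two can never drift out of sync the way the old parallel hand-written lists could. Reduced to its essentials:

	#define MY_FLAGS()	\
		x(sync,   0)	\
		x(append, 2)

	enum my_flags {			/* masks: MY_sync = 0x1, MY_append = 0x4 */
	#define x(t, n) MY_##t = 1U << n,
		MY_FLAGS()
	#undef x
	};

	enum __my_flags {		/* bit numbers: __MY_sync = 0, __MY_append = 2 */
	#define x(t, n) __MY_##t = n,
		MY_FLAGS()
	#undef x
	};
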
x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ x(counters, 10) \ - x(members_v2, 11) + x(members_v2, 11) \ + x(errors, 12) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1282,6 +1265,18 @@ enum bch_iops_measurement { BCH_IOPS_NR }; +#define BCH_MEMBER_ERROR_TYPES() \ + x(read, 0) \ + x(write, 1) \ + x(checksum, 2) + +enum bch_member_error_type { +#define x(t, n) BCH_MEMBER_ERROR_##t = n, + BCH_MEMBER_ERROR_TYPES() +#undef x + BCH_MEMBER_ERROR_NR +}; + struct bch_member { __uuid_t uuid; __le64 nbuckets; /* device size */ @@ -1292,6 +1287,9 @@ struct bch_member { __le64 flags; __le32 iops[4]; + __le64 errors[BCH_MEMBER_ERROR_NR]; + __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; + __le64 errors_reset_time; }; #define BCH_MEMBER_V1_BYTES 56 @@ -1615,11 +1613,20 @@ struct journal_seq_blacklist_entry { struct bch_sb_field_journal_seq_blacklist { struct bch_sb_field field; + struct journal_seq_blacklist_entry start[]; +}; - struct journal_seq_blacklist_entry start[0]; - __u64 _data[]; +struct bch_sb_field_errors { + struct bch_sb_field field; + struct bch_sb_field_error_entry { + __le64 v; + __le64 last_error_time; + } entries[]; }; +LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); +LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); + /* Superblock: */ /* @@ -1682,7 +1689,9 @@ struct bch_sb_field_journal_seq_blacklist { x(snapshot_skiplists, BCH_VERSION(1, 1), \ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ x(deleted_inodes, BCH_VERSION(1, 2), \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) + BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ + x(rebalance_work, BCH_VERSION(1, 3), \ + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1693,7 +1702,7 @@ enum bcachefs_metadata_version { }; static const __maybe_unused -unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor; +unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) @@ -2247,7 +2256,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); enum btree_id_flags { BTREE_ID_EXTENTS = BIT(0), BTREE_ID_SNAPSHOTS = BIT(1), - BTREE_ID_DATA = BIT(2), + BTREE_ID_SNAPSHOT_FIELD = BIT(2), + BTREE_ID_DATA = BIT(3), }; #define BCH_BTREE_IDS() \ @@ -2302,11 +2312,13 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ BIT_ULL(KEY_TYPE_snapshot_tree)) \ - x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \ + x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ - BIT_ULL(KEY_TYPE_logged_op_finsert)) + BIT_ULL(KEY_TYPE_logged_op_finsert)) \ + x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ + BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) enum btree_id { #define x(name, nr, ...) 
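
Two details worth spelling out in the bcachefs_format.h hunks above: the journal_seq_blacklist change is the standard zero-length-array to flexible-array-member conversion, and each new on-disk error entry packs its fields into a single little-endian u64 -- per the LE64_BITMASK() declarations, bits 0-15 hold the error id and bits 16-63 the occurrence count. Unpacked by hand (hypothetical helper, value already converted to host endianness):

	#include <stdint.h>

	struct sb_error_decoded {
		uint16_t id;	/* BCH_SB_ERROR_ENTRY_ID: bits 0..15  */
		uint64_t nr;	/* BCH_SB_ERROR_ENTRY_NR: bits 16..63 */
	};

	static struct sb_error_decoded sb_error_decode(uint64_t v)
	{
		return (struct sb_error_decoded) {
			.id = v & 0xffff,
			.nr = v >> 16,
		};
	}
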
BTREE_ID_##name = nr, diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 518450209236..831be01809f2 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -92,19 +92,15 @@ enum bkey_lr_packed { #define bkey_lr_packed(_l, _r) \ ((_l)->format + ((_r)->format << 1)) -#define bkey_copy(_dst, _src) \ -do { \ - BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ - !type_is(_dst, struct bkey_packed *)); \ - BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ - !type_is(_src, struct bkey_packed *)); \ - EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ - (u64 *) (_dst) < (u64 *) (_src) + \ - ((struct bkey *) (_src))->u64s); \ - \ - memcpy_u64s_small((_dst), (_src), \ - ((struct bkey *) (_src))->u64s); \ -} while (0) +static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src) +{ + memcpy_u64s_small(dst, src, src->u64s); +} + +static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src) +{ + memcpy_u64s_small(dst, src, src->k.u64s); +} struct btree; diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index d9fb1fc81f1e..761f5e33b1e6 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "backpointers.h" #include "bkey_methods.h" +#include "btree_cache.h" #include "btree_types.h" #include "alloc_background.h" #include "dirent.h" @@ -25,7 +26,7 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -39,23 +40,24 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = deleted_key_invalid, \ }) -static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != 0)", - bkey_val_bytes(k.k)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + int ret = 0; + + bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, + bkey_val_size_nonzero, + "incorrect value size (%zu != 0)", + bkey_val_bytes(k.k)); +fsck_err: + return ret; } #define bch2_bkey_ops_error ((struct bkey_ops) { \ .key_invalid = empty_val_key_invalid, \ }) -static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -70,7 +72,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = empty_val_key_invalid, \ }) -static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -91,18 +93,6 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, .val_to_text = key_type_inline_data_to_text, \ }) -static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k, - enum bkey_invalid_flags flags, struct printbuf *err) -{ - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_cookie)); - return -BCH_ERR_invalid_bkey; - } - - return 0; -} - static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) { 
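
The bkey.h hunk above replaces one macro that accepted either key representation with two typed inlines -- bkey_p_copy() for packed keys, bkey_copy() for unpacked -- so picking the wrong variant is now an ordinary compile error rather than a BUILD_BUG_ON() in an expanded macro (the old EBUG_ON() overlap check goes away with it). Both bottom out in memcpy_u64s_small(); a plausible reading of that helper, shown only as a sketch since the real one lives in the bcachefs utility headers:

	/* sketch: copy a small number of 64-bit words */
	static inline void memcpy_u64s_small(void *dst, const void *src,
					     unsigned u64s)
	{
		unsigned long long *d = dst;
		const unsigned long long *s = src;

		while (u64s--)
			*d++ = *s++;
	}
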
bch2_key_resize(l.k, l.k->size + r.k->size); @@ -110,7 +100,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = key_type_set_invalid, \ + .key_invalid = empty_val_key_invalid, \ .key_merge = key_type_set_merge, \ }) @@ -128,84 +118,95 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); + int ret = 0; - if (bkey_val_bytes(k.k) < ops->min_val_size) { - prt_printf(err, "bad val size (%zu < %u)", - bkey_val_bytes(k.k), ops->min_val_size); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, + bkey_val_size_too_small, + "bad val size (%zu < %u)", + bkey_val_bytes(k.k), ops->min_val_size); if (!ops->key_invalid) return 0; - return ops->key_invalid(c, k, flags, err); + ret = ops->key_invalid(c, k, flags, err); +fsck_err: + return ret; } static u64 bch2_key_types_allowed[] = { -#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, - BCH_BTREE_IDS() -#undef x [BKEY_TYPE_btree] = BIT_ULL(KEY_TYPE_deleted)| BIT_ULL(KEY_TYPE_btree_ptr)| BIT_ULL(KEY_TYPE_btree_ptr_v2), +#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, + BCH_BTREE_IDS() +#undef x }; +const char *bch2_btree_node_type_str(enum btree_node_type type) +{ + return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); +} + int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->u64s < BKEY_U64s) { - prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - if (flags & BKEY_INVALID_COMMIT && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) { - prt_printf(err, "invalid key type for btree %s (%s)", - bch2_btree_ids[type], bch2_bkey_types[k.k->type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, + bkey_u64s_too_small, + "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - if (k.k->size == 0) { - prt_printf(err, "size == 0"); - return -BCH_ERR_invalid_bkey; - } + if (type >= BKEY_TYPE_NR) + return 0; - if (k.k->size > k.k->p.offset) { - prt_printf(err, "size greater than offset (%u > %llu)", - k.k->size, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, + bkey_invalid_type_for_btree, + "invalid key type for btree %s (%s)", + bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); + + if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { + bkey_fsck_err_on(k.k->size == 0, c, err, + bkey_extent_size_zero, + "size == 0"); + + bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, + bkey_extent_size_greater_than_offset, + "size greater than offset (%u > %llu)", + k.k->size, k.k->p.offset); } else { - if (k.k->size) { - prt_printf(err, "size != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->size, c, err, + bkey_size_nonzero, + "size != 0"); } if (type != BKEY_TYPE_btree) { - if (!btree_type_has_snapshots((enum btree_id) type) && - k.k->p.snapshot) { - prt_printf(err, "nonzero snapshot"); - return -BCH_ERR_invalid_bkey; - } - - if (btree_type_has_snapshots((enum btree_id) type) && - !k.k->p.snapshot) { 
- prt_printf(err, "snapshot == 0"); - return -BCH_ERR_invalid_bkey; + enum btree_id btree = type - 1; + + if (btree_type_has_snapshots(btree)) { + bkey_fsck_err_on(!k.k->p.snapshot, c, err, + bkey_snapshot_zero, + "snapshot == 0"); + } else if (!btree_type_has_snapshot_field(btree)) { + bkey_fsck_err_on(k.k->p.snapshot, c, err, + bkey_snapshot_nonzero, + "nonzero snapshot"); + } else { + /* + * btree uses snapshot field but it's not required to be + * nonzero + */ } - if (bkey_eq(k.k->p, POS_MAX)) { - prt_printf(err, "key at POS_MAX"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, + bkey_at_pos_max, + "key at POS_MAX"); } - - return 0; +fsck_err: + return ret; } int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, @@ -217,20 +218,20 @@ int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bch2_bkey_val_invalid(c, k, flags, err); } -int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k, - struct printbuf *err) +int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, struct printbuf *err) { - if (bpos_lt(k.k->p, b->data->min_key)) { - prt_printf(err, "key before start of btree node"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - if (bpos_gt(k.k->p, b->data->max_key)) { - prt_printf(err, "key past end of btree node"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, + bkey_before_start_of_btree_node, + "key before start of btree node"); - return 0; + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, + bkey_after_end_of_btree_node, + "key past end of btree node"); +fsck_err: + return ret; } void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 668f595e2fcf..3a370b7087ac 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -21,7 +21,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; * being read or written; more aggressive checks can be enabled when rw == WRITE. 
*/ struct bkey_ops { - int (*key_invalid)(const struct bch_fs *c, struct bkey_s_c k, + int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -55,7 +55,8 @@ int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c, struct printbuf *); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, + struct bkey_s_c, struct printbuf *); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); @@ -119,16 +120,6 @@ enum btree_update_flags { #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ - ((1U << KEY_TYPE_alloc)| \ - (1U << KEY_TYPE_alloc_v2)| \ - (1U << KEY_TYPE_alloc_v3)| \ - (1U << KEY_TYPE_alloc_v4)| \ - (1U << KEY_TYPE_stripe)| \ - (1U << KEY_TYPE_inode)| \ - (1U << KEY_TYPE_inode_v2)| \ - (1U << KEY_TYPE_snapshot)) - static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index b9aa027c881b..bcca9e76a0b4 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -106,7 +106,7 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, while ((k = sort_iter_peek(iter))) { if (!bkey_deleted(k) && !should_drop_next_key(iter)) { - bkey_copy(out, k); + bkey_p_copy(out, k); btree_keys_account_key_add(&nr, 0, out); out = bkey_p_next(out); } @@ -137,7 +137,7 @@ bch2_sort_repack(struct bset *dst, struct btree *src, continue; if (!transform) - bkey_copy(out, in); + bkey_p_copy(out, in); else if (bch2_bkey_transform(out_f, out, bkey_packed(in) ? 
in_f : &bch2_bkey_format_current, in)) out->format = KEY_FORMAT_LOCAL_BTREE; @@ -191,7 +191,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst, memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in)); set_bkeyp_val_u64s(f, out, 0); } else { - bkey_copy(out, in); + bkey_p_copy(out, in); } out->needs_whiteout |= needs_whiteout; out = bkey_p_next(out); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 5e5858191905..47e7770d0583 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -472,7 +472,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) mutex_init(&c->verify_lock); - shrink = shrinker_alloc(0, "%s/btree_cache", c->name); + shrink = shrinker_alloc(0, "%s-btree_cache", c->name); if (!shrink) goto err; bc->shrink = shrink; @@ -785,12 +785,12 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) "btree node header doesn't match ptr\n" "btree %s level %u\n" "ptr: ", - bch2_btree_ids[b->c.btree_id], b->c.level); + bch2_btree_id_str(b->c.btree_id), b->c.level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_printf(&buf, "\nheader: btree %s level %llu\n" "min ", - bch2_btree_ids[BTREE_NODE_ID(b->data)], + bch2_btree_id_str(BTREE_NODE_ID(b->data)), BTREE_NODE_LEVEL(b->data)); bch2_bpos_to_text(&buf, b->data->min_key); @@ -1153,8 +1153,21 @@ wait_on_io: six_unlock_intent(&b->c.lock); } -void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, - const struct btree *b) +const char *bch2_btree_id_str(enum btree_id btree) +{ + return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)"; +} + +void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) +{ + prt_printf(out, "%s level %u/%u\n ", + bch2_btree_id_str(b->c.btree_id), + b->c.level, + bch2_btree_id_root(c, b->c.btree_id)->level); + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); +} + +void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { struct bset_stats stats; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 1e562b6efa62..cfb80b201d61 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -123,8 +123,9 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) return bch2_btree_id_root(c, b->c.btree_id)->b; } -void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, - const struct btree *); +const char *bch2_btree_id_str(enum btree_id); +void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); +void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *); #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 693ed067b1a7..0b5d09c8475d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -95,15 +95,15 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_ids[b->c.btree_id], b->c.level, - buf1.buf, buf2.buf) && - should_restart_for_topology_repair(c)) { + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + btree_node_topology_bad_min_key, + "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_id_str(b->c.btree_id), b->c.level, + 
buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); goto err; @@ -122,14 +122,12 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); bch2_bpos_to_text(&buf2, node_end); - if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, + if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -287,10 +285,11 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, cur->data->min_key), c, + btree_node_topology_overwritten_by_next_node, "btree node overwritten by next node at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_PREV_NODE; goto out; @@ -298,10 +297,11 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, bpos_predecessor(cur->data->min_key)), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_max(c, prev, bpos_predecessor(cur->data->min_key)); @@ -310,20 +310,22 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(expected_start, cur->data->max_key), c, + btree_node_topology_overwritten_by_prev_node, "btree node overwritten by prev node at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_THIS_NODE; goto out; } if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, + btree_node_topology_bad_min_key, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_min(c, cur, expected_start); } @@ -344,10 +346,11 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b, bch2_bpos_to_text(&buf2, b->key.k.p); if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = set_node_max(c, child, b->key.k.p); if (ret) @@ -396,9 +399,10 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); if (mustfix_fsck_err_on(ret == -EIO, c, + btree_node_unreadable, "Topology repair: unreadable btree node at btree %s level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); @@ -504,9 +508,10 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if 
(mustfix_fsck_err_on(!have_child, c, + btree_node_topology_interior_node_empty, "empty interior btree node at btree %s level %u\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf)) ret = DROP_THIS_NODE; err: @@ -582,7 +587,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!g->gen_valid && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" + fsck_err(c, ptr_to_missing_alloc_key, + "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -599,7 +605,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(p.ptr.gen, g->gen) > 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -620,7 +627,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -631,7 +639,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" + fsck_err(c, stale_dirty_ptr, + "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -645,6 +654,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (fsck_err_on(bucket_data_type(g->data_type) && bucket_data_type(g->data_type) != data_type, c, + ptr_bucket_data_type_mismatch, "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), @@ -664,6 +674,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); if (fsck_err_on(!m || !m->alive, c, + ptr_to_missing_stripe, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -672,6 +683,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id do_update = true; if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, + ptr_to_incorrect_stripe, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -811,6 +823,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, goto err; if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, + bkey_version_in_future, "key version number higher than recorded: %llu > %llu", k->k->version.lo, atomic64_read(&c->key_version))) @@ -968,9 +981,10 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b FSCK_CAN_FIX| FSCK_CAN_IGNORE| FSCK_NO_RATELIMIT, + btree_node_read_error, "Unreadable btree node at btree %s 
level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && @@ -1025,6 +1039,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->min_key); if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c, + btree_root_bad_min_key, "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1034,6 +1049,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->max_key); if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c, + btree_root_bad_max_key, "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1207,16 +1223,16 @@ static int bch2_gc_done(struct bch_fs *c, percpu_down_write(&c->mark_lock); -#define copy_field(_f, _msg, ...) \ +#define copy_field(_err, _f, _msg, ...) \ if (dst->_f != src->_f && \ (!verify || \ - fsck_err(c, _msg ": got %llu, should be %llu" \ + fsck_err(c, _err, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f))) \ dst->_f = src->_f -#define copy_dev_field(_f, _msg, ...) \ - copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) -#define copy_fs_field(_f, _msg, ...) \ - copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) +#define copy_dev_field(_err, _f, _msg, ...) \ + copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) +#define copy_fs_field(_err, _f, _msg, ...) \ + copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) for (i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); @@ -1227,13 +1243,17 @@ static int bch2_gc_done(struct bch_fs *c, bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, dev_usage_u64s()); - copy_dev_field(buckets_ec, "buckets_ec"); - for (i = 0; i < BCH_DATA_NR; i++) { - copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); - copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]); - copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]); + copy_dev_field(dev_usage_buckets_wrong, + d[i].buckets, "%s buckets", bch2_data_types[i]); + copy_dev_field(dev_usage_sectors_wrong, + d[i].sectors, "%s sectors", bch2_data_types[i]); + copy_dev_field(dev_usage_fragmented_wrong, + d[i].fragmented, "%s fragmented", bch2_data_types[i]); } + + copy_dev_field(dev_usage_buckets_ec_wrong, + buckets_ec, "buckets_ec"); } { @@ -1242,17 +1262,24 @@ static int bch2_gc_done(struct bch_fs *c, struct bch_fs_usage *src = (void *) bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); - copy_fs_field(hidden, "hidden"); - copy_fs_field(btree, "btree"); + copy_fs_field(fs_usage_hidden_wrong, + hidden, "hidden"); + copy_fs_field(fs_usage_btree_wrong, + btree, "btree"); if (!metadata_only) { - copy_fs_field(data, "data"); - copy_fs_field(cached, "cached"); - copy_fs_field(reserved, "reserved"); - copy_fs_field(nr_inodes,"nr_inodes"); + copy_fs_field(fs_usage_data_wrong, + data, "data"); + copy_fs_field(fs_usage_cached_wrong, + cached, "cached"); + copy_fs_field(fs_usage_reserved_wrong, + reserved, "reserved"); + copy_fs_field(fs_usage_nr_inodes_wrong, + nr_inodes,"nr_inodes"); for (i = 0; i < BCH_REPLICAS_MAX; i++) - copy_fs_field(persistent_reserved[i], + copy_fs_field(fs_usage_persistent_reserved_wrong, + persistent_reserved[i], "persistent_reserved[%i]", i); } @@ 
-1268,7 +1295,8 @@ static int bch2_gc_done(struct bch_fs *c, printbuf_reset(&buf); bch2_replicas_entry_to_text(&buf, e); - copy_fs_field(replicas[i], "%s", buf.buf); + copy_fs_field(fs_usage_replicas_wrong, + replicas[i], "%s", buf.buf); } } @@ -1404,6 +1432,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (c->opts.reconstruct_alloc || fsck_err_on(new.data_type != gc.data_type, c, + alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" ": got %s, should be %s", iter->pos.inode, iter->pos.offset, @@ -1412,9 +1441,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_data_types[gc.data_type])) new.data_type = gc.data_type; -#define copy_bucket_field(_f) \ +#define copy_bucket_field(_errtype, _f) \ if (c->opts.reconstruct_alloc || \ - fsck_err_on(new._f != gc._f, c, \ + fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ ": got %u, should be %u", \ iter->pos.inode, iter->pos.offset, \ @@ -1423,11 +1452,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans, new._f, gc._f)) \ new._f = gc._f; \ - copy_bucket_field(gen); - copy_bucket_field(dirty_sectors); - copy_bucket_field(cached_sectors); - copy_bucket_field(stripe_redundancy); - copy_bucket_field(stripe); + copy_bucket_field(alloc_key_gen_wrong, + gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, + dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, + cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, + stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, + stripe_redundancy); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -1584,6 +1618,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, } if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, + reflink_v_refcount_wrong, "reflink key has wrong refcount:\n" " %s\n" " should be %u", @@ -1709,7 +1744,8 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, if (bad) bch2_bkey_val_to_text(&buf, c, k); - if (fsck_err_on(bad, c, "%s", buf.buf)) { + if (fsck_err_on(bad, c, stripe_sector_count_wrong, + "%s", buf.buf)) { struct bkey_i_stripe *new; new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); @@ -1954,19 +1990,17 @@ int bch2_gc_gens(struct bch_fs *c) trans = bch2_trans_get(c); for_each_member_device(ca, c, i) { - struct bucket_gens *gens; + struct bucket_gens *gens = bucket_gens(ca); BUG_ON(ca->oldest_gen); - ca->oldest_gen = kvmalloc(ca->mi.nbuckets, GFP_KERNEL); + ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL); if (!ca->oldest_gen) { percpu_ref_put(&ca->ref); ret = -BCH_ERR_ENOMEM_gc_gens; goto err; } - gens = bucket_gens(ca); - for (b = gens->first_bucket; b < gens->nbuckets; b++) ca->oldest_gen[b] = gens->b[b]; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index a869cf6ac7c6..37d896edb06e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -184,7 +184,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) k = new_whiteouts; while (ptrs != ptrs_end) { - bkey_copy(k, *ptrs); + bkey_p_copy(k, *ptrs); k = bkey_p_next(k); ptrs++; } @@ -260,7 +260,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) n = bkey_p_next(k); if (!bkey_deleted(k)) { - bkey_copy(out, k); + bkey_p_copy(out, k); out = bkey_p_next(out); } else { BUG_ON(k->needs_whiteout); @@ -510,16 +510,6 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) bch2_trans_node_reinit_iter(trans, b); } -static void btree_pos_to_text(struct printbuf 
*out, struct bch_fs *c, - struct btree *b) -{ - prt_printf(out, "%s level %u/%u\n ", - bch2_btree_ids[b->c.btree_id], - b->c.level, - bch2_btree_id_root(c, b->c.btree_id)->level); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -} - static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, @@ -532,7 +522,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, if (ca) prt_printf(out, "on %s ", ca->name); prt_printf(out, "at btree "); - btree_pos_to_text(out, c, b); + bch2_btree_pos_to_text(out, c, b); prt_printf(out, "\n node offset %u", b->written); if (i) @@ -540,7 +530,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(8, 9) +__printf(9, 10) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, @@ -548,6 +538,7 @@ static int __btree_err(int ret, struct bset *i, int write, bool have_retry, + enum bch_sb_error_id err_type, const char *fmt, ...) { struct printbuf out = PRINTBUF; @@ -572,9 +563,15 @@ static int __btree_err(int ret, if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; + if (ret != -BCH_ERR_btree_node_read_err_fixable) + bch2_sb_error_count(c, err_type); + switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - mustfix_fsck_err(c, "%s", out.buf); + ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore) + goto fsck_err; ret = -BCH_ERR_fsck_fix; break; case -BCH_ERR_btree_node_read_err_want_retry: @@ -599,9 +596,11 @@ fsck_err: return ret; } -#define btree_err(type, c, ca, b, i, msg, ...) \ +#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \ ({ \ - int _ret = __btree_err(type, c, ca, b, i, write, have_retry, msg, ##__VA_ARGS__);\ + int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ + BCH_FSCK_ERR_##_err_type, \ + msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ ret = _ret; \ @@ -676,13 +675,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, int ret = 0; btree_err_on(!bch2_version_compatible(version), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); if (btree_err_on(version < c->sb.version_min, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { mutex_lock(&c->sb_lock); @@ -693,7 +696,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { mutex_lock(&c->sb_lock); @@ -703,11 +708,15 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); if (btree_err_on(offset + sectors > btree_sectors(c), - 
-BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_past_end_of_btree_node, "bset past end of btree node")) { i->u64s = 0; ret = 0; @@ -715,12 +724,15 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(offset && !i->u64s, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_empty, "empty bset"); - btree_err_on(BSET_OFFSET(i) && - BSET_OFFSET(i) != offset, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_wrong_sector_offset, "bset at wrong sector offset"); if (!offset) { @@ -734,16 +746,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, /* XXX endianness */ btree_err_on(bp->seq != bn->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_level, "incorrect level"); if (!write) @@ -760,7 +778,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), @@ -769,7 +789,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_max_key, "incorrect max key %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); @@ -779,7 +801,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, bn); btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), - -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, + -BCH_ERR_btree_node_read_err_bad_node, + c, ca, b, i, + btree_node_bad_format, "invalid bkey format: %s\n %s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); @@ -802,7 +826,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, struct printbuf *err) { return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(b, k, err) : 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: (rw == WRITE ? 
bch2_bkey_val_invalid(c, k, READ, err) : 0); } @@ -823,14 +847,18 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bkey tmp; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } if (btree_err_on(k->format > KEY_FORMAT_CURRENT, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -849,12 +877,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, printbuf_reset(&buf); if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { printbuf_reset(&buf); - prt_printf(&buf, "invalid bkey: "); bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, + "invalid bkey: %s", buf.buf); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -878,7 +908,10 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, bch2_dump_bset(c, b, i, 0); - if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf)) { + if (btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_out_of_order, + "%s", buf.buf)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -919,47 +952,62 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); if (bch2_meta_read_fault("btree")) - btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + btree_err(-BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); - btree_err_on(!b->data->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, - "bad btree header: seq 0"); - if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; btree_err_on(b->data->keys.seq != bp->seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_seq, "got wrong btree node (seq %llx want %llx)", b->data->keys.seq, bp->seq); + } else { + btree_err_on(!b->data->keys.seq, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_seq, + "bad btree header: seq 0"); } while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors; struct nonce nonce; - struct bch_csum csum; bool first = !b->written; + bool csum_bad; if (!b->written) { i = &b->data->keys; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + 
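
/*
 * Reading the checksum hunks in btree_io.c: a mismatch now does two
 * things in order -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum)
 * bumps the new per-device counter (surfacing flaky hardware in the
 * superblock even when recovery succeeds), and only then does
 * btree_err_on() fire with -BCH_ERR_btree_node_read_err_want_retry,
 * so the node is re-read from another replica before anything is
 * treated as fatal.
 */
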
-BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); - btree_err_on(bch2_crc_cmp(csum, b->data->csum), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + csum_bad = bch2_crc_cmp(b->data->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + + btree_err_on(csum_bad, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -969,7 +1017,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), - -BCH_ERR_btree_node_read_err_incompatible, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_incompatible, + c, NULL, b, NULL, + btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); sectors = vstruct_sectors(b->data, c->block_bits); @@ -981,15 +1031,21 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, break; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); - - btree_err_on(bch2_crc_cmp(csum, bne->csum), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + csum_bad = bch2_crc_cmp(bne->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + + btree_err_on(csum_bad, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -1022,12 +1078,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, true); btree_err_on(blacklisted && first, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); @@ -1044,7 +1104,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (ptr_written) { btree_err_on(b->written < ptr_written, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_data_missing, "btree node data missing: expected %u sectors, found %u", ptr_written, b->written); } else { @@ -1055,7 +1117,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, !bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), true), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset"); } @@ -1097,7 +1161,10 @@ int 
bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, + "%s", buf.buf); btree_keys_account_key_drop(&b->nr, 0, k); @@ -1177,8 +1244,9 @@ static void btree_node_read_work(struct work_struct *work) } start: printbuf_reset(&buf); - btree_pos_to_text(&buf, c, b); - bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", + bch2_btree_pos_to_text(&buf, c, b); + bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); @@ -1213,7 +1281,7 @@ start: printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", - __func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf); + __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); bch2_btree_node_rewrite_async(c, b); } @@ -1322,14 +1390,20 @@ static void btree_node_read_all_replicas_done(struct closure *cl) } written2 = btree_node_sectors_written(c, ra->buf[i]); - if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; @@ -1524,7 +1598,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, struct printbuf buf = PRINTBUF; prt_str(&buf, "btree node read error: no device to read from\n at "); - btree_pos_to_text(&buf, c, b); + bch2_btree_pos_to_text(&buf, c, b); bch_err(c, "%s", buf.buf); if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1759,7 +1833,8 @@ static void btree_node_write_endio(struct bio *bio) if (wbio->have_ioref) bch2_latency_acct(ca, wbio->submit_time, WRITE); - if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + "btree write error: %s", bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("btree")) { spin_lock_irqsave(&c->btree_write_error_lock, flags); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1d79514754d7..c2adf3fbb0b3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -257,7 +257,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) && (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(iter->btree_id)); + !btree_type_has_snapshot_field(iter->btree_id)); if (iter->update_path) bch2_btree_path_verify(trans, iter->update_path); @@ -362,7 +362,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, 
bch2_bpos_to_text(&buf, pos); panic("not locked: %s %s%s\n", - bch2_btree_ids[id], buf.buf, + bch2_btree_id_str(id), buf.buf, key_cache ? " cached" : ""); } @@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, if (unlikely(ret)) goto out; + if (unlikely(!trans->srcu_held)) + bch2_trans_srcu_lock(trans); + /* * Ensure we obey path->should_be_locked: if it's set, we can't unlock * and re-traverse the path without a transaction restart: @@ -1371,7 +1374,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) struct bkey_s_c old = { &i->old_k, i->old_v }; prt_printf(buf, "update: btree=%s cached=%u %pS", - bch2_btree_ids[i->btree_id], + bch2_btree_id_str(i->btree_id), i->cached, (void *) i->ip_allocated); prt_newline(buf); @@ -1387,7 +1390,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) trans_for_each_wb_update(trans, wb) { prt_printf(buf, "update: btree=%s wb=1 %pS", - bch2_btree_ids[wb->btree], + bch2_btree_id_str(wb->btree), (void *) i->ip_allocated); prt_newline(buf); @@ -1416,7 +1419,7 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) path->idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), path->level); bch2_bpos_to_text(out, path->pos); @@ -1523,6 +1526,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; + path->alloc_seq++; btree_path_list_add(trans, pos, path); trans->paths_sorted = false; @@ -1598,7 +1602,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, locks_want = min(locks_want, BTREE_MAX_DEPTH); if (locks_want > path->locks_want) - bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want); + bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL); return path; } @@ -2829,18 +2833,36 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } -static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) +static inline void check_srcu_held_too_long(struct btree_trans *trans) { - struct bch_fs *c = trans->c; - struct btree_path *path; + WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10), + "btree trans held srcu lock (delaying memory reclaim) for %lu seconds", + (jiffies - trans->srcu_lock_time) / HZ); +} - trans_for_each_path(trans, path) - if (path->cached && !btree_node_locked(path, 0)) - path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); +void bch2_trans_srcu_unlock(struct btree_trans *trans) +{ + if (trans->srcu_held) { + struct bch_fs *c = trans->c; + struct btree_path *path; - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - trans->srcu_lock_time = jiffies; + trans_for_each_path(trans, path) + if (path->cached && !btree_node_locked(path, 0)) + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + + check_srcu_held_too_long(trans); + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + trans->srcu_held = false; + } +} + +void bch2_trans_srcu_lock(struct btree_trans *trans) +{ + if (!trans->srcu_held) { + trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; + trans->srcu_held = true; + } } /** @@ -2894,8 +2916,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) } trans->last_begin_time = now; - if 
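/*
 * A sketch of the srcu_held guard introduced above, with stand-in primitives
 * (the real code uses srcu_read_lock()/srcu_read_unlock() on
 * c->btree_trans_barrier and jiffies for timestamps).  The shape is what
 * matters: lock and unlock are idempotent thanks to the srcu_held flag, the
 * unlock path invalidates cached node pointers the SRCU section was
 * protecting, and a warning fires if the read lock was held long enough to
 * delay memory reclaim.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct trans {
	bool	srcu_held;
	time_t	srcu_lock_time;
	int	srcu_idx;
};

static int fake_srcu_read_lock(void) { return 0; }		/* stand-in */
static void fake_srcu_read_unlock(int idx) { (void) idx; }	/* stand-in */

static void trans_srcu_lock(struct trans *trans)
{
	if (!trans->srcu_held) {
		trans->srcu_idx = fake_srcu_read_lock();
		trans->srcu_lock_time = time(NULL);
		trans->srcu_held = true;
	}
}

static void trans_srcu_unlock(struct trans *trans)
{
	if (trans->srcu_held) {
		/* real code also drops cached btree node pointers here */
		if (time(NULL) - trans->srcu_lock_time > 10)
			fprintf(stderr, "srcu lock held too long, delaying reclaim\n");
		fake_srcu_read_unlock(trans->srcu_idx);
		trans->srcu_held = false;
	}
}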
(unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) - bch2_trans_reset_srcu_lock(trans); + if (unlikely(trans->srcu_held && + time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) + bch2_trans_srcu_unlock(trans); trans->last_begin_ip = _RET_IP_; if (trans->restarted) { @@ -2980,8 +3003,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->wb_updates_size = s->wb_updates_size; } - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; + trans->srcu_held = true; if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { struct btree_trans *pos; @@ -3025,7 +3049,7 @@ leaked: trans_for_each_path(trans, path) if (path->ref) printk(KERN_ERR " btree %s %pS\n", - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), (void *) path->ip_allocated); /* Be noisy about this: */ bch2_fatal_error(c); @@ -3058,7 +3082,10 @@ void bch2_trans_put(struct btree_trans *trans) check_btree_paths_leaked(trans); - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + if (trans->srcu_held) { + check_srcu_held_too_long(trans); + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + } bch2_journal_preres_put(&c->journal, &trans->journal_preres); @@ -3100,7 +3127,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, prt_tab(out); prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b', - b->level, bch2_btree_ids[b->btree_id]); + b->level, bch2_btree_id_str(b->btree_id)); bch2_bpos_to_text(out, btree_node_pos(b)); prt_tab(out); @@ -3130,7 +3157,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) path->idx, path->cached ? 'c' : 'b', path->level, - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); bch2_bpos_to_text(out, path->pos); prt_newline(out); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index fbe273453db3..85e7cb52f6b6 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool); int bch2_trans_relock(struct btree_trans *); int bch2_trans_relock_notrace(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +void bch2_trans_unlock_long(struct btree_trans *); bool bch2_trans_locked(struct btree_trans *); static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) @@ -411,11 +412,11 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && - btree_node_type_is_extents(btree_id)) + btree_id_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(btree_id)) + !btree_type_has_snapshot_field(btree_id)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && @@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \ KEY_TYPE_##_type, sizeof(*_val), _val) +void bch2_trans_srcu_unlock(struct btree_trans *); +void bch2_trans_srcu_lock(struct btree_trans *); + u32 bch2_trans_begin(struct btree_trans *); /* diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f9a5e38a085b..9b78f78a75b5 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c 
@@ -324,7 +324,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) ck = bkey_cached_reuse(bc); if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); } @@ -407,7 +407,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[ck->key.btree_id], new_u64s); + bch2_btree_id_str(ck->key.btree_id), new_u64s); ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; goto err; } @@ -509,7 +509,7 @@ fill: * path->uptodate yet: */ if (!path->locks_want && - !__bch2_btree_path_upgrade(trans, path, 1)) { + !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; @@ -1038,7 +1038,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) bc->table_init_done = true; - shrink = shrinker_alloc(0, "%s/btree_key_cache", c->name); + shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name); if (!shrink) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->shrink = shrink; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 40c8ed8f7bf1..3d48834d091f 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, static inline bool btree_path_get_locks(struct btree_trans *trans, struct btree_path *path, - bool upgrade) + bool upgrade, + struct get_locks_fail *f) { unsigned l = path->level; int fail_idx = -1; @@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, if (!(upgrade ? 
bch2_btree_node_upgrade(trans, path, l) - : bch2_btree_node_relock(trans, path, l))) - fail_idx = l; + : bch2_btree_node_relock(trans, path, l))) { + fail_idx = l; + + if (f) { + f->l = l; + f->b = path->l[l].b; + } + } l++; } while (l < path->locks_want); @@ -584,7 +591,9 @@ __flatten bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { - return btree_path_get_locks(trans, path, false); + struct get_locks_fail f; + + return btree_path_get_locks(trans, path, false, &f); } int __bch2_btree_path_relock(struct btree_trans *trans, @@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans, bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { EBUG_ON(path->locks_want >= new_locks_want); path->locks_want = new_locks_want; - return btree_path_get_locks(trans, path, true); + return btree_path_get_locks(trans, path, true, f); } bool __bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { struct btree_path *linked; - if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want)) + if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f)) return true; /* @@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_path_get_locks(trans, linked, true); + btree_path_get_locks(trans, linked, true, NULL); } return false; @@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, { unsigned l; + if (trans->restarted) + return; + EBUG_ON(path->locks_want < new_locks_want); path->locks_want = new_locks_want; @@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, } bch2_btree_path_verify_locks(path); + + path->downgrade_seq++; + trace_path_downgrade(trans, _RET_IP_, path); } /* Btree transaction locking: */ @@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans) { struct btree_path *path; + if (trans->restarted) + return; + trans_for_each_path(trans, path) bch2_btree_path_downgrade(trans, path); } @@ -733,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans) __bch2_btree_path_unlock(trans, path); } +void bch2_trans_unlock_long(struct btree_trans *trans) +{ + bch2_trans_unlock(trans); + bch2_trans_srcu_unlock(trans); +} + bool bch2_trans_locked(struct btree_trans *trans) { struct btree_path *path; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 6231e9ffc5d7..11b0a2c8cd69 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans, /* upgrade */ + +struct get_locks_fail { + unsigned l; + struct btree *b; +}; + bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); + bool __bch2_btree_path_upgrade(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); static inline int bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, unsigned new_locks_want) { + struct get_locks_fail f; unsigned old_locks_want = path->locks_want; new_locks_want = 
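/*
 * btree_path_get_locks() now threads a struct get_locks_fail out-param through
 * the relock/upgrade paths so callers and the trans_restart_upgrade tracepoint
 * can report which level and node failed.  A generic sketch of that shape,
 * with hypothetical names; try_lock_level() stands in for
 * bch2_btree_node_upgrade()/bch2_btree_node_relock():
 */
#include <stdbool.h>
#include <stddef.h>

struct lock_fail_info {
	unsigned	level;
	void		*node;
};

static bool try_lock_level(unsigned l) { return l != 2; }	/* stand-in */

static bool get_locks(unsigned level, unsigned locks_want,
		      struct lock_fail_info *f)
{
	int fail_idx = -1;
	unsigned l = level;

	do {
		if (!try_lock_level(l)) {
			fail_idx = l;
			if (f) {
				f->level = l;
				f->node = NULL;	/* real code records path->l[l].b */
			}
		}
		l++;
	} while (l < locks_want);

	return fail_idx < 0;
}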
min(new_locks_want, BTREE_MAX_DEPTH); if (path->locks_want < new_locks_want - ? __bch2_btree_path_upgrade(trans, path, new_locks_want) + ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f) : path->uptodate == BTREE_ITER_UPTODATE) return 0; trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path, - old_locks_want, new_locks_want); + old_locks_want, new_locks_want, &f); return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 04c1f4610972..decad7b66c59 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -269,6 +269,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(i->level != i->path->level); BUG_ON(i->btree_id != i->path->btree_id); EBUG_ON(!i->level && + btree_type_has_snapshots(i->btree_id) && !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && @@ -349,7 +350,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); if (!new_k) { bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[path->btree_id], new_u64s); + bch2_btree_id_str(path->btree_id), new_u64s); return -BCH_ERR_ENOMEM_btree_key_cache_insert; } @@ -379,11 +380,10 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc((enum btree_node_type) i->btree_id)) + if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + if (old_ops->atomic_trigger == new_ops->atomic_trigger) { ret = bch2_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); @@ -425,8 +425,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + old_ops->trans_trigger == new_ops->trans_trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, @@ -683,7 +682,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_overwrite, i->btree_id, i->level, i->old_k.u64s); - bkey_reassemble(&entry->start[0], + bkey_reassemble((struct bkey_i *) entry->start, (struct bkey_s_c) { &i->old_k, i->old_v }); } @@ -691,7 +690,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, i->btree_id, i->level, i->k->k.u64s); - bkey_copy(&entry->start[0], i->k); + bkey_copy((struct bkey_i *) entry->start, i->k); } trans_for_each_wb_update(trans, wb) { @@ -699,7 +698,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, wb->btree, 0, wb->k.k.u64s); - bkey_copy(&entry->start[0], &wb->k); + bkey_copy((struct bkey_i *) entry->start, &wb->k); } if (trans->journal_seq) @@ -776,12 +775,12 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); } -static noinline int 
bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags, +static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, + enum bkey_invalid_flags flags, struct btree_insert_entry *i, struct printbuf *err) { struct bch_fs *c = trans->c; - int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE; printbuf_reset(err); prt_printf(err, "invalid bkey on insert from %s -> %ps", @@ -792,8 +791,7 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); prt_newline(err); - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, rw, err); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); bch2_print_string_as_lines(KERN_ERR, err->buf); bch2_inconsistent_error(c); @@ -864,12 +862,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags */ bch2_journal_res_put(&c->journal, &trans->journal_res); - if (unlikely(ret)) - return ret; - - bch2_trans_downgrade(trans); - - return 0; + return ret; } static int journal_reclaim_wait_done(struct bch_fs *c) @@ -1034,7 +1027,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, flags, i, &buf); + ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); btree_insert_entry_checks(trans, i); printbuf_exit(&buf); @@ -1138,6 +1131,8 @@ out: if (likely(!(flags & BTREE_INSERT_NOCHECK_RW))) bch2_write_ref_put(c, BCH_WRITE_REF_trans); out_reset: + if (!ret) + bch2_trans_downgrade(trans); bch2_trans_reset_updates(trans); return ret; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index bc6714d88925..941841a0c5bf 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -228,6 +228,8 @@ struct btree_path { u8 sorted_idx; u8 ref; u8 intent_ref; + u32 alloc_seq; + u32 downgrade_seq; /* btree_iter_copy starts here: */ struct bpos pos; @@ -424,6 +426,7 @@ struct btree_trans { u8 nr_updates; u8 nr_wb_updates; u8 wb_updates_size; + bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; bool paths_sorted:1; @@ -636,16 +639,17 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) } enum btree_node_type { -#define x(kwd, val, ...) BKEY_TYPE_##kwd = val, + BKEY_TYPE_btree, +#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1, BCH_BTREE_IDS() #undef x - BKEY_TYPE_btree, + BKEY_TYPE_NR }; /* Type of a key in btree @id at level @level: */ static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_btree : (enum btree_node_type) id; + return level ? 
BKEY_TYPE_btree : (unsigned) id + 1; } /* Type of keys @b contains: */ @@ -654,19 +658,21 @@ static inline enum btree_node_type btree_node_type(struct btree *b) return __btree_node_type(b->c.level, b->c.btree_id); } +const char *bch2_btree_node_type_str(enum btree_node_type); + #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ - (BIT(BKEY_TYPE_extents)| \ - BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_reflink)| \ - BIT(BKEY_TYPE_btree)) + (BIT_ULL(BKEY_TYPE_extents)| \ + BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_reflink)| \ + BIT_ULL(BKEY_TYPE_btree)) #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ - (BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_snapshots)) + (BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_snapshots)) #define BTREE_NODE_TYPE_HAS_TRIGGERS \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ @@ -674,13 +680,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b) static inline bool btree_node_type_needs_gc(enum btree_node_type type) { - return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); + return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); } static inline bool btree_node_type_is_extents(enum btree_node_type type) { const unsigned mask = 0 -#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << nr) +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) BCH_BTREE_IDS() #undef x ; @@ -690,7 +696,7 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type) static inline bool btree_id_is_extents(enum btree_id btree) { - return btree_node_type_is_extents((enum btree_node_type) btree); + return btree_node_type_is_extents(__btree_node_type(0, btree)); } static inline bool btree_type_has_snapshots(enum btree_id id) @@ -704,6 +710,17 @@ static inline bool btree_type_has_snapshots(enum btree_id id) return (1U << id) & mask; } +static inline bool btree_type_has_snapshot_field(enum btree_id id) +{ + const unsigned mask = 0 +#define x(name, nr, flags, ...) 
|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; +} + static inline bool btree_type_has_ptrs(enum btree_id id) { const unsigned mask = 0 diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7dbf6b6c7f34..39c2db68123b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1274,14 +1274,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf)) { + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { printbuf_reset(&buf); prt_printf(&buf, "inserting invalid bkey\n "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); prt_printf(&buf, "\n "); bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf); + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); bch2_fs_inconsistent(c, "%s", buf.buf); dump_stack(); @@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, out: if (new_path) bch2_path_put(trans, new_path, true); - bch2_btree_path_downgrade(trans, iter->path); + bch2_trans_downgrade(trans); return ret; err: bch2_btree_node_free_never_used(as, trans, n); @@ -2411,30 +2411,24 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry r->level = entry->level; r->alive = true; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); mutex_unlock(&c->btree_root_lock); } struct jset_entry * bch2_btree_roots_to_journal_entries(struct bch_fs *c, - struct jset_entry *start, - struct jset_entry *end) + struct jset_entry *end, + unsigned long skip) { - struct jset_entry *entry; - unsigned long have = 0; unsigned i; - for (entry = start; entry < end; entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_btree_root) - __set_bit(entry->btree_id, &have); - mutex_lock(&c->btree_root_lock); for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); - if (r->alive && !test_bit(i, &have)) { + if (r->alive && !test_bit(i, &skip)) { journal_entry_set(end, BCH_JSET_ENTRY_btree_root, i, r->level, &r->key, r->key.k.u64s); end = vstruct_next(end); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 5e0a467fe905..4df21512d640 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree_node_entry *bne = max(write_block(b), (void *) btree_bkey_last(b, bset_tree_last(b))); ssize_t remaining_space = - __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); + __bch_btree_u64s_remaining(c, b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) @@ -303,7 +303,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b, k.needs_whiteout = true; b->whiteout_u64s += k.u64s; - bkey_copy(unwritten_whiteouts_start(c, b), &k); + bkey_p_copy(unwritten_whiteouts_start(c, b), &k); } /* @@ -325,7 +325,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *); void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, - struct jset_entry *, struct 
jset_entry *); + struct jset_entry *, unsigned long); void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a1a4b5feadaa..58d8c6ffd955 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -370,8 +370,8 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && - fsck_err(c, "no replicas entry\n" - " while marking %s", + fsck_err(c, ptr_to_missing_replicas_entry, + "no replicas entry\n while marking %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { percpu_up_read(&c->mark_lock); ret = bch2_mark_replicas(c, r); @@ -695,6 +695,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -707,6 +708,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_too_stale, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -720,6 +722,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (b_gen != ptr->gen && !ptr->cached) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_stale_dirty_ptr, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -741,6 +744,7 @@ static int check_bucket_ref(struct btree_trans *trans, ptr_data_type && bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_bucket_data_type_mismatch, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -754,6 +758,7 @@ static int check_bucket_ref(struct btree_trans *trans, if ((u64) bucket_sectors + sectors > U32_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_sector_count_overflow, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -935,14 +940,12 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, return 0; } -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1018,6 +1021,14 @@ int bch2_mark_extent(struct btree_trans *trans, return 0; } +int bch2_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); +} + int bch2_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -1124,13 +1135,11 @@ int bch2_mark_stripe(struct btree_trans *trans, return 0; } -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bch_fs_usage *fs_usage; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; @@ -1157,6 +1166,14 @@ int bch2_mark_reservation(struct btree_trans *trans, return 0; } +int bch2_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); +} + static s64 __bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 start, u64 end, @@ -1183,7 +1200,8 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, *idx = r->offset; return 0; not_found: - if (fsck_err(c, "pointer to missing indirect extent\n" + if (fsck_err(c, reflink_p_to_missing_reflink_v, + "pointer to missing indirect extent\n" " %s\n" " missing range %llu-%llu", (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), @@ -1211,13 +1229,11 @@ fsck_err: return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; @@ -1251,6 +1267,14 @@ int bch2_mark_reflink_p(struct btree_trans *trans, return ret; } +int bch2_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); +} + void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1298,7 +1322,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, struct bch_fs *c = trans->c; static int warned_disk_usage = 0; bool warn = false; - unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; + u64 disk_res_sectors = trans->disk_res ? 
trans->disk_res->sectors : 0; struct replicas_delta *d, *d2; struct replicas_delta *top = (void *) deltas->d + deltas->used; struct bch_fs_usage *dst; @@ -1357,7 +1381,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) bch2_trans_inconsistent(trans, - "disk usage increased %lli more than %u sectors reserved)", + "disk usage increased %lli more than %llu sectors reserved)", should_not_have_added, disk_res_sectors); return 0; need_mark: @@ -1452,15 +1476,11 @@ err: return ret; } -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +static int __trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1517,6 +1537,24 @@ int bch2_trans_mark_extent(struct btree_trans *trans, return ret; } +int bch2_trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) +{ + struct bch_fs *c = trans->c; + int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - + (int) bch2_bkey_needs_rebalance(c, old); + + if (mod) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); + if (ret) + return ret; + } + + return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); +} + static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, struct bkey_s_c_stripe s, unsigned idx, bool deleting) @@ -1670,15 +1708,10 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; @@ -1700,7 +1733,16 @@ int bch2_trans_mark_reservation(struct btree_trans *trans, return 0; } -static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, +int bch2_trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); +} + +static int trans_mark_reflink_p_segment(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -1767,35 +1809,38 @@ err: return ret; } -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? 
old - : bkey_i_to_s_c(new); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); u64 idx, end_idx; int ret = 0; - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - - v->front_pad = v->back_pad = 0; - } - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); end_idx = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); while (idx < end_idx && !ret) - ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags); - + ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); return ret; } +int bch2_trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); +} + static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, @@ -1818,6 +1863,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, if (a->v.data_type && type && a->v.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", iter.pos.inode, iter.pos.offset, a->v.gen, @@ -1825,16 +1871,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bch2_data_types[type], bch2_data_types[type]); ret = -EIO; - goto out; + goto err; } - a->v.data_type = type; - a->v.dirty_sectors = sectors; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto out; -out: + if (a->v.data_type != type || + a->v.dirty_sectors != sectors) { + a->v.data_type = type; + a->v.dirty_sectors = sectors; + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } +err: bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1929,6 +1975,22 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) return ret; } +int bch2_trans_mark_dev_sbs(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + int ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + percpu_ref_put(&ca->ref); + return ret; + } + } + + return 0; +} + /* Disk reservations: */ #define SECTORS_CACHE 1024 diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index bf8d7f407e9c..21f6cb356921 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -339,12 +339,27 @@ int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ +({ \ + int ret = 0; \ + \ + if (_old.k->type) \ + ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ + if (!ret && _new.k->type) \ + ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ + ret; \ +}) + +#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ + mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) + void bch2_trans_fs_usage_revert(struct 
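/*
 * mem_trigger_run_overwrite_then_insert() (defined just above) replaces the
 * old "pick old or new based on BTREE_TRIGGER_OVERWRITE" dance: the
 * single-key helper now runs twice, first on the old key with the INSERT flag
 * masked off, then on the new key with OVERWRITE masked off.  A stand-alone
 * sketch of the control flow with invented types:
 */
#include <stdio.h>

#define TRIGGER_INSERT		(1 << 0)
#define TRIGGER_OVERWRITE	(1 << 1)

struct key { int type; };	/* type 0 means "no key" */

static int mark_one(const struct key *k, unsigned flags)
{
	printf("mark type=%d insert=%d overwrite=%d\n", k->type,
	       !!(flags & TRIGGER_INSERT), !!(flags & TRIGGER_OVERWRITE));
	return 0;
}

static int run_overwrite_then_insert(const struct key *old,
				     const struct key *new,
				     unsigned flags)
{
	int ret = 0;

	if (old->type)
		ret = mark_one(old, flags & ~TRIGGER_INSERT);
	if (!ret && new->type)
		ret = mark_one(new, flags & ~TRIGGER_OVERWRITE);
	return ret;
}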
btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, size_t, enum bch_data_type, unsigned); int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *); +int bch2_trans_mark_dev_sbs(struct bch_fs *); static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) { diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index f69e15dc699c..4bb88aefed12 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -332,8 +332,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.btree_id, - .p.pos = ctx->stats.pos, + .p.btree_id = ctx->stats.pos.btree, + .p.pos = ctx->stats.pos.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_total = bch2_fs_usage_read_short(c).used, }; diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 1480b64547b0..a8b148ec2a2b 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -697,14 +697,32 @@ err: return ret; } +void bch2_compression_opt_to_text(struct printbuf *out, u64 v) +{ + struct bch_compression_opt opt = bch2_compression_decode(v); + + if (opt.type < BCH_COMPRESSION_OPT_NR) + prt_str(out, bch2_compression_opts[opt.type]); + else + prt_printf(out, "(unknown compression opt %u)", opt.type); + if (opt.level) + prt_printf(out, ":%u", opt.level); +} + void bch2_opt_compression_to_text(struct printbuf *out, struct bch_fs *c, struct bch_sb *sb, u64 v) { - struct bch_compression_opt opt = bch2_compression_decode(v); + return bch2_compression_opt_to_text(out, v); +} - prt_str(out, bch2_compression_opts[opt.type]); - if (opt.level) - prt_printf(out, ":%u", opt.level); +int bch2_opt_compression_validate(u64 v, struct printbuf *err) +{ + if (!bch2_compression_opt_valid(v)) { + prt_printf(err, "invalid compression opt %llu", v); + return -BCH_ERR_invalid_sb_opt_compression; + } + + return 0; } diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 052ea303241f..607fd5e232c9 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -4,12 +4,18 @@ #include "extents_types.h" +static const unsigned __bch2_compression_opt_to_type[] = { +#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, + BCH_COMPRESSION_OPTS() +#undef x +}; + struct bch_compression_opt { u8 type:4, level:4; }; -static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) { return (struct bch_compression_opt) { .type = v & 15, @@ -17,17 +23,25 @@ static inline struct bch_compression_opt bch2_compression_decode(unsigned v) }; } +static inline bool bch2_compression_opt_valid(unsigned v) +{ + struct bch_compression_opt opt = __bch2_compression_decode(v); + + return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); +} + +static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +{ + return bch2_compression_opt_valid(v) + ? 
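/*
 * The compression option packs a 4-bit type and a 4-bit level into one byte;
 * the bch2_compression_opt_valid() added above rejects unknown types and a
 * level supplied without a type.  A userspace sketch of the same packing,
 * with a made-up algorithm list:
 */
#include <stdbool.h>

enum { COMPRESS_none, COMPRESS_lz4, COMPRESS_gzip, COMPRESS_zstd, COMPRESS_NR };

struct compression_opt { unsigned type:4, level:4; };

static struct compression_opt compression_decode(unsigned v)
{
	return (struct compression_opt) { .type = v & 15, .level = v >> 4 };
}

static unsigned compression_encode(struct compression_opt opt)
{
	return opt.type | (opt.level << 4);
}

static bool compression_opt_valid(unsigned v)
{
	struct compression_opt opt = compression_decode(v);

	/* a level only makes sense together with an algorithm: */
	return opt.type < COMPRESS_NR && !(!opt.type && opt.level);
}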
__bch2_compression_decode(v) + : (struct bch_compression_opt) { 0 }; +} + static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) { return opt.type|(opt.level << 4); } -static const unsigned __bch2_compression_opt_to_type[] = { -#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, - BCH_COMPRESSION_OPTS() -#undef x -}; - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; @@ -44,12 +58,16 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); void bch2_fs_compress_exit(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *); +void bch2_compression_opt_to_text(struct printbuf *, u64); + int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); +int bch2_opt_compression_validate(u64, struct printbuf *); #define bch2_opt_compression (struct bch_opt_fn) { \ - .parse = bch2_opt_compression_parse, \ - .to_text = bch2_opt_compression_to_text, \ + .parse = bch2_opt_compression_parse, \ + .to_text = bch2_opt_compression_to_text, \ + .validate = bch2_opt_compression_validate, \ } #endif /* _BCACHEFS_COMPRESS_H */ diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 114f86b45fd5..87b4b2d1ec76 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, _ret; \ }) +#define darray_remove_item(_d, _pos) \ + array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data) + #define darray_for_each(_d, _i) \ for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_reverse(_d, _i) \ + for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) + #define darray_init(_d) \ do { \ (_d)->data = NULL; \ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 899ff46de8e0..0771a6d880bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -13,6 +13,7 @@ #include "keylist.h" #include "move.h" #include "nocow_locking.h" +#include "rebalance.h" #include "subvolume.h" #include "trace.h" @@ -161,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, if (((1U << i) & m->data_opts.rewrite_ptrs) && (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && !ptr->cached) { - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr); - /* - * See comment below: bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr); - */ rewrites_found |= 1U << i; } i++; @@ -211,14 +208,8 @@ restart_drop_extra_replicas: if (!p.ptr.cached && durability - ptr_durability >= m->op.opts.data_replicas) { durability -= ptr_durability; - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr); - /* - * Currently, we're dropping unneeded replicas - * instead of marking them as cached, since - * cached data in stripe buckets prevents them - * from being reused: + bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr); - */ goto restart_drop_extra_replicas; } } @@ -251,11 +242,11 @@ restart_drop_extra_replicas: ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p); - if (ret) - goto err; - - ret = bch2_trans_update(trans, &iter, insert, + k.k->p, insert->k.p) ?: + bch2_bkey_set_needs_rebalance(c, insert, + op->opts.background_target, + 
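/*
 * darray_for_each_reverse() (added above) walks a darray from the last
 * element down, which is the safe direction when the loop body may call
 * darray_remove_item() on the current element.  A sketch of the idea with a
 * plain C array standing in for the darray:
 */
#include <stddef.h>
#include <string.h>

struct int_darray { int *data; size_t nr; };

static void remove_item(struct int_darray *d, int *pos)
{
	/* shift the tail down over the removed slot: */
	memmove(pos, pos + 1, (--d->nr - (pos - d->data)) * sizeof(*pos));
}

static void drop_negative(struct int_darray *d)
{
	/* iterating backwards means removal never skips the next element: */
	for (int *i = d->data + d->nr - 1; i >= d->data; --i)
		if (*i < 0)
			remove_item(d, i);
}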
op->opts.background_compression) ?: + bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, @@ -281,11 +272,11 @@ next: } continue; nowork: - if (m->ctxt && m->ctxt->stats) { + if (m->stats) { BUG_ON(k.k->p.offset <= iter.pos.offset); - atomic64_inc(&m->ctxt->stats->keys_raced); + atomic64_inc(&m->stats->keys_raced); atomic64_add(k.k->p.offset - iter.pos.offset, - &m->ctxt->stats->sectors_raced); + &m->stats->sectors_raced); } this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); @@ -439,6 +430,8 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; m->data_opts = data_opts; + m->ctxt = ctxt; + m->stats = ctxt ? ctxt->stats : NULL; bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); @@ -487,7 +480,7 @@ int bch2_data_update_init(struct btree_trans *trans, if (c->opts.nocow_enabled) { if (ctxt) { - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || !atomic_read(&ctxt->read_sectors)); diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 7ca1f98d7e94..9dc17b9d8379 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -23,6 +23,7 @@ struct data_update { struct bkey_buf k; struct data_update_opts data_opts; struct moving_context *ctxt; + struct bch_move_stats *stats; struct bch_write_op op; }; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 75a3dc7cbd47..57c5128db173 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -517,7 +517,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * prt_printf(out, "%px btree=%s l=%u ", b, - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level); prt_newline(out); @@ -919,18 +919,18 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_ids[bd->id], + debugfs_create_file(bch2_btree_id_str(bd->id), 0400, c->btree_debug_dir, bd, &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &bfloat_failed_debug_ops); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 6c6c8d57d72b..1a0f2d571569 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -97,61 +97,51 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); + int ret = 0; - if (!d_name.len) { - prt_printf(err, "empty name"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!d_name.len, c, err, + dirent_empty_name, + "empty name"); - if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len),
c, err, + dirent_val_too_big, + "value too big (%zu > %u)", + bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); /* * Check new keys don't exceed the max length * (older keys may be larger.) */ - if ((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX) { - prt_printf(err, "dirent name too big (%u > %u)", - d_name.len, BCH_NAME_MAX); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len != strnlen(d_name.name, d_name.len)) { - prt_printf(err, "dirent has stray data after name's NUL"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(d_name.name, '/', d_name.len)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode) { - prt_printf(err, "dirent points to own directory"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, + dirent_name_too_long, + "dirent name too big (%u > %u)", + d_name.len, BCH_NAME_MAX); + + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, + dirent_name_embedded_nul, + "dirent has stray data after name's NUL"); + + bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, + dirent_name_dot_or_dotdot, + "invalid name"); + + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, + dirent_name_has_slash, + "name with /"); + + bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && + le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, + dirent_to_itself, + "dirent points to own directory"); +fsck_err: + return ret; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index e9fa1df38232..cd262bf4d9c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,7 +7,7 @@ enum bkey_invalid_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index e00133b6ea51..d613695abf9f 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -175,6 +175,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) dst->deleted = BCH_GROUP_DELETED(src); dst->parent = BCH_GROUP_PARENT(src); + memcpy(dst->label, src->label, sizeof(dst->label)); } for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { @@ -382,7 +383,57 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) return v; } -void bch2_disk_path_to_text(struct printbuf *out, struct bch_sb *sb, unsigned v) +void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) +{ + struct bch_disk_groups_cpu *groups; + struct bch_disk_group_cpu *g; + unsigned nr = 0; + u16 path[32]; + + out->atomic++; + rcu_read_lock(); + groups = rcu_dereference(c->disk_groups); + if (!groups) + goto invalid; + + while (1) { + if (nr == ARRAY_SIZE(path)) + goto invalid; + + if (v >= groups->nr) + goto invalid; + + g = groups->entries + v; + + if (g->deleted) + goto invalid; + + 
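/*
 * The dirent checks above are converted to bkey_fsck_err_on(), which couples
 * each check to a stable error-table id and jumps to a shared fsck_err label
 * rather than returning early.  A rough stand-in showing just the macro shape
 * (the real macro also feeds bch2_fsck_err() and the superblock error
 * counters, and relies on a "ret" variable in the enclosing scope):
 */
#include <stdio.h>

#define fsck_err_on(cond, err_id, ...)					\
do {									\
	if (cond) {							\
		fprintf(stderr, "fsck error %s: ", #err_id);		\
		fprintf(stderr, __VA_ARGS__);				\
		fprintf(stderr, "\n");					\
		ret = -1;						\
		goto fsck_err;						\
	}								\
} while (0)

static int check_name(const char *name, unsigned len)
{
	int ret = 0;

	fsck_err_on(!len, dirent_empty_name, "empty name");
	fsck_err_on(len == 1 && name[0] == '.', dirent_name_dot_or_dotdot,
		    "invalid name");
fsck_err:
	return ret;
}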
path[nr++] = v; + + if (!g->parent) + break; + + v = g->parent - 1; + } + + while (nr) { + v = path[--nr]; + g = groups->entries + v; + + prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); + if (nr) + prt_printf(out, "."); + } +out: + rcu_read_unlock(); + out->atomic--; + return; +invalid: + prt_printf(out, "invalid label %u", v); + goto out; +} + +void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb, disk_groups); @@ -493,10 +544,7 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, return -EINVAL; } -void bch2_opt_target_to_text(struct printbuf *out, - struct bch_fs *c, - struct bch_sb *sb, - u64 v) +void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { struct target t = target_decode(v); @@ -504,47 +552,69 @@ void bch2_opt_target_to_text(struct printbuf *out, case TARGET_NULL: prt_printf(out, "none"); break; - case TARGET_DEV: - if (c) { - struct bch_dev *ca; - - rcu_read_lock(); - ca = t.dev < c->sb.nr_devices - ? rcu_dereference(c->devs[t.dev]) - : NULL; - - if (ca && percpu_ref_tryget(&ca->io_ref)) { - prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); - percpu_ref_put(&ca->io_ref); - } else if (ca) { - prt_printf(out, "offline device %u", t.dev); - } else { - prt_printf(out, "invalid device %u", t.dev); - } - - rcu_read_unlock(); + case TARGET_DEV: { + struct bch_dev *ca; + + rcu_read_lock(); + ca = t.dev < c->sb.nr_devices + ? rcu_dereference(c->devs[t.dev]) + : NULL; + + if (ca && percpu_ref_tryget(&ca->io_ref)) { + prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); + percpu_ref_put(&ca->io_ref); + } else if (ca) { + prt_printf(out, "offline device %u", t.dev); } else { - struct bch_member m = bch2_sb_member_get(sb, t.dev); - - if (bch2_dev_exists(sb, t.dev)) { - prt_printf(out, "Device "); - pr_uuid(out, m.uuid.b); - prt_printf(out, " (%u)", t.dev); - } else { - prt_printf(out, "Bad device %u", t.dev); - } + prt_printf(out, "invalid device %u", t.dev); } + + rcu_read_unlock(); break; + } case TARGET_GROUP: - if (c) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); - mutex_unlock(&c->sb_lock); + bch2_disk_path_to_text(out, c, t.group); + break; + default: + BUG(); + } +} + +void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) +{ + struct target t = target_decode(v); + + switch (t.type) { + case TARGET_NULL: + prt_printf(out, "none"); + break; + case TARGET_DEV: { + struct bch_member m = bch2_sb_member_get(sb, t.dev); + + if (bch2_dev_exists(sb, t.dev)) { + prt_printf(out, "Device "); + pr_uuid(out, m.uuid.b); + prt_printf(out, " (%u)", t.dev); } else { - bch2_disk_path_to_text(out, sb, t.group); + prt_printf(out, "Bad device %u", t.dev); } break; + } + case TARGET_GROUP: + bch2_disk_path_to_text_sb(out, sb, t.group); + break; default: BUG(); } } + +void bch2_opt_target_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + if (c) + bch2_target_to_text(out, c, v); + else + bch2_target_to_text_sb(out, sb, v); +} diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index bd7711767fd4..441826fff224 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_DISK_GROUPS_H #define _BCACHEFS_DISK_GROUPS_H +#include "disk_groups_types.h" + extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) @@ -83,7 +85,10 
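/*
 * bch2_disk_path_to_text() above resolves a label by walking parent links
 * toward the root, recording each group index, then printing the collected
 * components in reverse with '.' separators.  The same two-pass walk in
 * miniature (here a negative parent marks the root, where the real code uses
 * a biased "parent - 1" encoding):
 */
#include <stdio.h>

struct group { const char *label; int parent; };	/* parent < 0: root */

static void path_to_text(const struct group *groups, unsigned nr, unsigned v)
{
	unsigned path[32], depth = 0;

	while (1) {
		if (depth == 32 || v >= nr)
			return;		/* corrupt: bail like "invalid label" */
		path[depth++] = v;
		if (groups[v].parent < 0)
			break;
		v = groups[v].parent;
	}

	/* unwind: last element collected is the root, so print it first */
	while (depth) {
		printf("%s", groups[path[--depth]].label);
		if (depth)
			printf(".");
	}
}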
@@ int bch2_disk_path_find(struct bch_sb_handle *, const char *); /* Exported for userspace bcachefs-tools: */ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); +void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned); +void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned); + +void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h new file mode 100644 index 000000000000..a54ef085b13d --- /dev/null +++ b/fs/bcachefs/disk_groups_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H +#define _BCACHEFS_DISK_GROUPS_TYPES_H + +struct bch_disk_group_cpu { + bool deleted; + u16 parent; + u8 label[BCH_SB_LABEL_SIZE]; + struct bch_devs_mask devs; +}; + +struct bch_disk_groups_cpu { + struct rcu_head rcu; + unsigned nr; + struct bch_disk_group_cpu entries[] __counted_by(nr); +}; + +#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8646856e4539..875f7c5a6fca 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,29 +105,26 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + int ret = 0; - if (bkey_eq(k.k->p, POS_MIN)) { - prt_printf(err, "stripe at POS_MIN"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->p.inode) { - prt_printf(err, "nonzero inode field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || + bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, + stripe_pos_bad, + "stripe at bad pos"); - if (bkey_val_u64s(k.k) < stripe_val_u64s(s)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(k.k), stripe_val_u64s(s)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, + stripe_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(k.k), stripe_val_u64s(s)); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, @@ -153,6 +150,7 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); if (i < nr_data) prt_printf(out, "#%u", stripe_blockcount_get(s, i)); + prt_printf(out, " gen %u", ptr->gen); if (ptr_stale(ca, ptr)) prt_printf(out, " stale"); } @@ -306,16 +304,21 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - struct printbuf buf2 = PRINTBUF; + struct printbuf err = PRINTBUF; + struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); + + prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", + want.hi, want.lo, + got.hi, got.lo, + bch2_csum_types[v->csum_type]); + prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); + bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); + 
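/*
 * The new disk_groups_types.h declares its flexible array with
 * __counted_by(nr), letting FORTIFY/UBSAN bounds-check entries[] against the
 * runtime count.  A sketch of allocating such a structure; the kernel would
 * use struct_size() and kzalloc(), spelled out here in plain C:
 */
#include <stdlib.h>

struct entry { int x; };

struct table {
	unsigned nr;
	struct entry entries[];	/* kernel: __counted_by(nr) */
};

static struct table *table_alloc(unsigned nr)
{
	struct table *t = calloc(1, sizeof(*t) + nr * sizeof(t->entries[0]));

	if (t)
		t->nr = nr;	/* set the count before touching entries[] */
	return t;
}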
bch_err_ratelimited(ca, "%s", err.buf); + printbuf_exit(&err); - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key)); - - bch_err_ratelimited(c, - "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", - (void *) _RET_IP_, i, j, v->csum_type, - want.lo, got.lo, buf2.buf); - printbuf_exit(&buf2); clear_bit(i, buf->valid); + + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); break; } @@ -373,7 +376,11 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; - if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "erasure coding %s error: %s", bio_data_dir(bio) ? "write" : "read", bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); @@ -474,14 +481,10 @@ err: return ret; } -static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -{ - return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe)); -} - /* recovery read path: */ -int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) +int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio) { + struct bch_fs *c = trans->c; struct ec_stripe_buf *buf; struct closure cl; struct bch_stripe *v; @@ -496,7 +499,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -BCH_ERR_ENOMEM_ec_read_extent; - ret = get_stripe_key(c, rbio->pick.ec.idx, buf); + ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf)); if (ret) { bch_err_ratelimited(c, "error doing reconstruct read: error %i looking up stripe", ret); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 966d165a3b66..7d0237c9819f 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; -int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -199,7 +199,7 @@ struct ec_stripe_head { struct ec_stripe_new *s; }; -int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); +int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 7cc083776a2e..68a1a96bb7ca 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -3,6 +3,8 @@ #define _BCACHEFS_ERRCODE_H #define BCH_ERRCODES() \ + x(ERANGE, ERANGE_option_too_small) \ + x(ERANGE, ERANGE_option_too_big) \ x(ENOMEM, ENOMEM_stripe_buf) \ x(ENOMEM, ENOMEM_replicas_table) \ x(ENOMEM, ENOMEM_cpu_replicas) \ @@ -213,6 +215,8 @@ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ x(BCH_ERR_invalid_sb, invalid_sb_clean) \ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ + x(BCH_ERR_invalid_sb, invalid_sb_errors) \ + x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 2a5af8872613..7b28d37922fd 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -56,8 +56,9 @@ void bch2_io_error_work(struct work_struct *work) up_write(&c->state_lock); } -void bch2_io_error(struct bch_dev *ca) +void bch2_io_error(struct bch_dev *ca, enum 
bch_member_error_type type) { + atomic64_inc(&ca->errors[type]); //queue_work(system_long_wq, &ca->io_error_work); } @@ -116,31 +117,34 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) return NULL; - list_for_each_entry(s, &c->fsck_errors, list) + list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->fmt == fmt) { /* * move it to the head of the list: repeated fsck errors * are common */ - list_move(&s->list, &c->fsck_errors); + list_move(&s->list, &c->fsck_error_msgs); return s; } s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) { - if (!c->fsck_alloc_err) + if (!c->fsck_alloc_msgs_err) bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); - c->fsck_alloc_err = true; + c->fsck_alloc_msgs_err = true; return NULL; } INIT_LIST_HEAD(&s->list); s->fmt = fmt; - list_add(&s->list, &c->fsck_errors); + list_add(&s->list, &c->fsck_error_msgs); return s; } -int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) +int bch2_fsck_err(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) { struct fsck_err_state *s = NULL; va_list args; @@ -148,11 +152,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + bch2_sb_error_count(c, err); + va_start(args, fmt); prt_vprintf(out, fmt, args); va_end(args); - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); s = fsck_err_get(c, fmt); if (s) { /* @@ -162,7 +168,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) */ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { ret = s->ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); return ret; } @@ -257,7 +263,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) 
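
With this change every fsck error carries a stable ID, and bch2_sb_error_count() records it before any formatting or ratelimiting, so the counts survive even when the message itself is suppressed. The call-site migration looks like this (the ID below is one that really appears in the fsck.c hunks further down):

	/* before: */
	if (fsck_err(c, "unreachable inode %llu:%u", inum, snapshot))
		/* ... repair ... */;

	/* after: the short name expands to BCH_FSCK_ERR_inode_unreachable */
	if (fsck_err(c, inode_unreachable,
		     "unreachable inode %llu:%u", inum, snapshot))
		/* ... repair ... */;
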
if (s) s->ret = ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); @@ -278,9 +284,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); - list_for_each_entry_safe(s, n, &c->fsck_errors, list) { + list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); @@ -289,5 +295,5 @@ void bch2_flush_fsck_errs(struct bch_fs *c) kfree(s); } - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 7ce9540052e5..d167d65986e0 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <linux/printk.h> +#include "sb-errors.h" struct bch_dev; struct bch_fs; @@ -101,18 +102,26 @@ struct fsck_err_state { char *last_msg; }; -#define FSCK_CAN_FIX (1 << 0) -#define FSCK_CAN_IGNORE (1 << 1) -#define FSCK_NEED_FSCK (1 << 2) -#define FSCK_NO_RATELIMIT (1 << 3) +enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, + FSCK_NEED_FSCK = 1 << 2, + FSCK_NO_RATELIMIT = 1 << 3, +}; + +#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) -__printf(3, 4) __cold -int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...); +__printf(4, 5) __cold +int bch2_fsck_err(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); void bch2_flush_fsck_errs(struct bch_fs *); -#define __fsck_err(c, _flags, msg, ...) \ +#define __fsck_err(c, _flags, _err_type, ...) \ ({ \ - int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \ + int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \ + __VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix && \ _ret != -BCH_ERR_fsck_ignore) { \ @@ -127,26 +136,53 @@ void bch2_flush_fsck_errs(struct bch_fs *); /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define __fsck_err_on(cond, c, _flags, ...) \ - (unlikely(cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) +#define __fsck_err_on(cond, c, _flags, _err_type, ...) \ + (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false) + +#define need_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define need_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) -#define need_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define need_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define mustfix_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) +static inline void bch2_bkey_fsck_err(struct bch_fs *c, + struct printbuf *err_msg, + enum bch_sb_error_id err_type, + const char *fmt, ...) +{ + va_list args; -#define mustfix_fsck_err_on(cond, c, ...) 
\ - __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) + va_start(args, fmt); + prt_vprintf(err_msg, fmt, args); + va_end(args); -#define fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +} -#define fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +do { \ + prt_printf(_err_msg, __VA_ARGS__); \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ + ret = -BCH_ERR_invalid_bkey; \ + goto fsck_err; \ +} while (0) + +#define bkey_fsck_err_on(cond, ...) \ +do { \ + if (unlikely(cond)) \ + bkey_fsck_err(__VA_ARGS__); \ +} while (0) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW @@ -179,26 +215,26 @@ do { \ void bch2_io_error_work(struct work_struct *); /* Does the error handling without logging a message */ -void bch2_io_error(struct bch_dev *); +void bch2_io_error(struct bch_dev *, enum bch_member_error_type); -#define bch2_dev_io_err_on(cond, ca, ...) \ +#define bch2_dev_io_err_on(cond, ca, _type, ...) \ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_dev_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) -#define bch2_dev_inum_io_err_on(cond, ca, ...) \ +#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 1b25f84e4b9c..a864de231b69 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -13,6 +13,7 @@ #include "btree_iter.h" #include "buckets.h" #include "checksum.h" +#include "compress.h" #include "debug.h" #include "disk_groups.h" #include "error.h" @@ -162,17 +163,19 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; + + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, + btree_ptr_val_too_big, + "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, @@ -181,17 +184,20 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) { - prt_printf(err, "value too big (%zu > %zu)", - bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; + + bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, + btree_ptr_v2_val_too_big, + "value too big (%zu > %zu)", + bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void 
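
bkey_fsck_err_on() gives the converted ..._invalid() functions a single shape: initialize ret to 0, test each condition in order (a failing check counts the error, formats err, sets ret to -BCH_ERR_invalid_bkey and jumps), and return through the fsck_err label. A minimal sketch of that pattern, with one illustrative check borrowed from the inode code later in this series:

	int bch2_example_invalid(struct bch_fs *c, struct bkey_s_c k,
				 enum bkey_invalid_flags flags,
				 struct printbuf *err)
	{
		int ret = 0;

		bkey_fsck_err_on(k.k->p.inode, c, err,
				 inode_pos_inode_nonzero,
				 "nonzero k.p.inode");
	fsck_err:
		return ret;
	}
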
bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, @@ -372,19 +378,18 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + int ret = 0; - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) { - prt_printf(err, "invalid nr_replicas (%u)", - r.v->nr_replicas); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, + reservation_key_nr_replicas_invalid, + "invalid nr_replicas (%u)", r.v->nr_replicas); +fsck_err: + return ret; } void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, @@ -757,18 +762,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, return i; } -static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) -{ - union bch_extent_entry *next = extent_entry_next(entry); - - /* stripes have ptrs, but their layout doesn't work with this code */ - BUG_ON(k.k->type == KEY_TYPE_stripe); - - memmove_u64s_down(entry, next, - (u64 *) bkey_val_end(k) - (u64 *) next); - k.k->u64s -= (u64 *) next - (u64 *) entry; -} - /* * Returns pointer to the next entry after the one being dropped: */ @@ -992,10 +985,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - const struct bch_extent_stripe_ptr *ec; - struct bch_dev *ca; bool first = true; if (c) @@ -1006,9 +995,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " "); switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + case BCH_EXTENT_ENTRY_ptr: { + const struct bch_extent_ptr *ptr = entry_to_ptr(entry); + struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] ? 
bch_dev_bkey_exists(c, ptr->dev) : NULL; @@ -1030,10 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " stale"); } break; + } case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); + case BCH_EXTENT_ENTRY_crc128: { + struct bch_extent_crc_unpacked crc = + bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", crc.compressed_size, @@ -1042,12 +1033,26 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, bch2_csum_types[crc.csum_type], bch2_compression_types[crc.compression_type]); break; - case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; + } + case BCH_EXTENT_ENTRY_stripe_ptr: { + const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; prt_printf(out, "ec: idx %llu block %u", (u64) ec->idx, ec->block); break; + } + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + prt_str(out, "rebalance: target "); + if (c) + bch2_target_to_text(out, c, r->target); + else + prt_printf(out, "%u", r->target); + prt_str(out, " compression "); + bch2_compression_opt_to_text(out, r->compression); + break; + } default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; @@ -1057,7 +1062,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } -static int extent_ptr_invalid(const struct bch_fs *c, +static int extent_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, const struct bch_extent_ptr *ptr, @@ -1070,6 +1075,7 @@ static int extent_ptr_invalid(const struct bch_fs *c, u64 bucket; u32 bucket_offset; struct bch_dev *ca; + int ret = 0; if (!bch2_dev_exists2(c, ptr->dev)) { /* @@ -1080,41 +1086,33 @@ static int extent_ptr_invalid(const struct bch_fs *c, if (flags & BKEY_INVALID_WRITE) return 0; - prt_printf(err, "pointer to invalid device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; + bkey_fsck_err(c, err, ptr_to_invalid_device, + "pointer to invalid device (%u)", ptr->dev); } ca = bch_dev_bkey_exists(c, ptr->dev); bkey_for_each_ptr(ptrs, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) { - prt_printf(err, "multiple pointers to same device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, + ptr_to_duplicate_device, + "multiple pointers to same device (%u)", ptr->dev); bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset); - if (bucket >= ca->mi.nbuckets) { - prt_printf(err, "pointer past last bucket (%llu > %llu)", - bucket, ca->mi.nbuckets); - return -BCH_ERR_invalid_bkey; - } - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) { - prt_printf(err, "pointer before first bucket (%llu < %u)", - bucket, ca->mi.first_bucket); - return -BCH_ERR_invalid_bkey; - } - - if (bucket_offset + size_ondisk > ca->mi.bucket_size) { - prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)", + bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err, + ptr_after_last_bucket, + "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets); + bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err, + ptr_before_first_bucket, + "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket); + bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err, + ptr_spans_multiple_buckets, + "pointer spans multiple 
buckets (%u + %u > %u)", bucket_offset, size_ondisk, ca->mi.bucket_size); - return -BCH_ERR_invalid_bkey; - } - - return 0; +fsck_err: + return ret; } -int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -1124,24 +1122,22 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, unsigned size_ondisk = k.k->size; unsigned nonce = UINT_MAX; unsigned nr_ptrs = 0; - bool unwritten = false, have_ec = false, crc_since_last_ptr = false; - int ret; + bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false; + int ret = 0; if (bkey_is_btree_ptr(k.k)) size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) { - prt_printf(err, "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, + extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - if (bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry)) { - prt_printf(err, "has non ptr field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && + !extent_entry_is_ptr(entry), c, err, + btree_ptr_has_non_ptr, + "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: @@ -1150,22 +1146,15 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (ret) return ret; - if (nr_ptrs && unwritten != entry->ptr.unwritten) { - prt_printf(err, "extent with unwritten and written ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) { - prt_printf(err, "has unwritten ptrs"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, + ptr_cached_and_erasure_coded, + "cached, erasure coded ptr"); - if (entry->ptr.cached && have_ec) { - prt_printf(err, "cached, erasure coded ptr"); - return -BCH_ERR_invalid_bkey; - } + if (!entry->ptr.unwritten) + have_written = true; + else + have_unwritten = true; - unwritten = entry->ptr.unwritten; have_ec = false; crc_since_last_ptr = false; nr_ptrs++; @@ -1175,72 +1164,77 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - if (crc.offset + crc.live_size > - crc.uncompressed_size) { - prt_printf(err, "checksum offset + key size > uncompressed size"); - return -BCH_ERR_invalid_bkey; - } - - size_ondisk = crc.compressed_size; - - if (!bch2_checksum_type_valid(c, crc.csum_type)) { - prt_printf(err, "invalid checksum type"); - return -BCH_ERR_invalid_bkey; - } - - if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) { - prt_printf(err, "invalid compression type"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, + ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, + ptr_crc_csum_type_unknown, + "invalid checksum type"); + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, + ptr_crc_compression_type_unknown, + "invalid compression type"); if 
(bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) { - prt_printf(err, "incorrect nonce"); - return -BCH_ERR_invalid_bkey; - } + else if (nonce != crc.offset + crc.nonce) + bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + "incorrect nonce"); } - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc_since_last_ptr, c, err, + ptr_crc_redundant, + "redundant crc entry"); crc_since_last_ptr = true; + + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err, + ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + + size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(have_ec, c, err, + ptr_stripe_redundant, + "redundant stripe entry"); have_ec = true; break; - case BCH_EXTENT_ENTRY_rebalance: + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + if (!bch2_compression_opt_valid(r->compression)) { + struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + prt_printf(err, "invalid compression opt %u:%u", + opt.type, opt.level); + return -BCH_ERR_invalid_bkey; + } break; } + } } - if (!nr_ptrs) { - prt_str(err, "no ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (nr_ptrs >= BCH_BKEY_PTRS_MAX) { - prt_str(err, "too many ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } - - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!nr_ptrs, c, err, + extent_ptrs_no_ptrs, + "no ptrs"); + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, + extent_ptrs_too_many_ptrs, + "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); + bkey_fsck_err_on(have_written && have_unwritten, c, err, + extent_ptrs_written_and_unwritten, + "extent with unwritten and written ptrs"); + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, + extent_ptrs_unwritten, + "has unwritten ptrs"); + bkey_fsck_err_on(crc_since_last_ptr, c, err, + extent_ptrs_redundant_crc, + "redundant crc entry"); + bkey_fsck_err_on(have_ec, c, err, + extent_ptrs_redundant_stripe, + "redundant stripe entry"); +fsck_err: + return ret; } void bch2_ptr_swab(struct bkey_s k) @@ -1281,6 +1275,125 @@ void bch2_ptr_swab(struct bkey_s k) } } +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + + bkey_extent_entry_for_each(ptrs, entry) + if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) + return &entry->rebalance; + + return NULL; +} + +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + unsigned rewrite_ptrs = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { + rewrite_ptrs 
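
The new size cap compares crc.uncompressed_size, which is kept in 512-byte sectors, against c->opts.encoded_extent_max, which is kept in bytes; the >> 9 does the unit conversion. Worked out for a 64 KiB limit:

	unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
	/* 65536 bytes >> 9 == 128 sectors */
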
= 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + rewrite_ptrs |= 1U << i; + i++; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + const struct bch_extent_ptr *ptr; + unsigned i = 0; + + bkey_for_each_ptr(ptrs, ptr) { + if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) + rewrite_ptrs |= 1U << i; + i++; + } + } + + return rewrite_ptrs; +} + +bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + /* + * If it's an indirect extent, we don't delete the rebalance entry when + * done so that we know what options were applied - check if it still + * needs work done: + */ + if (r && + k.k->type == KEY_TYPE_reflink_v && + !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) + r = NULL; + + return r != NULL; +} + +int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, + unsigned target, unsigned compression) +{ + struct bkey_s k = bkey_i_to_s(_k); + struct bch_extent_rebalance *r; + bool needs_rebalance; + + if (!bkey_extent_is_direct_data(k.k)) + return 0; + + /* get existing rebalance entry: */ + r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); + if (r) { + if (k.k->type == KEY_TYPE_reflink_v) { + /* + * indirect extents: existing options take precedence, + * so that we don't move extents back and forth if + * they're referenced by different inodes with different + * options: + */ + if (r->target) + target = r->target; + if (r->compression) + compression = r->compression; + } + + r->target = target; + r->compression = compression; + } + + needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); + + if (needs_rebalance && !r) { + union bch_extent_entry *new = bkey_val_end(k); + + new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; + new->rebalance.compression = compression; + new->rebalance.target = target; + new->rebalance.unused = 0; + k.k->u64s += extent_entry_u64s(new); + } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { + /* + * For indirect extents, don't delete the rebalance entry when + * we're finished so that we know we specifically moved it or + * compressed it to its current location/compression type + */ + extent_entry_drop(k, (union bch_extent_entry *) r); + } + + return 0; +} + /* Generic extent code: */ int bch2_cut_front_s(struct bpos where, struct bkey_s k) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 879e7d218b6a..a2ce8a3be13c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -89,6 +89,18 @@ static inline void __extent_entry_insert(struct bkey_i *k, memcpy_u64s_small(dst, new, extent_entry_u64s(new)); } +static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) +{ + union bch_extent_entry *next = extent_entry_next(entry); + + /* stripes have ptrs, but their layout doesn't work with this code */ + BUG_ON(k.k->type == KEY_TYPE_stripe); + + memmove_u64s_down(entry, next, + (u64 *) bkey_val_end(k) - (u64 *) next); + k.k->u64s -= (u64 *) next - (u64 *) entry; +} + static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; @@ -190,6 +202,11 @@ static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); } +static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc) +{ + return 
crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc); +} + /* bkey_ptrs: generically over any key type that has ptrs */ struct bkey_ptrs_c { @@ -383,12 +400,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, @@ -428,7 +445,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); @@ -688,11 +705,19 @@ void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_ptr_swab(struct bkey_s); +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, + unsigned, unsigned); +bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); + +int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, + unsigned, unsigned); + /* Generic extent code: */ enum bch_extent_overlap { @@ -737,22 +762,4 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size) k->size = new_size; } -/* - * In extent_sort_fix_overlapping(), insert_fixup_extent(), - * extent_merge_inline() - we're modifying keys in place that are packed. To do - * that we have to unpack the key, modify the unpacked key - then this - * copies/repacks the unpacked to the original as necessary. 
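
bch2_bkey_ptrs_need_rebalance() returns a bitmask over the key's pointers: bit i is set when the i-th pointer is outside the target or compressed with the wrong type. An illustrative consumer (the queueing step is a placeholder):

	unsigned rewrite = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);

	while (rewrite) {
		unsigned i = __ffs(rewrite);	/* lowest pointer index needing work */

		/* ... queue pointer i for rewrite ... */
		rewrite &= rewrite - 1;		/* clear that bit */
	}
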
- */ -static inline void extent_save(struct btree *b, struct bkey_packed *dst, - struct bkey *src) -{ - struct bkey_format *f = &b->format; - struct bkey_i *dst_unpacked; - - if ((dst_unpacked = packed_to_bkey(dst))) - dst_unpacked->k = *src; - else - BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -} - #endif /* _BCACHEFS_EXTENTS_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index bb5305441f27..4496cf91a4c1 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans, bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); if (flags & BCH_CREATE_TMPFILE) - new_inode->bi_flags |= BCH_INODE_UNLINKED; + new_inode->bi_flags |= BCH_INODE_unlinked; ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); if (ret) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 58ccc7b91ac7..52f0e7acda3d 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -389,6 +389,21 @@ static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs return ret; } +/* + * Determine when a writepage io is full. We have to limit writepage bios to a + * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to + * what the bounce path in bch2_write_extent() can handle. In theory we could + * loosen this restriction for non-bounce I/O, but we don't have that context + * here. Ideally, we can up this limit and make it configurable in the future + * when the bounce path can be enhanced to accommodate larger source bios. + */ +static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len) +{ + struct bio *bio = &io->op.wbio.bio; + return bio_full(bio, len) || + (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE); +} + static void bch2_writepage_io_done(struct bch_write_op *op) { struct bch_writepage_io *io = @@ -606,9 +621,7 @@ do_io: if (w->io && (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio, sectors << 9) || - w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= - (BIO_MAX_VECS * PAGE_SIZE) || + bch_io_full(w->io, sectors << 9) || bio_end_sector(&w->io->op.wbio.bio) != sector)) bch2_writepage_do_io(w); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 6a9557e7ecab..5b42a76c4796 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -113,6 +113,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) } else { atomic_set(&dio->cl.remaining, CLOSURE_REMAINING_INITIALIZER + 1); + dio->cl.closure_get_happened = true; } dio->req = req; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 6040bd3f0778..5a39bcb597a3 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans, unsigned newflags = s->flags; unsigned oldflags = bi->bi_flags & s->mask; - if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && + if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if (!S_ISREG(bi->bi_mode) && !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) + (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) return -EINVAL; if (s->set_projinherit) { diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index 54a9c21a3b83..d30f9bb056fd 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ 
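
The arithmetic behind bch_io_full()'s hard cap, with the usual constants: BIO_MAX_VECS is 256, so with 4 KiB pages a writepage bio tops out at 256 * 4096 = 1 MiB, the "1MB with 4k pages" figure quoted in the bch_io_full() comment above. A simplified use (the real caller folds this into a larger condition):

	if (w->io && bch_io_full(w->io, sectors << 9))
		bch2_writepage_do_io(w);	/* submit, then start a fresh io */
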
-6,28 +6,28 @@ /* bcachefs inode flags -> vfs inode flags: */ static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_SYNC] = S_SYNC, - [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, - [__BCH_INODE_APPEND] = S_APPEND, - [__BCH_INODE_NOATIME] = S_NOATIME, + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, }; /* bcachefs inode flags -> FS_IOC_GETFLAGS: */ static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_SYNC] = FS_SYNC_FL, - [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, - [__BCH_INODE_APPEND] = FS_APPEND_FL, - [__BCH_INODE_NODUMP] = FS_NODUMP_FL, - [__BCH_INODE_NOATIME] = FS_NOATIME_FL, + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, - [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, - [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, - [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; }; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a2a5133fb6b5..166d8d8abe68 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -764,15 +764,15 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); } - if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) + if (inode->ei_inode.bi_flags & BCH_INODE_immutable) stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= STATX_ATTR_IMMUTABLE; - if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) + if (inode->ei_inode.bi_flags & BCH_INODE_append) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= STATX_ATTR_APPEND; - if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) + if (inode->ei_inode.bi_flags & BCH_INODE_nodump) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= STATX_ATTR_NODUMP; @@ -1213,9 +1213,6 @@ static struct dentry *bch2_get_parent(struct dentry *child) .inum = inode->ei_inode.bi_dir, }; - if (!parent_inum.inum) - return NULL; - return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b8f9e7475dc5..9f3e9bd3d767 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_buf.h" +#include "btree_cache.h" #include "btree_update.h" #include "buckets.h" #include "darray.h" @@ -444,9 +445,10 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, if (i->equiv == n.equiv) { bch_err(c, "snapshot deletion did not finish:\n" " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", - bch2_btree_ids[btree_id], + bch2_btree_id_str(btree_id), pos.inode, pos.offset, i->id, n.id, n.equiv); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); } } @@ -719,8 +721,9 @@ static int check_key_has_snapshot(struct btree_trans *trans, int ret = 0; if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, - "key in missing 
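
The renamed tables are plain bit-index to foreign-flag maps, consumed by looping over set bits. A hedged sketch of that translation (this helper is illustrative, not the file's actual API):

	static unsigned map_flags(const unsigned table[], unsigned nr,
				  unsigned flags)
	{
		unsigned i, ret = 0;

		for (i = 0; i < nr; i++)
			if (flags & BIT(i))
				ret |= table[i];
		return ret;
	}

	/* e.g. map_flags(bch_flags_to_uflags,
	 *		  ARRAY_SIZE(bch_flags_to_uflags), bi_flags) */
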
snapshot: %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + bkey_in_missing_snapshot, + "key in missing snapshot: %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; fsck_err: @@ -789,6 +792,7 @@ static int hash_check_key(struct btree_trans *trans, if (fsck_err_on(k.k->type == desc.key_type && !desc.cmp_bkey(k, hash_k), c, + hash_table_key_duplicate, "duplicate hash table keys:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), @@ -807,8 +811,9 @@ out: printbuf_exit(&buf); return ret; bad_hash: - if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", - bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash, + if (fsck_err(c, hash_table_key_wrong_offset, + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", + bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); @@ -849,22 +854,23 @@ static int check_inode(struct btree_trans *trans, BUG_ON(bch2_inode_unpack(k, &u)); if (!full && - !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED))) + !(u.bi_flags & (BCH_INODE_i_size_dirty| + BCH_INODE_i_sectors_dirty| + BCH_INODE_unlinked))) return 0; if (prev->bi_inum != u.bi_inum) *prev = u; if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || - inode_d_type(prev) != inode_d_type(&u), c, + inode_d_type(prev) != inode_d_type(&u), + c, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); return -EINVAL; } - if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) && + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; @@ -872,7 +878,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED; + u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; ret = __write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); @@ -884,9 +890,10 @@ static int check_inode(struct btree_trans *trans, return 0; } - if (u.bi_flags & BCH_INODE_UNLINKED && + if (u.bi_flags & BCH_INODE_unlinked && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu unlinked", + fsck_err(c, inode_unlinked_but_clean, + "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); @@ -896,9 +903,10 @@ static int check_inode(struct btree_trans *trans, return ret; } - if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && + if (u.bi_flags & BCH_INODE_i_size_dirty && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", + fsck_err(c, inode_i_size_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_size dirty", u.bi_inum))) { bch_verbose(c, "truncating inode %llu", u.bi_inum); @@ -922,15 +930,16 @@ static int check_inode(struct btree_trans *trans, * We truncated without our normal sector accounting hook, just * make sure we recalculate it: */ - u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags |= BCH_INODE_i_sectors_dirty; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; + u.bi_flags &= ~BCH_INODE_i_size_dirty; do_update = true; } - if (u.bi_flags & 
BCH_INODE_I_SECTORS_DIRTY && + if (u.bi_flags & BCH_INODE_i_sectors_dirty && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", + fsck_err(c, inode_i_sectors_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_sectors dirty", u.bi_inum))) { s64 sectors; @@ -944,14 +953,14 @@ static int check_inode(struct btree_trans *trans, } u.bi_sectors = sectors; - u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags &= ~BCH_INODE_i_sectors_dirty; do_update = true; } - if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) { + if (u.bi_flags & BCH_INODE_backptr_untrusted) { u.bi_dir = 0; u.bi_dir_offset = 0; - u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED; + u.bi_flags &= ~BCH_INODE_backptr_untrusted; do_update = true; } @@ -1056,10 +1065,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) return -BCH_ERR_internal_fsck_err; } - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, - "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->snapshot, - i->inode.bi_sectors, i->count)) { + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), + c, inode_i_sectors_wrong, + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", + w->last_pos.inode, i->snapshot, + i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; ret = fsck_write_inode(trans, &i->inode, i->snapshot); if (ret) @@ -1200,7 +1210,8 @@ static int overlapping_extents_found(struct btree_trans *trans, prt_printf(&buf, "\n overwriting %s extent", pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); - if (fsck_err(c, "overlapping extents%s", buf.buf)) { + if (fsck_err(c, extent_overlapping, + "overlapping extents%s", buf.buf)) { struct btree_iter *old_iter = &iter1; struct disk_reservation res = { 0 }; @@ -1297,6 +1308,28 @@ err: return ret; } +static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc_is_encoded(crc) && + crc.uncompressed_size > encoded_extent_max_sectors) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); + printbuf_exit(&buf); + } + + return 0; +} + static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct inode_walker *inode, @@ -1333,7 +1366,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, extent_in_missing_inode, "extent in missing inode:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) @@ -1341,7 +1374,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(i && !S_ISREG(i->inode.bi_mode) && - !S_ISLNK(i->inode.bi_mode), c, + !S_ISLNK(i->inode.bi_mode), + c, extent_in_non_reg_inode, "extent in non regular inode mode %o:\n %s", i->inode.bi_mode, (printbuf_reset(&buf), @@ -1371,9 +1405,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, continue; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + if 
(fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && - !bkey_extent_is_reservation(k), c, + !bkey_extent_is_reservation(k), + c, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n %s", i->inode.bi_inum, i->snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1432,7 +1467,8 @@ int bch2_check_extents(struct bch_fs *c) &res, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends); + check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: + check_extent_overbig(trans, &iter, k); })) ?: check_i_sectors(trans, &w); @@ -1446,6 +1482,30 @@ int bch2_check_extents(struct bch_fs *c) return ret; } +int bch2_check_indirect_extents(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct btree_iter iter; + struct bkey_s_c k; + struct disk_reservation res = { 0 }; + int ret = 0; + + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + POS_MIN, + BTREE_ITER_PREFETCH, k, + &res, NULL, + BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + bch2_disk_reservation_put(c, &res); + check_extent_overbig(trans, &iter, k); + })); + + bch2_disk_reservation_put(c, &res); + bch2_trans_put(trans); + + bch_err_fn(c, ret); + return ret; +} + static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; @@ -1470,7 +1530,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) continue; } - if (fsck_err_on(i->inode.bi_nlink != i->count, c, + if (fsck_err_on(i->inode.bi_nlink != i->count, + c, inode_dir_wrong_nlink, "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; @@ -1514,27 +1575,28 @@ static int check_dirent_target(struct btree_trans *trans, backpointer_exists = ret; ret = 0; - if (fsck_err_on(S_ISDIR(target->bi_mode) && - backpointer_exists, c, + if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists, + c, inode_dir_multiple_links, "directory %llu with multiple links", target->bi_inum)) { ret = __remove_dirent(trans, d.k->p); goto out; } - if (fsck_err_on(backpointer_exists && - !target->bi_nlink, c, + if (fsck_err_on(backpointer_exists && !target->bi_nlink, + c, inode_multiple_links_but_nlink_0, "inode %llu type %s has multiple links but i_nlink 0", target->bi_inum, bch2_d_types[d.v->d_type])) { target->bi_nlink++; - target->bi_flags &= ~BCH_INODE_UNLINKED; + target->bi_flags &= ~BCH_INODE_unlinked; ret = __write_inode(trans, target, target_snapshot); if (ret) goto err; } - if (fsck_err_on(!backpointer_exists, c, + if (fsck_err_on(!backpointer_exists, + c, inode_wrong_backpointer, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", @@ -1552,7 +1614,8 @@ static int check_dirent_target(struct btree_trans *trans, } } - if (fsck_err_on(d.v->d_type != inode_d_type(target), c, + if (fsck_err_on(d.v->d_type != inode_d_type(target), + c, dirent_d_type_wrong, "incorrect d_type: got %s, should be %s:\n%s", bch2_d_type_str(d.v->d_type), bch2_d_type_str(inode_d_type(target)), @@ -1576,7 +1639,8 @@ static int check_dirent_target(struct btree_trans *trans, if (d.v->d_type == DT_SUBVOL && target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && (c->sb.version < bcachefs_metadata_version_subvol_dirent || - fsck_err(c, "dirent has wrong d_parent_subvol field: got 
%u, should be %u", + fsck_err(c, dirent_d_parent_subvol_wrong, + "dirent has wrong d_parent_subvol field: got %u, should be %u", le32_to_cpu(d.v->d_parent_subvol), target->bi_parent_subvol))) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); @@ -1648,7 +1712,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); dir->first_this_inode = false; - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, "dirent in nonexisting directory:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1660,7 +1724,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (!i) goto out; - if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, + if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), + c, dirent_in_non_dir_inode, "dirent in non directory inode type %s:\n%s", bch2_d_type_str(inode_d_type(&i->inode)), (printbuf_reset(&buf), @@ -1694,7 +1759,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, dirent_to_missing_subvol, "dirent points to missing subvolume %u", le32_to_cpu(d.v->d_child_subvol))) { ret = __remove_dirent(trans, d.k->p); @@ -1706,7 +1771,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, subvol_to_missing_root, "subvolume %u points to missing subvolume root %llu", target_subvol, target_inum)) { @@ -1715,7 +1780,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, + if (fsck_err_on(subvol_root.bi_subvol != target_subvol, + c, subvol_root_wrong_bi_subvol, "subvol root %llu has wrong bi_subvol field: got %u, should be %u", target_inum, subvol_root.bi_subvol, target_subvol)) { @@ -1734,7 +1800,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (fsck_err_on(!target->inodes.nr, c, + if (fsck_err_on(!target->inodes.nr, + c, dirent_to_missing_inode, "dirent points to missing inode: (equiv %u)\n%s", equiv.snapshot, (printbuf_reset(&buf), @@ -1820,7 +1887,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); inode->first_this_inode = false; - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, xattr_in_missing_inode, "xattr for missing inode %llu", k.k->p.inode)) return bch2_btree_delete_at(trans, iter, 0); @@ -1869,7 +1936,8 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { + if (mustfix_fsck_err_on(ret, c, root_subvol_missing, + "root subvol missing")) { struct bkey_i_subvolume root_subvol; snapshot = U32_MAX; @@ -1895,8 +1963,10 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root directory missing") || - mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c, + if (mustfix_fsck_err_on(ret, c, root_dir_missing, + "root directory missing") || + mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), + c, root_inode_not_dir, "root inode not a directory")) { bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); @@ -2000,7 +2070,8 
@@ static int check_path(struct btree_trans *trans, } if (bch2_err_matches(ret, ENOENT)) { - if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", + if (fsck_err(c, inode_unreachable, + "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", inode->bi_inum, snapshot, bch2_d_type_str(inode_d_type(inode)), inode->bi_nlink, @@ -2040,7 +2111,8 @@ static int check_path(struct btree_trans *trans, pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode->bi_inum, snapshot); - if (!fsck_err(c, "directory structure loop")) + if (!fsck_err(c, dir_loop, + "directory structure loop")) return 0; ret = commit_do(trans, NULL, NULL, @@ -2088,7 +2160,7 @@ int bch2_check_directory_structure(struct bch_fs *c) break; } - if (u.bi_flags & BCH_INODE_UNLINKED) + if (u.bi_flags & BCH_INODE_unlinked) continue; ret = check_path(trans, &path, &u, iter.pos.snapshot); @@ -2300,7 +2372,8 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite link = &links->d[++*idx]; } - if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, + c, inode_wrong_nlink, "inode %llu type %s has wrong i_nlink (%u, should be %u)", u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index 90c87b5089a0..da991e8cf27e 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -4,6 +4,7 @@ int bch2_check_inodes(struct bch_fs *); int bch2_check_extents(struct bch_fs *); +int bch2_check_indirect_extents(struct bch_fs *); int bch2_check_dirents(struct bch_fs *); int bch2_check_xattrs(struct bch_fs *); int bch2_check_root(struct bch_fs *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index bb3f443d8381..def77f2d8802 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -6,6 +6,7 @@ #include "bkey_methods.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -19,13 +20,18 @@ #include <asm/unaligned.h> -const char * const bch2_inode_opts[] = { #define x(name, ...) 
#name, +const char * const bch2_inode_opts[] = { BCH_INODE_OPTS() -#undef x NULL, }; +static const char * const bch2_inode_flag_strs[] = { + BCH_INODE_FLAGS() + NULL +}; +#undef x + static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; static int inode_decode_field(const u8 *in, const u8 *end, @@ -361,9 +367,10 @@ int bch2_inode_peek(struct btree_trans *trans, return ret; } -int bch2_inode_write(struct btree_trans *trans, +int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, - struct bch_inode_unpacked *inode) + struct bch_inode_unpacked *inode, + enum btree_update_flags flags) { struct bkey_inode_buf *inode_p; @@ -373,7 +380,7 @@ int bch2_inode_write(struct btree_trans *trans, bch2_inode_pack_inlined(inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; - return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) @@ -397,117 +404,121 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bch_inode_unpacked unpacked; + int ret = 0; - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->p.offset < BLOCKDEV_INODE_MAX) { - prt_printf(err, "fs inode in blockdev range"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); - if (bch2_inode_unpack(k, &unpacked)) { - prt_printf(err, "invalid variable length fields"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, + inode_pos_blockdev_range, + "fs inode in blockdev range"); - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u", - unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, + inode_unpack_error, + "invalid variable length fields"); - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u)", - unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, + inode_checksum_type_invalid, + "invalid data checksum type (%u >= %u", + unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) { - prt_printf(err, "flagged as unlinked but bi_nlink != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_compression && + !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, + inode_compression_type_invalid, + "invalid compression opt %u", unpacked.bi_compression - 1); - if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) { - prt_printf(err, "subvolume root but not a directory"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && + unpacked.bi_nlink != 0, c, err, + inode_unlinked_but_nlink_nonzero, + "flagged as unlinked but bi_nlink != 0"); - return 0; + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, + inode_subvol_root_but_not_dir, + "subvolume root but not a 
directory"); +fsck_err: + return ret; } -int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); + int ret = 0; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + int ret = 0; - if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + int ret = 0; - if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) { - prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)", - INODEv3_FIELDS_START(inode.v), - INODEv3_FIELDS_START_INITIAL, - bkey_val_u64s(inode.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, + inode_v3_fields_start_bad, + "invalid fields_start (got %llu, min %u max %zu)", + INODEv3_FIELDS_START(inode.v), + INODEv3_FIELDS_START_INITIAL, + bkey_val_u64s(inode.k)); - if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu", - inode->bi_mode, inode->bi_flags, + prt_printf(out, "mode=%o ", inode->bi_mode); + + prt_str(out, "flags="); + prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); + prt_printf(out, " (%x)", inode->bi_flags); + + prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", inode->bi_journal_seq, inode->bi_size, inode->bi_sectors, inode->bi_version); #define x(_name, _bits) \ - 
prt_printf(out, " "#_name " %llu", (u64) inode->_name); + prt_printf(out, " "#_name "=%llu", (u64) inode->_name); BCH_INODE_FIELDS_v3() #undef x } @@ -546,7 +557,7 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k) static inline bool bkey_is_deleted_inode(struct bkey_s_c k) { - return bkey_inode_flags(k) & BCH_INODE_UNLINKED; + return bkey_inode_flags(k) & BCH_INODE_unlinked; } int bch2_trans_mark_inode(struct btree_trans *trans, @@ -610,16 +621,17 @@ int bch2_mark_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); +fsck_err: + return ret; } void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, @@ -926,8 +938,8 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) { - if (bi->bi_flags & BCH_INODE_UNLINKED) - bi->bi_flags &= ~BCH_INODE_UNLINKED; + if (bi->bi_flags & BCH_INODE_unlinked) + bi->bi_flags &= ~BCH_INODE_unlinked; else { if (bi->bi_nlink == U32_MAX) return -EINVAL; @@ -940,13 +952,13 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) { - if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) { + if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) { bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero", bi->bi_inum); return; } - if (bi->bi_flags & BCH_INODE_UNLINKED) { + if (bi->bi_flags & BCH_INODE_unlinked) { bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum); return; } @@ -954,7 +966,7 @@ void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked * if (bi->bi_nlink) bi->bi_nlink--; else - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) @@ -979,6 +991,18 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; } +int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) +{ + struct bch_inode_unpacked inode; + int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + + if (ret) + return ret; + + bch2_inode_opts_get(opts, trans->c, &inode); + return 0; +} + int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; @@ -1042,53 +1066,85 @@ err: return ret ?: -BCH_ERR_transaction_restart_nested; } -static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) +static int may_delete_deleted_inode(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos, + bool *need_another_pass) { struct bch_fs *c = trans->c; - struct btree_iter iter; + struct btree_iter inode_iter; struct bkey_s_c k; struct bch_inode_unpacked inode; int ret; - if (bch2_snapshot_is_internal_node(c, pos.snapshot)) - return 0; - - if (!fsck_err_on(c->sb.clean, c, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) - return 0; - - k = 
bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); + k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); ret = bkey_err(k); if (ret) return ret; ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; if (fsck_err_on(!bkey_is_inode(k.k), c, + deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; ret = bch2_inode_unpack(k, &inode); if (ret) - goto err; + goto out; if (fsck_err_on(S_ISDIR(inode.bi_mode), c, + deleted_inode_is_dir, "directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c, + if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, + deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; - return 1; -err: + if (c->sb.clean && + !fsck_err(c, + deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } + + if (bch2_snapshot_is_internal_node(c, pos.snapshot)) { + struct bpos new_min_pos; + + ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); + if (ret) + goto out; + + inode.bi_flags &= ~BCH_INODE_unlinked; + + ret = bch2_inode_write_flags(trans, &inode_iter, &inode, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch_err_msg(c, ret, "clearing inode unlinked flag"); + if (ret) + goto out; + + /* + * We'll need another write buffer flush to pick up the new + * unlinked inodes in the snapshot leaves: + */ + *need_another_pass = true; + return 0; + } + + ret = 1; +out: fsck_err: + bch2_trans_iter_exit(trans, &inode_iter); return ret; delete: - return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + goto out; } int bch2_delete_dead_inodes(struct bch_fs *c) @@ -1096,7 +1152,10 @@ int bch2_delete_dead_inodes(struct bch_fs *c) struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; + bool need_another_pass; int ret; +again: + need_another_pass = false; ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) @@ -1110,7 +1169,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) */ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p)); + ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p, + &need_another_pass)); if (ret < 0) break; @@ -1120,12 +1180,17 @@ int bch2_delete_dead_inodes(struct bch_fs *c) bch2_fs_lazy_rw(c); } + bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } } bch2_trans_iter_exit(trans, &iter); + + if (!ret && need_another_pass) + goto again; err: bch2_trans_put(trans); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index a7464e1b6960..88818a332b1e 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -3,16 +3,17 @@ #define _BCACHEFS_INODE_H #include "bkey.h" +#include "bkey_methods.h" #include "opts.h" enum bkey_invalid_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, 
struct printbuf *); -int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -52,7 +53,7 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -101,8 +102,16 @@ void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *) int bch2_inode_peek(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_write(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *); + +int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, enum btree_update_flags); + +static inline int bch2_inode_write(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode) +{ + return bch2_inode_write_flags(trans, iter, inode, 0); +} void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); @@ -177,7 +186,7 @@ static inline unsigned nlink_bias(umode_t mode) static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) { - return bi->bi_flags & BCH_INODE_UNLINKED + return bi->bi_flags & BCH_INODE_unlinked ? 0 : bi->bi_nlink + nlink_bias(bi->bi_mode); } @@ -187,10 +196,10 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, { if (nlink) { bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); - bi->bi_flags &= ~BCH_INODE_UNLINKED; + bi->bi_flags &= ~BCH_INODE_unlinked; } else { bi->bi_nlink = 0; - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } } @@ -200,6 +209,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); +int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); int bch2_delete_dead_inodes(struct bch_fs *); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 119834cb8f9e..bebc11444ef5 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -16,13 +16,14 @@ #include "io_misc.h" #include "io_write.h" #include "logged_ops.h" +#include "rebalance.h" #include "subvolume.h" /* Overwrites whatever was present with zeroes: */ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, - unsigned sectors, + u64 sectors, struct bch_io_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) @@ -104,7 +105,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, if (ret) goto err; - sectors = min(sectors, wp->sectors_free); + sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); @@ -355,6 +356,7 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct 
bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; + struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -363,6 +365,10 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool insert = shift > 0; int ret = 0; + ret = bch2_inum_opts_get(trans, inum, &opts); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_INTENT); @@ -443,7 +449,10 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_bkey_set_needs_rebalance(c, copy, + opts.background_target, + opts.background_compression) ?: + bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index c9e6ed40e1b8..9cb44a7c43c1 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_MISC_H int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - unsigned, struct bch_io_opts, s64 *, + u64, struct bch_io_opts, s64 *, struct write_point_specifier); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 443c3ea65527..a56ed553dc15 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -643,7 +643,7 @@ csum_err: "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); - bch2_io_error(ca); + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; decompression_err: @@ -677,7 +677,7 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, rbio->read_pos.inode, rbio->read_pos.offset, "data read error: %s", @@ -1025,7 +1025,7 @@ get_bio: trans->notrace_relock_fail = true; } else { /* Attempting reconstruct read: */ - if (bch2_ec_read_extent(c, rbio)) { + if (bch2_ec_read_extent(trans, rbio)) { bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 6e4f85eb6ec8..f02b3f7d26a0 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -202,6 +202,17 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter iter; struct bkey_i *k; struct bkey_i_inode_v3 *inode; + /* + * Crazy performance optimization: + * Every extent update needs to also update the inode: the inode trigger + * will set bi->journal_seq to the journal sequence number of this + * transaction - for fsync. + * + * But if that's the only reason we're updating the inode (we're not + * updating bi_size or bi_sectors), then we don't need the inode update + * to be journalled - if we crash, the bi_journal_seq update will be + * lost, but that's fine. 
+ */ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; int ret; @@ -223,7 +234,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, inode = bkey_i_to_inode_v3(k); - if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) && + if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && new_i_size > le64_to_cpu(inode->v.bi_size)) { inode->v.bi_size = cpu_to_le64(new_i_size); inode_update_flags = 0; @@ -351,10 +362,13 @@ static int bch2_write_index_default(struct bch_write_op *op) bkey_start_pos(&sk.k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - ret = bch2_extent_update(trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_CHECK_ENOSPC); + ret = bch2_bkey_set_needs_rebalance(c, sk.k, + op->opts.background_target, + op->opts.background_compression) ?: + bch2_extent_update(trans, inum, &iter, sk.k, + &op->res, + op->new_i_size, &op->i_sectors_delta, + op->flags & BCH_WRITE_CHECK_ENOSPC); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -495,7 +509,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_i *k; unsigned dev; int ret = 0; @@ -505,14 +518,6 @@ static void __bch2_write_index(struct bch_write_op *op) goto err; } - /* - * probably not the ideal place to hook this in, but I don't - * particularly want to plumb io_opts all the way through the btree - * update stack right now - */ - for_each_keylist_key(keys, k) - bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); - if (!bch2_keylist_empty(keys)) { u64 sectors_start = keylist_sectors(keys); @@ -643,7 +648,7 @@ static void bch2_write_endio(struct bio *bio) struct bch_fs *c = wbio->c; struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, op->pos.inode, wbio->inode_offset << 9, "data write error: %s", @@ -816,6 +821,7 @@ static enum prep_encoded_ret { /* Can we just write the entire extent as is? 
*/ if (op->crc.uncompressed_size == op->crc.live_size && + op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 && op->crc.compressed_size <= wp->sectors_free && (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { @@ -1091,9 +1097,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, e = bkey_s_c_to_extent(k); extent_for_each_ptr_decode(e, p, entry) { - if (p.crc.csum_type || - crc_is_compressed(p.crc) || - p.has_ec) + if (crc_is_encoded(p.crc) || p.has_ec) return false; replicas += bch2_extent_ptr_durability(c, &p); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 0e7a9ffa3671..5b5d69f2316b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1019,6 +1019,25 @@ err: return ret; } +int bch2_fs_journal_alloc(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + if (ca->journal.nr) + continue; + + int ret = bch2_dev_journal_alloc(ca); + if (ret) { + percpu_ref_put(&ca->io_ref); + return ret; + } + } + + return 0; +} + /* startup/shutdown: */ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 491133cc52f3..011711e99c8d 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -534,6 +534,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 6a3d6a374e9c..f4bc2cdbfdd7 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -140,7 +140,8 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (!dup->csum_good) goto replace; - fsck_err(c, "found duplicate but non identical journal entries (seq %llu)", + fsck_err(c, journal_entry_replicas_data_mismatch, + "found duplicate but non identical journal entries (seq %llu)", le64_to_cpu(j->seq)); i = dup; goto found; @@ -235,7 +236,7 @@ static void journal_entry_err_msg(struct printbuf *out, prt_str(out, ": "); } -#define journal_entry_err(c, version, jset, entry, msg, ...) \ +#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \ ({ \ struct printbuf _buf = PRINTBUF; \ \ @@ -244,9 +245,10 @@ static void journal_entry_err_msg(struct printbuf *out, \ switch (flags & BKEY_INVALID_WRITE) { \ case READ: \ - mustfix_fsck_err(c, "%s", _buf.buf); \ + mustfix_fsck_err(c, _err, "%s", _buf.buf); \ break; \ case WRITE: \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ if (bch2_fs_inconsistent(c)) { \ ret = -BCH_ERR_fsck_errors_not_fixed; \ @@ -259,8 +261,8 @@ static void journal_entry_err_msg(struct printbuf *out, true; \ }) -#define journal_entry_err_on(cond, c, version, jset, entry, msg, ...) \ - ((cond) ? journal_entry_err(c, version, jset, entry, msg, ##__VA_ARGS__) : false) +#define journal_entry_err_on(cond, ...) \ + ((cond) ? 
journal_entry_err(__VA_ARGS__) : false) #define FSCK_DELETED_KEY 5 @@ -277,7 +279,10 @@ static int journal_validate_key(struct bch_fs *c, struct printbuf buf = PRINTBUF; int ret = 0; - if (journal_entry_err_on(!k->k.u64s, c, version, jset, entry, "k->u64s 0")) { + if (journal_entry_err_on(!k->k.u64s, + c, version, jset, entry, + journal_entry_bkey_u64s_0, + "k->u64s 0")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); return FSCK_DELETED_KEY; @@ -286,6 +291,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on((void *) bkey_next(k) > (void *) vstruct_next(entry), c, version, jset, entry, + journal_entry_bkey_past_end, "extends past end of journal entry")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); @@ -294,6 +300,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, version, jset, entry, + journal_entry_bkey_bad_format, "bad format %u", k->k.format)) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -317,7 +324,8 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id), write, &buf); - mustfix_fsck_err(c, "%s", buf.buf); + mustfix_fsck_err(c, journal_entry_bkey_invalid, + "%s", buf.buf); le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -369,7 +377,7 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs prt_newline(out); prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); } - prt_printf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level); + prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); first = false; } @@ -387,6 +395,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, if (journal_entry_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, version, jset, entry, + journal_entry_btree_root_bad_size, "invalid btree root journal entry: wrong number of keys")) { void *next = vstruct_next(entry); /* @@ -436,6 +445,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, version, jset, entry, + journal_entry_blacklist_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -463,6 +473,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, version, jset, entry, + journal_entry_blacklist_v2_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); goto out; @@ -473,6 +484,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > le64_to_cpu(bl_entry->end), c, version, jset, entry, + journal_entry_blacklist_v2_start_past_end, "invalid journal seq blacklist entry: start > end")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -505,6 +517,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u), c, version, jset, entry, + journal_entry_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); 
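[Editorial aside: the journal-entry validators in this hunk all gain a named error argument: on the read path journal_entry_err() feeds it to mustfix_fsck_err(c, _err, ...), and on the write path it bumps a per-error superblock counter via bch2_sb_error_count(c, BCH_FSCK_ERR_##_err). A minimal, self-contained sketch of that token-pasting pattern follows; FSCK_ERRS(), validate_err_on() and the counter array are illustrative stand-ins, not bcachefs API.

	#include <stdio.h>

	/* X-macro list of named errors; token pasting gives each a stable enum ID */
	#define FSCK_ERRS()				\
		x(journal_entry_bkey_u64s_0)		\
		x(journal_entry_bkey_past_end)

	enum fsck_err_id {
	#define x(n) FSCK_ERR_##n,
		FSCK_ERRS()
	#undef x
		FSCK_ERR_NR,
	};

	static unsigned err_counts[FSCK_ERR_NR];

	/* Like journal_entry_err_on(): count the named error, log it, return cond */
	#define validate_err_on(cond, _err, fmt, ...)				\
		((cond)								\
		 ? (err_counts[FSCK_ERR_##_err]++,				\
		    fprintf(stderr, "%s: " fmt "\n", #_err, ##__VA_ARGS__), 1)	\
		 : 0)

	int main(void)
	{
		unsigned u64s = 0;

		if (validate_err_on(!u64s, journal_entry_bkey_u64s_0, "k->u64s %u", u64s))
			return 1;	/* a real caller would repair/drop the key here */
		return 0;
	}

End of aside; the patch continues below.]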
return ret; @@ -539,6 +552,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u) || bytes < sizeof(*u) + u->r.nr_devs, c, version, jset, entry, + journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -570,13 +584,17 @@ static int journal_entry_clock_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes != sizeof(*clock), - c, version, jset, entry, "bad size")) { + c, version, jset, entry, + journal_entry_clock_bad_size, + "bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(clock->rw > 1, - c, version, jset, entry, "bad rw")) { + c, version, jset, entry, + journal_entry_clock_bad_rw, + "bad rw")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -608,7 +626,9 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes < expected, - c, version, jset, entry, "bad size (%u < %u)", + c, version, jset, entry, + journal_entry_dev_usage_bad_size, + "bad size (%u < %u)", bytes, expected)) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -617,13 +637,17 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, dev = le32_to_cpu(u->dev); if (journal_entry_err_on(!bch2_dev_exists2(c, dev), - c, version, jset, entry, "bad dev")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_dev, + "bad dev")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(u->pad, - c, version, jset, entry, "bad pad")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_pad, + "bad pad")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -738,7 +762,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, vstruct_for_each(jset, entry) { if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), - c, version, jset, entry, + c, version, jset, entry, + journal_entry_past_jset_end, "journal entry extends past end of jset")) { jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); break; @@ -767,6 +792,7 @@ static int jset_validate(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: incompatible journal entry version %u.%u", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -777,7 +803,8 @@ static int jset_validate(struct bch_fs *c, } if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unknown_csum, "%s sector %llu seq %llu: journal entry with unknown csum type %llu", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -788,6 +815,7 @@ static int jset_validate(struct bch_fs *c, if (journal_entry_err_on(!JSET_NO_FLUSH(jset) && le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, version, jset, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(jset->last_seq), le64_to_cpu(jset->seq))) { @@ -816,7 +844,8 @@ static int jset_validate_early(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: unknown journal entry version %u.%u", ca ? 
ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -831,7 +860,8 @@ static int jset_validate_early(struct bch_fs *c, return JOURNAL_ENTRY_REREAD; if (journal_entry_err_on(bytes > bucket_sectors_left << 9, - c, version, jset, NULL, + c, version, jset, NULL, + jset_past_bucket_end, "%s sector %llu seq %llu: journal entry too big (%zu bytes)", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), bytes)) @@ -900,7 +930,7 @@ reread: ret = submit_bio_wait(bio); kfree(bio); - if (bch2_dev_io_err_on(ret, ca, + if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read, "journal read error: sector %llu", offset) || bch2_meta_read_fault("journal")) { @@ -956,7 +986,8 @@ reread: ja->bucket_seq[bucket] = le64_to_cpu(j->seq); csum_good = jset_csum_good(c, j); - if (!csum_good) + if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, + "journal checksum error")) saw_bad = true; ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), @@ -1172,6 +1203,7 @@ int bch2_journal_read(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), c, le32_to_cpu(i->j.version), &i->j, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(i->j.last_seq), le64_to_cpu(i->j.seq))) @@ -1188,7 +1220,8 @@ int bch2_journal_read(struct bch_fs *c, } if (!*last_seq) { - fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); + fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes, + "journal read done, but no entries found after dropping non-flushes"); return 0; } @@ -1214,6 +1247,7 @@ int bch2_journal_read(struct bch_fs *c, if (bch2_journal_seq_is_blacklisted(c, seq, true)) { fsck_err_on(!JSET_NO_FLUSH(&i->j), c, + jset_seq_blacklisted, "found blacklisted journal entry %llu", seq); i->ignore = true; } @@ -1254,7 +1288,8 @@ int bch2_journal_read(struct bch_fs *c, bch2_journal_ptrs_to_text(&buf2, c, i); missing_end = seq - 1; - fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" + fsck_err(c, journal_entries_missing, + "journal entries %llu-%llu missing! 
(replaying %llu-%llu)\n" " prev at %s\n" " next at %s", missing_start, missing_end, @@ -1309,7 +1344,8 @@ int bch2_journal_read(struct bch_fs *c, if (!degraded && !bch2_replicas_marked(c, &replicas.e) && (le64_to_cpu(i->j.seq) == *last_seq || - fsck_err(c, "superblock not marked as containing replicas for journal entry %llu\n %s", + fsck_err(c, journal_entry_replicas_not_marked, + "superblock not marked as containing replicas for journal entry %llu\n %s", le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) @@ -1581,7 +1617,8 @@ static void journal_write_endio(struct bio *bio) struct journal_buf *w = journal_last_unwritten_buf(j); unsigned long flags; - if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + "error writing journal entry %llu: %s", le64_to_cpu(w->data->seq), bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("journal")) { @@ -1641,9 +1678,15 @@ static void do_journal_write(struct closure *cl) continue_at(cl, journal_write_done, c->io_complete_wq); } -static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset) +static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { - struct jset_entry *i, *next, *prev = NULL; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct jset_entry *start, *end, *i, *next, *prev = NULL; + struct jset *jset = w->data; + unsigned sectors, bytes, u64s; + bool validate_before_checksum = false; + unsigned long btree_roots_have = 0; + int ret; /* * Simple compaction, dropping empty jset_entries (from journal @@ -1660,8 +1703,20 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset if (!u64s) continue; - if (i->type == BCH_JSET_ENTRY_btree_root) + /* + * New btree roots are set by journalling them; when the journal + * entry gets written we have to propagate them to + * c->btree_roots + * + * But, every journal entry we write has to contain all the + * btree roots (at least for now); so after we copy btree roots + * to c->btree_roots we have to get any missing btree roots and + * add them to this journal entry: + */ + if (i->type == BCH_JSET_ENTRY_btree_root) { bch2_journal_entry_to_btree_root(c, i); + __set_bit(i->btree_id, &btree_roots_have); + } /* Can we merge with previous entry? */ if (prev && @@ -1685,85 +1740,10 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset prev = prev ? vstruct_next(prev) : jset->start; jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -} - -void bch2_journal_write(struct closure *cl) -{ - struct journal *j = container_of(cl, struct journal, io); - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; - struct journal_buf *w = journal_last_unwritten_buf(j); - struct bch_replicas_padded replicas; - struct jset_entry *start, *end; - struct jset *jset; - struct bio *bio; - struct printbuf journal_debug_buf = PRINTBUF; - bool validate_before_checksum = false; - unsigned i, sectors, bytes, u64s, nr_rw_members = 0; - int ret; - - BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); - - journal_buf_realloc(j, w); - jset = w->data; - - j->write_start_time = local_clock(); - - spin_lock(&j->lock); - - /* - * If the journal is in an error state - we did an emergency shutdown - - * we prefer to continue doing journal writes. 
We just mark them as - * noflush so they'll never be used, but they'll still be visible by the - * list_journal tool - this helps in debugging. - * - * There's a caveat: the first journal write after marking the - * superblock dirty must always be a flush write, because on startup - * from a clean shutdown we didn't necessarily read the journal and the - * new journal write might overwrite whatever was in the journal - * previously - we can't leave the journal without any flush writes in - * it. - * - * So if we're in an error state, and we're still starting up, we don't - * write anything at all. - */ - if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) && - (bch2_journal_error(j) || - w->noflush || - (!w->must_flush && - (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && - test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) { - w->noflush = true; - SET_JSET_NO_FLUSH(jset, true); - jset->last_seq = 0; - w->last_seq = 0; - - j->nr_noflush_writes++; - } else if (!bch2_journal_error(j)) { - j->last_flush_write = jiffies; - j->nr_flush_writes++; - clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); - } else { - spin_unlock(&j->lock); - goto err; - } - spin_unlock(&j->lock); - - /* - * New btree roots are set by journalling them; when the journal entry - * gets written we have to propagate them to c->btree_roots - * - * But, every journal entry we write has to contain all the btree roots - * (at least for now); so after we copy btree roots to c->btree_roots we - * have to get any missing btree roots and add them to this journal - * entry: - */ - - bch2_journal_entries_postprocess(c, jset); start = end = vstruct_last(jset); - end = bch2_btree_roots_to_journal_entries(c, jset->start, end); + end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); bch2_journal_super_entries_add_common(c, &end, le64_to_cpu(jset->seq)); @@ -1779,7 +1759,7 @@ void bch2_journal_write(struct closure *cl) bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)", vstruct_bytes(jset), w->sectors << 9, u64s, w->u64s_reserved, j->entry_u64s_reserved); - goto err; + return -EINVAL; } jset->magic = cpu_to_le64(jset_magic(c)); @@ -1798,37 +1778,117 @@ void bch2_journal_write(struct closure *cl) validate_before_checksum = true; if (validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); if (bch2_fs_fatal_err_on(ret, c, "error decrypting journal entry: %i", ret)) - goto err; + return ret; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); if (!validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; memset((void *) jset + bytes, 0, (sectors << 9) - bytes); + return 0; +} + +static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + int error = bch2_journal_error(j); + + /* + * If the journal is in an error state - we did an emergency shutdown - + * we prefer to continue doing journal writes. We just mark them as + * noflush so they'll never be used, but they'll still be visible by the + * list_journal tool - this helps in debugging. 
+ * + * There's a caveat: the first journal write after marking the + * superblock dirty must always be a flush write, because on startup + * from a clean shutdown we didn't necessarily read the journal and the + * new journal write might overwrite whatever was in the journal + * previously - we can't leave the journal without any flush writes in + * it. + * + * So if we're in an error state, and we're still starting up, we don't + * write anything at all. + */ + if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags)) + return -EIO; + + if (error || + w->noflush || + (!w->must_flush && + (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && + test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { + w->noflush = true; + SET_JSET_NO_FLUSH(w->data, true); + w->data->last_seq = 0; + w->last_seq = 0; + + j->nr_noflush_writes++; + } else { + j->last_flush_write = jiffies; + j->nr_flush_writes++; + clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); + } + + return 0; +} + +void bch2_journal_write(struct closure *cl) +{ + struct journal *j = container_of(cl, struct journal, io); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_dev *ca; + struct journal_buf *w = journal_last_unwritten_buf(j); + struct bch_replicas_padded replicas; + struct bio *bio; + struct printbuf journal_debug_buf = PRINTBUF; + unsigned i, nr_rw_members = 0; + int ret; + + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + + j->write_start_time = local_clock(); -retry_alloc: spin_lock(&j->lock); - ret = journal_write_alloc(j, w); + ret = bch2_journal_write_pick_flush(j, w); + spin_unlock(&j->lock); + if (ret) + goto err; + + journal_buf_realloc(j, w); + + ret = bch2_journal_write_prep(j, w); + if (ret) + goto err; + + while (1) { + spin_lock(&j->lock); + ret = journal_write_alloc(j, w); + if (!ret || !j->can_discard) + break; - if (ret && j->can_discard) { spin_unlock(&j->lock); bch2_journal_do_discards(j); - goto retry_alloc; } - if (ret) + if (ret) { __bch2_journal_debug_to_text(&journal_debug_buf, j); + spin_unlock(&j->lock); + bch_err(c, "Unable to allocate journal write:\n%s", + journal_debug_buf.buf); + printbuf_exit(&journal_debug_buf); + goto err; + } /* * write is allocated, no longer need to account for it in @@ -1843,13 +1903,6 @@ retry_alloc: bch2_journal_space_available(j); spin_unlock(&j->lock); - if (ret) { - bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf.buf); - printbuf_exit(&journal_debug_buf); - goto err; - } - w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); if (c->opts.nochanges) @@ -1871,7 +1924,7 @@ retry_alloc: if (ret) goto err; - if (!JSET_NO_FLUSH(jset) && w->separate_flush) { + if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { for_each_rw_member(ca, c, i) { percpu_ref_get(&ca->io_ref); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 215a653322f3..a5cc0ed195d6 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,17 +10,17 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (!lru_pos_time(k.k->p)) { - prt_printf(err, "lru entry at time=0"); - return -BCH_ERR_invalid_bkey; - - } + int ret = 0; - return 0; + bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, + lru_entry_at_time_0, + "lru entry at time=0"); +fsck_err: + return ret; } void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c, @@ -95,6 +95,7 @@ static int 
bch2_check_lru_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, + lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) return bch2_btree_delete_at(trans, lru_iter, 0); @@ -125,7 +126,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, } if (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect lru entry: lru %s time %llu\n" + fsck_err(c, lru_entry_bad, + "incorrect lru entry: lru %s time %llu\n" " %s\n" " for %s", bch2_lru_types[type], diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index be66bf9ad809..429dca816df5 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -48,7 +48,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 39a14e321680..ab749bf2fcbc 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -20,6 +20,7 @@ #include "keylist.h" #include "move.h" #include "replicas.h" +#include "snapshot.h" #include "super-io.h" #include "trace.h" @@ -59,20 +60,6 @@ static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c } } -static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_add(&stats->list, &c->data_progress_list); - mutex_unlock(&c->data_progress_lock); -} - -static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_del(&stats->list); - mutex_unlock(&c->data_progress_lock); -} - struct moving_io { struct list_head read_list; struct list_head io_list; @@ -156,35 +143,31 @@ static void move_read_endio(struct bio *bio) closure_put(&ctxt->cl); } -void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) { struct moving_io *io; - if (trans) - bch2_trans_unlock(trans); - while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { + bch2_trans_unlock_long(ctxt->trans); list_del(&io->read_list); move_write(io); } } -static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) { unsigned sectors_pending = atomic_read(&ctxt->write_sectors); - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, !atomic_read(&ctxt->write_sectors) || atomic_read(&ctxt->write_sectors) != sectors_pending); } void bch2_moving_ctxt_exit(struct moving_context *ctxt) { - struct bch_fs *c = ctxt->c; + struct bch_fs *c = ctxt->trans->c; - move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); EBUG_ON(atomic_read(&ctxt->write_sectors)); @@ -192,16 +175,12 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_sectors)); EBUG_ON(atomic_read(&ctxt->read_ios)); - if (ctxt->stats) { - progress_list_del(c, ctxt->stats); - trace_move_data(c, - atomic64_read(&ctxt->stats->sectors_moved), - atomic64_read(&ctxt->stats->keys_moved)); - } - mutex_lock(&c->moving_context_lock); list_del(&ctxt->list); mutex_unlock(&c->moving_context_lock); + + bch2_trans_put(ctxt->trans); + 
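[Editorial aside: a structural change running through the move.c hunks here is that struct moving_context now owns its btree_trans: bch2_moving_ctxt_init() acquires the transaction and bch2_moving_ctxt_exit() puts it, so the move helpers shed their separate trans parameter. A rough, self-contained sketch of that ownership shape under placeholder names; trans_get/trans_put and the types are illustrative, not the bcachefs API.

	#include <stdlib.h>
	#include <string.h>

	struct trans { int depth; };

	static struct trans *trans_get(void)   { return calloc(1, sizeof(struct trans)); }
	static void trans_put(struct trans *t) { free(t); }

	/* The context owns the transaction: acquired in init, released in exit */
	struct moving_context {
		struct trans *trans;
	};

	static void moving_ctxt_init(struct moving_context *ctxt)
	{
		memset(ctxt, 0, sizeof(*ctxt));
		ctxt->trans = trans_get();
	}

	static void moving_ctxt_exit(struct moving_context *ctxt)
	{
		trans_put(ctxt->trans);
		memset(ctxt, 0, sizeof(*ctxt));
	}

	/* Helpers now take only the ctxt; they reach the trans through it */
	static int move_ratelimit(struct moving_context *ctxt)
	{
		return ctxt->trans ? 0 : -1;
	}

	int main(void)
	{
		struct moving_context ctxt;

		moving_ctxt_init(&ctxt);
		int ret = move_ratelimit(&ctxt);
		moving_ctxt_exit(&ctxt);
		return ret;
	}

End of aside; the patch continues below.]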
memset(ctxt, 0, sizeof(*ctxt)); } void bch2_moving_ctxt_init(struct moving_context *ctxt, @@ -213,7 +192,7 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, { memset(ctxt, 0, sizeof(*ctxt)); - ctxt->c = c; + ctxt->trans = bch2_trans_get(c); ctxt->fn = (void *) _RET_IP_; ctxt->rate = rate; ctxt->stats = stats; @@ -230,16 +209,17 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, mutex_lock(&c->moving_context_lock); list_add(&ctxt->list, &c->moving_context_list); mutex_unlock(&c->moving_context_lock); +} - if (stats) { - progress_list_add(c, stats); - stats->data_type = BCH_DATA_user; - } +void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) +{ + trace_move_data(c, stats); } void bch2_move_stats_init(struct bch_move_stats *stats, char *name) { memset(stats, 0, sizeof(*stats)); + stats->data_type = BCH_DATA_user; scnprintf(stats->name, sizeof(stats->name), "%s", name); } @@ -286,15 +266,14 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); } -static int bch2_move_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - struct bch_io_opts io_opts, - enum btree_id btree_id, - struct bkey_s_c k, - struct data_update_opts data_opts) +int bch2_move_extent(struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + struct btree_iter *iter, + struct bkey_s_c k, + struct bch_io_opts io_opts, + struct data_update_opts data_opts) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct moving_io *io; @@ -303,6 +282,8 @@ static int bch2_move_extent(struct btree_trans *trans, unsigned sectors = k.k->size, pages; int ret = -ENOMEM; + if (ctxt->stats) + ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); trace_move_extent2(c, k); bch2_data_update_opts_normalize(k, &data_opts); @@ -355,7 +336,7 @@ static int bch2_move_extent(struct btree_trans *trans, io->rbio.bio.bi_end_io = move_read_endio; ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, btree_id, k); + io_opts, data_opts, iter->btree_id, k); if (ret && ret != -BCH_ERR_unwritten_extent_update) goto err_free_pages; @@ -367,9 +348,11 @@ static int bch2_move_extent(struct btree_trans *trans, BUG_ON(ret); - io->write.ctxt = ctxt; io->write.op.end_io = move_write_done; + if (ctxt->rate) + bch2_ratelimit_increment(ctxt->rate, k.k->size); + if (ctxt->stats) { atomic64_inc(&ctxt->stats->keys_moved); atomic64_add(k.k->size, &ctxt->stats->sectors_moved); @@ -399,7 +382,7 @@ static int bch2_move_extent(struct btree_trans *trans, closure_get(&ctxt->cl); bch2_read_extent(trans, &io->rbio, bkey_start_pos(k.k), - btree_id, k, 0, + iter->btree_id, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -413,45 +396,96 @@ err: return ret; } -static int lookup_inode(struct btree_trans *trans, struct bpos pos, - struct bch_inode_unpacked *inode) +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, + struct bkey_s_c extent_k) +{ + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; + int ret = 0; + + if (io_opts->cur_inum != extent_k.k->p.inode) { + struct btree_iter iter; + struct bkey_s_c k; + + io_opts->d.nr = 0; + + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + if (k.k->p.offset != 
extent_k.k->p.inode) + break; + + if (!bkey_is_inode(k.k)) + continue; + + struct bch_inode_unpacked inode; + BUG_ON(bch2_inode_unpack(k, &inode)); + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get(&e.io_opts, trans->c, &inode); + + ret = darray_push(&io_opts->d, e); + if (ret) + break; + } + bch2_trans_iter_exit(trans, &iter); + io_opts->cur_inum = extent_k.k->p.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); + if (ret) + return ERR_PTR(ret); + + if (extent_k.k->p.snapshot) { + struct snapshot_io_opts_entry *i; + darray_for_each(io_opts->d, i) + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) + return &i->io_opts; + } + + return &io_opts->fs_io_opts; +} + +int bch2_move_get_io_opts_one(struct btree_trans *trans, + struct bch_io_opts *io_opts, + struct bkey_s_c extent_k) { struct btree_iter iter; struct bkey_s_c k; int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(&iter); + /* reflink btree? */ + if (!extent_k.k->p.inode) { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); + return 0; + } + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_CACHED); ret = bkey_err(k); - if (ret) - goto err; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; - if (!k.k || !bkey_eq(k.k->p, pos)) { - ret = -BCH_ERR_ENOENT_inode; - goto err; + if (!ret && bkey_is_inode(k.k)) { + struct bch_inode_unpacked inode; + bch2_inode_unpack(k, &inode); + bch2_inode_opts_get(io_opts, trans->c, &inode); + } else { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); } - ret = bkey_is_inode(k.k) ? 0 : -EIO; - if (ret) - goto err; - - ret = bch2_inode_unpack(k, inode); - if (ret) - goto err; -err: bch2_trans_iter_exit(trans, &iter); - return ret; + return 0; } -static int move_ratelimit(struct btree_trans *trans, - struct moving_context *ctxt) +int bch2_move_ratelimit(struct moving_context *ctxt) { - struct bch_fs *c = trans->c; + struct bch_fs *c = ctxt->trans->c; u64 delay; - if (ctxt->wait_on_copygc) { - bch2_trans_unlock(trans); + if (ctxt->wait_on_copygc && !c->copygc_running) { + bch2_trans_unlock_long(ctxt->trans); wait_event_killable(c->copygc_running_wq, !c->copygc_running || kthread_should_stop()); @@ -460,8 +494,12 @@ static int move_ratelimit(struct btree_trans *trans, do { delay = ctxt->rate ? 
bch2_ratelimit_delay(ctxt->rate) : 0; + if (delay) { - bch2_trans_unlock(trans); + if (delay > HZ / 10) + bch2_trans_unlock_long(ctxt->trans); + else + bch2_trans_unlock(ctxt->trans); set_current_state(TASK_INTERRUPTIBLE); } @@ -474,7 +512,7 @@ static int move_ratelimit(struct btree_trans *trans, schedule_timeout(delay); if (unlikely(freezing(current))) { - move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); try_to_freeze(); } } while (delay); @@ -483,7 +521,7 @@ static int move_ratelimit(struct btree_trans *trans, * XXX: these limits really ought to be per device, SSDs and hard drives * will want different limits */ - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && @@ -492,52 +530,28 @@ static int move_ratelimit(struct btree_trans *trans, return 0; } -static int move_get_io_opts(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct bkey_s_c k, u64 *cur_inum) -{ - struct bch_inode_unpacked inode; - int ret; - - if (*cur_inum == k.k->p.inode) - return 0; - - ret = lookup_inode(trans, - SPOS(0, k.k->p.inode, k.k->p.snapshot), - &inode); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - - if (!ret) - bch2_inode_opts_get(io_opts, trans->c, &inode); - else - *io_opts = bch2_opts_to_inode_opts(trans->c->opts); - *cur_inum = k.k->p.inode; - return 0; -} - -static int __bch2_move_data(struct moving_context *ctxt, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - enum btree_id btree_id) +static int bch2_move_data_btree(struct moving_context *ctxt, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + enum btree_id btree_id) { - struct bch_fs *c = ctxt->c; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct per_snapshot_io_opts snapshot_io_opts; + struct bch_io_opts *io_opts; struct bkey_buf sk; - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct data_update_opts data_opts; - u64 cur_inum = U64_MAX; int ret = 0, ret2; + per_snapshot_io_opts_init(&snapshot_io_opts, c); bch2_bkey_buf_init(&sk); if (ctxt->stats) { ctxt->stats->data_type = BCH_DATA_user; - ctxt->stats->btree_id = btree_id; - ctxt->stats->pos = start; + ctxt->stats->pos = BBPOS(btree_id, start); } bch2_trans_iter_init(trans, &iter, btree_id, start, @@ -547,7 +561,7 @@ static int __bch2_move_data(struct moving_context *ctxt, if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); - while (!move_ratelimit(trans, ctxt)) { + while (!bch2_move_ratelimit(ctxt)) { bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -564,17 +578,18 @@ static int __bch2_move_data(struct moving_context *ctxt, break; if (ctxt->stats) - ctxt->stats->pos = iter.pos; + ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); + ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, k, &io_opts, &data_opts)) + if (!pred(c, arg, k, io_opts, &data_opts)) goto next; /* @@ -584,24 +599,20 @@ static int __bch2_move_data(struct moving_context 
*ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, - io_opts, btree_id, k, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } /* XXX signal failure */ goto next; } - - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); @@ -610,59 +621,68 @@ next_nondata: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); + per_snapshot_io_opts_exit(&snapshot_io_opts); return ret; } -int bch2_move_data(struct bch_fs *c, - enum btree_id start_btree_id, struct bpos start_pos, - enum btree_id end_btree_id, struct bpos end_pos, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) +int __bch2_move_data(struct moving_context *ctxt, + struct bbpos start, + struct bbpos end, + move_pred_fn pred, void *arg) { - struct moving_context ctxt; + struct bch_fs *c = ctxt->trans->c; enum btree_id id; int ret = 0; - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - - for (id = start_btree_id; - id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); + for (id = start.btree; + id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + ctxt->stats->pos = BBPOS(id, POS_MIN); - if (id != BTREE_ID_extents && - id != BTREE_ID_reflink) + if (!btree_type_has_ptrs(id) || + !bch2_btree_id_root(c, id)->b) continue; - if (!bch2_btree_id_root(c, id)->b) - continue; - - ret = __bch2_move_data(&ctxt, - id == start_btree_id ? start_pos : POS_MIN, - id == end_btree_id ? end_pos : POS_MAX, + ret = bch2_move_data_btree(ctxt, + id == start.btree ? start.pos : POS_MIN, + id == end.btree ? 
end.pos : POS_MAX, pred, arg, id); if (ret) break; } + return ret; +} + +int bch2_move_data(struct bch_fs *c, + struct bbpos start, + struct bbpos end, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) +{ + + struct moving_context ctxt; + int ret; + + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; } -int __bch2_evacuate_bucket(struct btree_trans *trans, - struct moving_context *ctxt, +int __bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) { - struct bch_fs *c = ctxt->c; + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter; struct bkey_buf sk; @@ -673,7 +693,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, struct data_update_opts data_opts; unsigned dirty_sectors, bucket_size; u64 fragmentation; - u64 cur_inum = U64_MAX; struct bpos bp_pos = POS_MIN; int ret = 0; @@ -708,7 +727,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, goto err; } - while (!(ret = move_ratelimit(trans, ctxt))) { + while (!(ret = bch2_move_ratelimit(ctxt))) { bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, @@ -737,7 +756,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); if (ret) { bch2_trans_iter_exit(trans, &iter); continue; @@ -758,23 +777,20 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, i++; } - ret = bch2_move_extent(trans, &iter, ctxt, - bucket_in_flight, - io_opts, bp.btree_id, k, data_opts); + ret = bch2_move_extent(ctxt, bucket_in_flight, + &iter, k, io_opts, data_opts); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } if (ret) goto err; - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); } else { @@ -825,14 +841,12 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { - struct btree_trans *trans = bch2_trans_get(c); struct moving_context ctxt; int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); - bch2_trans_put(trans); return ret; } @@ -849,21 +863,25 @@ static int bch2_move_btree(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans *trans = bch2_trans_get(c); + struct moving_context ctxt; + struct btree_trans *trans; struct btree_iter iter; struct btree *b; enum btree_id id; struct data_update_opts data_opts; int ret = 0; - progress_list_add(c, stats); + bch2_moving_ctxt_init(&ctxt, c, NULL, stats, + writepoint_ptr(&c->btree_write_point), + true); + trans = ctxt.trans; stats->data_type = 
BCH_DATA_btree; for (id = start_btree_id; id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + stats->pos = BBPOS(id, POS_MIN); if (!bch2_btree_id_root(c, id)->b) continue; @@ -882,7 +900,7 @@ retry: bpos_cmp(b->key.k.p, end_pos)) > 0) break; - stats->pos = iter.pos; + stats->pos = BBPOS(iter.btree_id, iter.pos); if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; @@ -904,14 +922,10 @@ next: break; } - bch2_trans_put(trans); - - if (ret) - bch_err_fn(c, ret); - + bch_err_fn(c, ret); + bch2_moving_ctxt_exit(&ctxt); bch2_btree_interior_updates_flush(c); - progress_list_del(c, stats); return ret; } @@ -1032,8 +1046,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) mutex_unlock(&c->sb_lock); } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1056,14 +1069,16 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), true, rereplicate_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_MIGRATE: if (op.migrate.dev >= c->sb.nr_devices) @@ -1080,18 +1095,21 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), true, migrate_pred, &op) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_REWRITE_OLD_NODES: bch2_move_stats_init(stats, "rewrite_old_nodes"); ret = bch2_scan_old_btree_nodes(c, stats); + bch2_move_stats_exit(stats, c); break; default: ret = -EINVAL; @@ -1100,19 +1118,43 @@ int bch2_data_job(struct bch_fs *c, return ret; } -static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) { - struct bch_move_stats *stats = ctxt->stats; - struct moving_io *io; + prt_printf(out, "%s: data type=%s pos=", + stats->name, + bch2_data_types[stats->data_type]); + bch2_bbpos_to_text(out, stats->pos); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_str(out, "keys moved: "); + prt_u64(out, atomic64_read(&stats->keys_moved)); + prt_newline(out); + + prt_str(out, "keys raced: "); + prt_u64(out, atomic64_read(&stats->keys_raced)); + prt_newline(out); + + prt_str(out, "bytes seen: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); + prt_newline(out); - prt_printf(out, "%s (%ps):", stats->name, ctxt->fn); + prt_str(out, "bytes moved: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); prt_newline(out); - prt_printf(out, " data type %s btree_id %s position: ", - bch2_data_types[stats->data_type], - bch2_btree_ids[stats->btree_id]); - bch2_bpos_to_text(out, stats->pos); + prt_str(out, "bytes raced: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +{ + struct moving_io *io; + + bch2_move_stats_to_text(out, ctxt->stats); 
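+	/* shared stats are printed by the helper above; the indented lines below are this context's in-flight read/write state */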
printbuf_indent_add(out, 2); prt_printf(out, "reads: ios %u/%u sectors %u/%u", @@ -1153,7 +1195,4 @@ void bch2_fs_move_init(struct bch_fs *c) { INIT_LIST_HEAD(&c->moving_context_list); mutex_init(&c->moving_context_lock); - - INIT_LIST_HEAD(&c->data_progress_list); - mutex_init(&c->data_progress_lock); } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index cbdd58db8782..07cf9d42643b 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MOVE_H #define _BCACHEFS_MOVE_H +#include "bbpos.h" #include "bcachefs_ioctl.h" #include "btree_iter.h" #include "buckets.h" @@ -11,7 +12,7 @@ struct bch_read_bio; struct moving_context { - struct bch_fs *c; + struct btree_trans *trans; struct list_head list; void *fn; @@ -37,13 +38,14 @@ struct moving_context { wait_queue_head_t wait; }; -#define move_ctxt_wait_event(_ctxt, _trans, _cond) \ +#define move_ctxt_wait_event(_ctxt, _cond) \ do { \ bool cond_finished = false; \ - bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \ + bch2_moving_ctxt_do_pending_writes(_ctxt); \ \ if (_cond) \ break; \ + bch2_trans_unlock_long((_ctxt)->trans); \ __wait_event((_ctxt)->wait, \ bch2_moving_ctxt_next_pending_write(_ctxt) || \ (cond_finished = (_cond))); \ @@ -59,22 +61,60 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, bool); struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); -void bch2_moving_ctxt_do_pending_writes(struct moving_context *, - struct btree_trans *); +void bch2_moving_ctxt_do_pending_writes(struct moving_context *); +void bch2_move_ctxt_wait_for_io(struct moving_context *); +int bch2_move_ratelimit(struct moving_context *); + +/* Inodes in different snapshots may have different IO options: */ +struct snapshot_io_opts_entry { + u32 snapshot; + struct bch_io_opts io_opts; +}; + +struct per_snapshot_io_opts { + u64 cur_inum; + struct bch_io_opts fs_io_opts; + DARRAY(struct snapshot_io_opts_entry) d; +}; + +static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) +{ + memset(io_opts, 0, sizeof(*io_opts)); + io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); +} + +static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) +{ + darray_exit(&io_opts->d); +} + +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, + struct per_snapshot_io_opts *, struct bkey_s_c); +int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); +int bch2_move_extent(struct moving_context *, + struct move_bucket_in_flight *, + struct btree_iter *, + struct bkey_s_c, + struct bch_io_opts, + struct data_update_opts); + +int __bch2_move_data(struct moving_context *, + struct bbpos, + struct bbpos, + move_pred_fn, void *); int bch2_move_data(struct bch_fs *, - enum btree_id, struct bpos, - enum btree_id, struct bpos, + struct bbpos start, + struct bbpos end, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct btree_trans *, - struct moving_context *, +int __bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); @@ -88,7 +128,10 @@ int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); -void bch2_move_stats_init(struct bch_move_stats *stats, char 
*name); +void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); +void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); +void bch2_move_stats_init(struct bch_move_stats *, char *); + void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_move_init(struct bch_fs *); diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index baf1f8570b3f..e22841ef31e4 100644 --- a/fs/bcachefs/move_types.h +++ b/fs/bcachefs/move_types.h @@ -2,17 +2,17 @@ #ifndef _BCACHEFS_MOVE_TYPES_H #define _BCACHEFS_MOVE_TYPES_H +#include "bbpos_types.h" + struct bch_move_stats { enum bch_data_type data_type; - enum btree_id btree_id; - struct bpos pos; - struct list_head list; + struct bbpos pos; char name[32]; atomic64_t keys_moved; atomic64_t keys_raced; - atomic64_t sectors_moved; atomic64_t sectors_seen; + atomic64_t sectors_moved; atomic64_t sectors_raced; }; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 4017120baeee..0a0576326c5b 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -101,8 +101,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, return ret; } -static void move_buckets_wait(struct btree_trans *trans, - struct moving_context *ctxt, +static void move_buckets_wait(struct moving_context *ctxt, struct buckets_in_flight *list, bool flush) { @@ -111,7 +110,7 @@ static void move_buckets_wait(struct btree_trans *trans, while ((i = list->first)) { if (flush) - move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count)); + move_ctxt_wait_event(ctxt, !atomic_read(&i->count)); if (atomic_read(&i->count)) break; @@ -129,7 +128,7 @@ static void move_buckets_wait(struct btree_trans *trans, kfree(i); } - bch2_trans_unlock(trans); + bch2_trans_unlock_long(ctxt->trans); } static bool bucket_in_flight(struct buckets_in_flight *list, @@ -140,11 +139,11 @@ static bool bucket_in_flight(struct buckets_in_flight *list, typedef DARRAY(struct move_bucket) move_buckets; -static int bch2_copygc_get_buckets(struct btree_trans *trans, - struct moving_context *ctxt, +static int bch2_copygc_get_buckets(struct moving_context *ctxt, struct buckets_in_flight *buckets_in_flight, move_buckets *buckets) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; @@ -152,7 +151,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; int ret; - move_buckets_wait(trans, ctxt, buckets_in_flight, false); + move_buckets_wait(ctxt, buckets_in_flight, false); ret = bch2_btree_write_buffer_flush(trans); if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", @@ -188,10 +187,11 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, } noinline -static int bch2_copygc(struct btree_trans *trans, - struct moving_context *ctxt, - struct buckets_in_flight *buckets_in_flight) +static int bch2_copygc(struct moving_context *ctxt, + struct buckets_in_flight *buckets_in_flight, + bool *did_work) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct data_update_opts data_opts = { .btree_insert_flags = BCH_WATERMARK_copygc, @@ -202,7 +202,7 @@ static int bch2_copygc(struct btree_trans *trans, u64 moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; - ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets); + ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); if (ret) goto err; @@ -221,10 +221,12 @@ static 
int bch2_copygc(struct btree_trans *trans, break; } - ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket, + ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, f->bucket.k.gen, data_opts); if (ret) goto err; + + *did_work = true; } err: darray_exit(&buckets); @@ -300,24 +302,24 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) static int bch2_copygc_thread(void *arg) { struct bch_fs *c = arg; - struct btree_trans *trans; struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; - struct buckets_in_flight buckets; + struct buckets_in_flight *buckets; u64 last, wait; int ret = 0; - memset(&buckets, 0, sizeof(buckets)); - - ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL); + if (!buckets) + return -ENOMEM; + ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); if (ret) { + kfree(buckets); bch_err_msg(c, ret, "allocating copygc buckets in flight"); return ret; } set_freezable(); - trans = bch2_trans_get(c); bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, @@ -325,16 +327,18 @@ static int bch2_copygc_thread(void *arg) false); while (!ret && !kthread_should_stop()) { - bch2_trans_unlock(trans); + bool did_work = false; + + bch2_trans_unlock_long(ctxt.trans); cond_resched(); if (!c->copy_gc_enabled) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); kthread_wait_freezable(c->copy_gc_enabled); } if (unlikely(freezing(current))) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); __refrigerator(false); continue; } @@ -345,7 +349,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -355,16 +359,29 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(trans, &ctxt, &buckets); + ret = bch2_copygc(&ctxt, buckets, &did_work); c->copygc_running = false; wake_up(&c->copygc_running_wq); + + if (!wait && !did_work) { + u64 min_member_capacity = bch2_min_rw_member_capacity(c); + + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; + + bch2_trans_unlock_long(ctxt.trans); + bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), + MAX_SCHEDULE_TIMEOUT); + } } - move_buckets_wait(trans, &ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); - bch2_trans_put(trans); + move_buckets_wait(&ctxt, buckets, true); + + rhashtable_destroy(&buckets->table); + kfree(buckets); bch2_moving_ctxt_exit(&ctxt); + bch2_move_stats_exit(&move_stats, c); return 0; } diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 232f50c73a94..8dd4046cca41 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -12,11 +12,6 @@ #define x(t, n, ...) 
[n] = #t, -const char * const bch2_iops_measurements[] = { - BCH_IOPS_MEASUREMENTS() - NULL -}; - const char * const bch2_error_actions[] = { BCH_ERROR_ACTIONS() NULL @@ -42,9 +37,8 @@ const char * const bch2_sb_compat[] = { NULL }; -const char * const bch2_btree_ids[] = { +const char * const __bch2_btree_ids[] = { BCH_BTREE_IDS() - "interior btree node", NULL }; @@ -271,14 +265,14 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) if (err) prt_printf(err, "%s: too small (min %llu)", opt->attr.name, opt->min); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_small; } if (opt->max && v >= opt->max) { if (err) prt_printf(err, "%s: too big (max %llu)", opt->attr.name, opt->max); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_big; } if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { @@ -295,6 +289,9 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) return -EINVAL; } + if (opt->fn.validate) + return opt->fn.validate(v, err); + return 0; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 55014336c5f7..8526f177450a 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -10,13 +10,12 @@ struct bch_fs; -extern const char * const bch2_iops_measurements[]; extern const char * const bch2_error_actions[]; extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; -extern const char * const bch2_btree_ids[]; +extern const char * const __bch2_btree_ids[]; extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; extern const char * const bch2_compression_types[]; @@ -74,6 +73,7 @@ enum opt_type { struct bch_opt_fn { int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *); void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); + int (*validate)(u64, struct printbuf *); }; /** diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index de41f9a14492..5e653eb81d54 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -415,11 +415,11 @@ void bch2_prt_bitflags(struct printbuf *out, while (list[nr]) nr++; - while (flags && (bit = __ffs(flags)) < nr) { + while (flags && (bit = __ffs64(flags)) < nr) { if (!first) bch2_prt_printf(out, ","); first = false; bch2_prt_printf(out, "%s", list[bit]); - flags ^= 1 << bit; + flags ^= BIT_ULL(bit); } } diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index cb68ae44d597..a54647c36b85 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,17 +59,18 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode >= QTYP_NR) { - prt_printf(err, "invalid quota type (%llu >= %u)", - k.k->p.inode, QTYP_NR); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, + quota_type_invalid, + "invalid quota type (%llu >= %u)", + k.k->p.inode, QTYP_NR); +fsck_err: + return ret; } void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 2f463874a362..884f601f41c4 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; extern const struct bch_sb_field_ops 
bch_sb_field_ops_quota; -int bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 568f1e8e7507..3319190b8d9c 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -1,15 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "btree_iter.h" +#include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" #include "compress.h" #include "disk_groups.h" #include "errcode.h" +#include "error.h" +#include "inode.h" #include "move.h" #include "rebalance.h" +#include "subvolume.h" #include "super-io.h" #include "trace.h" @@ -17,302 +23,396 @@ #include <linux/kthread.h> #include <linux/sched/cputime.h> -/* - * Check if an extent should be moved: - * returns -1 if it should not be moved, or - * device of pointer that should be moved, if known, or INT_MAX if unknown - */ -static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned i; - - data_opts->rewrite_ptrs = 0; - data_opts->target = io_opts->background_target; - data_opts->extra_replicas = 0; - data_opts->btree_insert_flags = 0; - - if (io_opts->background_compression && - !bch2_bkey_is_incompressible(k)) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (!p.ptr.cached && - p.crc.compression_type != - bch2_compression_opt_to_type(io_opts->background_compression)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } +#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) - if (io_opts->background_target) { - const struct bch_extent_ptr *ptr; +static const char * const bch2_rebalance_state_strs[] = { +#define x(t) #t, + BCH_REBALANCE_STATES() + NULL +#undef x +}; - i = 0; - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && - !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) && - bch2_target_accepts_data(c, BCH_DATA_user, io_opts->background_target)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } +static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_cookie *cookie; + u64 v; + int ret; - return data_opts->rewrite_ptrs != 0; + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? 
le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); + ret = PTR_ERR_OR_ZERO(cookie); + if (ret) + goto err; + + bkey_cookie_init(&cookie->k_i); + cookie->k.p = iter.pos; + cookie->v.cookie = cpu_to_le64(v + 1); + + ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } -void bch2_rebalance_add_key(struct bch_fs *c, - struct bkey_s_c k, - struct bch_io_opts *io_opts) +int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - struct data_update_opts update_opts = { 0 }; - struct bkey_ptrs_c ptrs; - const struct bch_extent_ptr *ptr; - unsigned i; - - if (!rebalance_pred(c, NULL, k, io_opts, &update_opts)) - return; - - i = 0; - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr) { - if ((1U << i) && update_opts.rewrite_ptrs) - if (atomic64_add_return(k.k->size, - &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) == - k.k->size) - rebalance_wakeup(c); - i++; - } + int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + __bch2_set_rebalance_needs_scan(trans, inum)); + rebalance_wakeup(c); + return ret; } -void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) +int bch2_set_fs_needs_rebalance(struct bch_fs *c) { - if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == - sectors) - rebalance_wakeup(c); + return bch2_set_rebalance_needs_scan(c, 0); } -struct rebalance_work { - int dev_most_full_idx; - unsigned dev_most_full_percent; - u64 dev_most_full_work; - u64 dev_most_full_capacity; - u64 total_work; -}; +static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) +{ + struct btree_iter iter; + struct bkey_s_c k; + u64 v; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + if (v == cookie) + ret = bch2_btree_delete_at(trans, &iter, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} -static void rebalance_work_accumulate(struct rebalance_work *w, - u64 dev_work, u64 unknown_dev, u64 capacity, int idx) +static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, + struct btree_iter *work_iter) { - unsigned percent_full; - u64 work = dev_work + unknown_dev; + return !kthread_should_stop() + ? bch2_btree_iter_peek(work_iter) + : bkey_s_c_null; +} - /* avoid divide by 0 */ - if (!capacity) - return; +static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + extent_entry_drop(bkey_i_to_s(n), + (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); + return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); +} + +static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct bpos work_pos, + struct btree_iter *extent_iter, + struct data_update_opts *data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + + bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + work_pos.inode ? 
BTREE_ID_extents : BTREE_ID_reflink, + work_pos, + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek_slot(extent_iter); + if (bkey_err(k)) + return k; + + const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; + if (!r) { + /* raced due to btree write buffer, nothing to do */ + return bkey_s_c_null; + } - if (work < dev_work || work < unknown_dev) - work = U64_MAX; - work = min(work, capacity); + memset(data_opts, 0, sizeof(*data_opts)); - percent_full = div64_u64(work * 100, capacity); + data_opts->rewrite_ptrs = + bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); + data_opts->target = r->target; - if (percent_full >= w->dev_most_full_percent) { - w->dev_most_full_idx = idx; - w->dev_most_full_percent = percent_full; - w->dev_most_full_work = work; - w->dev_most_full_capacity = capacity; + if (!data_opts->rewrite_ptrs) { + /* + * device we would want to write to offline? devices in target + * changed? + * + * We'll now need a full scan before this extent is picked up + * again: + */ + int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k); + if (ret) + return bkey_s_c_err(ret); + return bkey_s_c_null; } - if (w->total_work + dev_work >= w->total_work && - w->total_work + dev_work >= dev_work) - w->total_work += dev_work; + return k; } -static struct rebalance_work rebalance_work(struct bch_fs *c) +noinline_for_stack +static int do_rebalance_extent(struct moving_context *ctxt, + struct bpos work_pos, + struct btree_iter *extent_iter) { - struct bch_dev *ca; - struct rebalance_work ret = { .dev_most_full_idx = -1 }; - u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); - unsigned i; - - for_each_online_member(ca, c, i) - rebalance_work_accumulate(&ret, - atomic64_read(&ca->rebalance_work), - unknown_dev, - bucket_to_sector(ca, ca->mi.nbuckets - - ca->mi.first_bucket), - i); - - rebalance_work_accumulate(&ret, - unknown_dev, 0, c->capacity, -1); + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &trans->c->rebalance; + struct data_update_opts data_opts; + struct bch_io_opts io_opts; + struct bkey_s_c k; + struct bkey_buf sk; + int ret; + + ctxt->stats = &r->work_stats; + r->state = BCH_REBALANCE_working; + + bch2_bkey_buf_init(&sk); + + ret = bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &data_opts)); + if (ret || !k.k) + goto out; + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); + if (ret) + goto out; + + atomic64_add(k.k->size, &ctxt->stats->sectors_seen); + + /* + * The iterator gets unlocked by __bch2_read_extent - need to + * save a copy of @k elsewhere: + */ + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + + ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); + if (ret) { + if (bch2_err_matches(ret, ENOMEM)) { + /* memory allocation failure, wait for some IO to finish */ + bch2_move_ctxt_wait_for_io(ctxt); + ret = -BCH_ERR_transaction_restart_nested; + } + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto out; + + /* skip it and continue, XXX signal failure */ + ret = 0; + } +out: + bch2_bkey_buf_exit(&sk, c); return ret; } -static void rebalance_work_reset(struct bch_fs *c) +static bool rebalance_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { - struct bch_dev *ca; - unsigned i; + unsigned target, compression; + + if (k.k->p.inode) { + target = io_opts->background_target; + compression = 
io_opts->background_compression ?: io_opts->compression; + } else { + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - for_each_online_member(ca, c, i) - atomic64_set(&ca->rebalance_work, 0); + target = r ? r->target : io_opts->background_target; + compression = r ? r->compression : + (io_opts->background_compression ?: io_opts->compression); + } - atomic64_set(&c->rebalance.work_unknown_dev, 0); + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); + data_opts->target = target; + return data_opts->rewrite_ptrs != 0; } -static unsigned long curr_cputime(void) +static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) { - u64 utime, stime; + struct btree_trans *trans = ctxt->trans; + struct bch_fs_rebalance *r = &trans->c->rebalance; + int ret; + + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); + ctxt->stats = &r->scan_stats; - task_cputime_adjusted(current, &utime, &stime); - return nsecs_to_jiffies(utime + stime); + if (!inum) { + r->scan_start = BBPOS_MIN; + r->scan_end = BBPOS_MAX; + } else { + r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0)); + r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX)); + } + + r->state = BCH_REBALANCE_scanning; + + ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?: + commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_clear_rebalance_needs_scan(trans, inum, cookie)); + + bch2_move_stats_exit(&r->scan_stats, trans->c); + return ret; } -static int bch2_rebalance_thread(void *arg) +static void rebalance_wait(struct bch_fs *c) { - struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; struct io_clock *clock = &c->io_clock[WRITE]; - struct rebalance_work w, p; - struct bch_move_stats move_stats; - unsigned long start, prev_start; - unsigned long prev_run_time, prev_run_cputime; - unsigned long cputime, prev_cputime; - u64 io_start; - long throttle; + u64 now = atomic64_read(&clock->now); + u64 min_member_capacity = bch2_min_rw_member_capacity(c); - set_freezable(); + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; + + r->wait_iotime_end = now + (min_member_capacity >> 6); - io_start = atomic64_read(&clock->now); - p = rebalance_work(c); - prev_start = jiffies; - prev_cputime = curr_cputime(); + if (r->state != BCH_REBALANCE_waiting) { + r->wait_iotime_start = now; + r->wait_wallclock_start = ktime_get_real_ns(); + r->state = BCH_REBALANCE_waiting; + } + + bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); +} - bch2_move_stats_init(&move_stats, "rebalance"); - while (!kthread_wait_freezable(r->enabled)) { - cond_resched(); +static int do_rebalance(struct moving_context *ctxt) +{ + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &c->rebalance; + struct btree_iter rebalance_work_iter, extent_iter = { NULL }; + struct bkey_s_c k; + int ret = 0; - start = jiffies; - cputime = curr_cputime(); + bch2_move_stats_init(&r->work_stats, "rebalance_work"); + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); - prev_run_time = start - prev_start; - prev_run_cputime = cputime - prev_cputime; + bch2_trans_iter_init(trans, &rebalance_work_iter, + BTREE_ID_rebalance_work, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS); - w = rebalance_work(c); - BUG_ON(!w.dev_most_full_capacity); + while (!bch2_move_ratelimit(ctxt) && + !kthread_wait_freezable(r->enabled)) { + bch2_trans_begin(trans); - if (!w.total_work) { - r->state = REBALANCE_WAITING; - 
kthread_wait_freezable(rebalance_work(c).total_work); + ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter)); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; - } + if (ret || !k.k) + break; - /* - * If there isn't much work to do, throttle cpu usage: - */ - throttle = prev_run_cputime * 100 / - max(1U, w.dev_most_full_percent) - - prev_run_time; - - if (w.dev_most_full_percent < 20 && throttle > 0) { - r->throttled_until_iotime = io_start + - div_u64(w.dev_most_full_capacity * - (20 - w.dev_most_full_percent), - 50); - - if (atomic64_read(&clock->now) + clock->max_slop < - r->throttled_until_iotime) { - r->throttled_until_cputime = start + throttle; - r->state = REBALANCE_THROTTLED; - - bch2_kthread_io_clock_wait(clock, - r->throttled_until_iotime, - throttle); - continue; - } - } + ret = k.k->type == KEY_TYPE_cookie + ? do_rebalance_scan(ctxt, k.k->p.inode, + le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)) + : do_rebalance_extent(ctxt, k.k->p, &extent_iter); - /* minimum 1 mb/sec: */ - r->pd.rate.rate = - max_t(u64, 1 << 11, - r->pd.rate.rate * - max(p.dev_most_full_percent, 1U) / - max(w.dev_most_full_percent, 1U)); - - io_start = atomic64_read(&clock->now); - p = w; - prev_start = start; - prev_cputime = cputime; - - r->state = REBALANCE_RUNNING; - memset(&move_stats, 0, sizeof(move_stats)); - rebalance_work_reset(c); - - bch2_move_data(c, - 0, POS_MIN, - BTREE_ID_NR, POS_MAX, - /* ratelimiting disabled for now */ - NULL, /* &r->pd.rate, */ - &move_stats, - writepoint_ptr(&c->rebalance_write_point), - true, - rebalance_pred, NULL); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + + bch2_btree_iter_advance(&rebalance_work_iter); } - return 0; + bch2_trans_iter_exit(trans, &extent_iter); + bch2_trans_iter_exit(trans, &rebalance_work_iter); + bch2_move_stats_exit(&r->scan_stats, c); + + if (!ret && + !kthread_should_stop() && + !atomic64_read(&r->work_stats.sectors_seen) && + !atomic64_read(&r->scan_stats.sectors_seen)) { + bch2_trans_unlock_long(trans); + rebalance_wait(c); + } + + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); + return ret; } -void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) +static int bch2_rebalance_thread(void *arg) { + struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; - struct rebalance_work w = rebalance_work(c); + struct moving_context ctxt; + int ret; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 20); + set_freezable(); - prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); - prt_tab(out); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, + writepoint_ptr(&c->rebalance_write_point), + true); - prt_human_readable_u64(out, w.dev_most_full_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, w.dev_most_full_capacity << 9); - prt_newline(out); + while (!kthread_should_stop() && + !(ret = do_rebalance(&ctxt))) + ; - prt_printf(out, "total work:"); - prt_tab(out); + bch2_moving_ctxt_exit(&ctxt); - prt_human_readable_u64(out, w.total_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, c->capacity << 9); - prt_newline(out); + return 0; +} + +void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct bch_fs_rebalance *r = &c->rebalance; - prt_printf(out, "rate:"); - prt_tab(out); - prt_printf(out, "%u", r->pd.rate.rate); + prt_str(out, bch2_rebalance_state_strs[r->state]); prt_newline(out); + printbuf_indent_add(out, 2); switch (r->state) { - case REBALANCE_WAITING: - 
prt_printf(out, "waiting"); + case BCH_REBALANCE_waiting: { + u64 now = atomic64_read(&c->io_clock[WRITE].now); + + prt_str(out, "io wait duration: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start); + prt_newline(out); + + prt_str(out, "io wait remaining: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - now); + prt_newline(out); + + prt_str(out, "duration waited: "); + bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start); + prt_newline(out); break; - case REBALANCE_THROTTLED: - prt_printf(out, "throttled for %lu sec or ", - (r->throttled_until_cputime - jiffies) / HZ); - prt_human_readable_u64(out, - (r->throttled_until_iotime - - atomic64_read(&c->io_clock[WRITE].now)) << 9); - prt_printf(out, " io"); + } + case BCH_REBALANCE_working: + bch2_move_stats_to_text(out, &r->work_stats); break; - case REBALANCE_RUNNING: - prt_printf(out, "running"); + case BCH_REBALANCE_scanning: + bch2_move_stats_to_text(out, &r->scan_stats); break; } prt_newline(out); + printbuf_indent_sub(out, 2); } void bch2_rebalance_stop(struct bch_fs *c) @@ -361,6 +461,4 @@ int bch2_rebalance_start(struct bch_fs *c) void bch2_fs_rebalance_init(struct bch_fs *c) { bch2_pd_controller_init(&c->rebalance.pd); - - atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); } diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 7ade0bb81cce..28a52638f16c 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,6 +4,9 @@ #include "rebalance_types.h" +int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); +int bch2_set_fs_needs_rebalance(struct bch_fs *); + static inline void rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; @@ -15,11 +18,7 @@ static inline void rebalance_wakeup(struct bch_fs *c) rcu_read_unlock(); } -void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, - struct bch_io_opts *); -void bch2_rebalance_add_work(struct bch_fs *, u64); - -void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *); +void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h index 7462a92e9598..0fffb536c1d0 100644 --- a/fs/bcachefs/rebalance_types.h +++ b/fs/bcachefs/rebalance_types.h @@ -2,25 +2,36 @@ #ifndef _BCACHEFS_REBALANCE_TYPES_H #define _BCACHEFS_REBALANCE_TYPES_H +#include "bbpos_types.h" #include "move_types.h" -enum rebalance_state { - REBALANCE_WAITING, - REBALANCE_THROTTLED, - REBALANCE_RUNNING, +#define BCH_REBALANCE_STATES() \ + x(waiting) \ + x(working) \ + x(scanning) + +enum bch_rebalance_states { +#define x(t) BCH_REBALANCE_##t, + BCH_REBALANCE_STATES() +#undef x }; struct bch_fs_rebalance { - struct task_struct __rcu *thread; + struct task_struct __rcu *thread; struct bch_pd_controller pd; - atomic64_t work_unknown_dev; + enum bch_rebalance_states state; + u64 wait_iotime_start; + u64 wait_iotime_end; + u64 wait_wallclock_start; + + struct bch_move_stats work_stats; - enum rebalance_state state; - u64 throttled_until_iotime; - unsigned long throttled_until_cputime; + struct bbpos scan_start; + struct bbpos scan_end; + struct bch_move_stats scan_stats; - unsigned enabled:1; + unsigned enabled:1; }; #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4cd660650e5b..9c30500ce920 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -23,6 +23,7 @@ #include 
"logged_ops.h" #include "move.h" #include "quota.h" +#include "rebalance.h" #include "recovery.h" #include "replicas.h" #include "sb-clean.h" @@ -182,7 +183,7 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_replay_key(trans, k)); if (ret) { bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", - bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); + bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); goto err; } } @@ -225,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c, if (entry->u64s) { r->level = entry->level; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); r->error = 0; } else { r->error = -EIO; @@ -364,10 +365,12 @@ static int read_btree_roots(struct bch_fs *c) } if (r->error) { - __fsck_err(c, btree_id_is_alloc(i) + __fsck_err(c, + btree_id_is_alloc(i) ? FSCK_CAN_IGNORE : 0, + btree_root_bkey_invalid, "invalid btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (i == BTREE_ID_alloc) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); } @@ -375,8 +378,9 @@ static int read_btree_roots(struct bch_fs *c) ret = bch2_btree_root_read(c, i, &r->key, r->level); if (ret) { fsck_err(c, + btree_root_read_error, "error reading btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (btree_id_is_alloc(i)) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ret = 0; @@ -713,6 +717,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (mustfix_fsck_err_on(c->sb.clean && last_journal_entry && !journal_entry_empty(last_journal_entry), c, + clean_but_journal_not_empty, "filesystem marked clean but journal not empty")) { c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); @@ -720,7 +725,9 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!last_journal_entry) { - fsck_err_on(!c->sb.clean, c, "no journal entries found"); + fsck_err_on(!c->sb.clean, c, + dirty_but_no_journal_entries, + "no journal entries found"); if (clean) goto use_clean; @@ -728,6 +735,13 @@ int bch2_fs_recovery(struct bch_fs *c) if (*i) { last_journal_entry = &(*i)->j; (*i)->ignore = false; + /* + * This was probably a NO_FLUSH entry, + * so last_seq was garbage - but we know + * we're only using a single journal + * entry, set it here: + */ + (*i)->j.last_seq = (*i)->j.seq; break; } } @@ -901,7 +915,7 @@ out: } kfree(clean); - if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) { + if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } @@ -946,16 +960,12 @@ int bch2_fs_initialize(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - for_each_online_member(ca, c, i) + for_each_member_device(ca, c, i) bch2_dev_usage_init(ca); - for_each_online_member(ca, c, i) { - ret = bch2_dev_journal_alloc(ca); - if (ret) { - percpu_ref_put(&ca->io_ref); - goto err; - } - } + ret = bch2_fs_journal_alloc(c); + if (ret) + goto err; /* * journal_res_get() will crash if called before this has @@ -973,15 +983,13 @@ int bch2_fs_initialize(struct bch_fs *c) * btree updates */ bch_verbose(c, "marking superblocks"); - for_each_member_device(ca, c, i) { - ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { - percpu_ref_put(&ca->ref); - goto err; - } + ret = bch2_trans_mark_dev_sbs(c); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto err; + for_each_online_member(ca, c, i) ca->new_fs_bucket_idx = 0; - } ret = bch2_fs_freespace_init(c); if (ret) diff --git 
a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index fbfa9d831d6f..515e3d62c2ac 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -14,6 +14,8 @@ x(snapshots_read, PASS_ALWAYS) \ x(check_topology, 0) \ x(check_allocations, PASS_FSCK) \ + x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, PASS_ALWAYS) \ x(check_alloc_info, PASS_FSCK) \ @@ -27,11 +29,12 @@ x(check_snapshot_trees, PASS_FSCK) \ x(check_snapshots, PASS_FSCK) \ x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ + x(delete_dead_snapshots, PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 0) \ x(resume_logged_ops, PASS_ALWAYS) \ x(check_inodes, PASS_FSCK) \ x(check_extents, PASS_FSCK) \ + x(check_indirect_extents, PASS_FSCK) \ x(check_dirents, PASS_FSCK) \ x(check_xattrs, PASS_FSCK) \ x(check_root, PASS_FSCK) \ @@ -39,6 +42,7 @@ x(check_nlinks, PASS_FSCK) \ x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 0) \ + x(set_fs_needs_rebalance, 0) \ enum bch_recovery_pass { #define x(n, when) BCH_RECOVERY_PASS_##n, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index d77d0ea9afff..6e1bfe9feb59 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -7,6 +7,7 @@ #include "inode.h" #include "io_misc.h" #include "io_write.h" +#include "rebalance.h" #include "reflink.h" #include "subvolume.h" #include "super-io.h" @@ -27,7 +28,7 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -74,7 +75,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r /* indirect extents */ -int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -103,28 +104,29 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r } #endif +static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) +{ + if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { + new->k.type = KEY_TYPE_deleted; + new->k.size = 0; + set_bkey_val_u64s(&new->k, 0); + *flags &= ~BTREE_TRIGGER_INSERT; + } +} + int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct bkey_i_reflink_v *r = bkey_i_to_reflink_v(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - return 0; - } - } + check_indirect_extent_deleting(new, &flags); return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } /* indirect inline data */ -int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -132,7 +134,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, } void bch2_indirect_inline_data_to_text(struct printbuf *out, - struct bch_fs *c, struct bkey_s_c k) + struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_indirect_inline_data d =
bkey_s_c_to_indirect_inline_data(k); unsigned datalen = bkey_inline_data_bytes(k.k); @@ -147,16 +149,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct bkey_i_indirect_inline_data *r = - bkey_i_to_indirect_inline_data(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - } - } + check_indirect_extent_deleting(new, &flags); return 0; } @@ -260,8 +253,9 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; + struct bch_io_opts opts; struct bpos src_want; - u64 dst_done; + u64 dst_done = 0; u32 dst_snapshot, src_snapshot; int ret = 0, ret2 = 0; @@ -277,6 +271,10 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); trans = bch2_trans_get(c); + ret = bch2_inum_opts_get(trans, src_inum, &opts); + if (ret) + goto err; + bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_INTENT); bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, @@ -360,10 +358,13 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, + opts.background_target, + opts.background_compression) ?: + bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(trans, &dst_iter); @@ -394,7 +395,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); - +err: bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index fe52538efb52..8ccf3f9c4939 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,7 +4,7 @@ enum bkey_invalid_flags; -int bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -19,7 +19,7 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -35,7 +35,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cef2a0447b86..1c3ae13bfced 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -462,18 +462,13 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { 
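+	/* locks are now taken even when the caller passed an error: ret just short-circuits the ?: chain below, so cleanup always runs */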
lockdep_assert_held(&c->replicas_gc_lock); - if (ret) - goto err; - mutex_lock(&c->sb_lock); percpu_down_write(&c->mark_lock); - ret = bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); - if (ret) - goto err; + ret = ret ?: + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?: + replicas_table_update(c, &c->replicas_gc); - ret = replicas_table_update(c, &c->replicas_gc); -err: kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; @@ -579,12 +574,9 @@ retry: bch2_cpu_replicas_sort(&new); - ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - if (ret) - goto err; + ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?: + replicas_table_update(c, &new); - ret = replicas_table_update(c, &new); -err: kfree(new.entries); percpu_up_write(&c->mark_lock); diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 61203d7c8d36..e151ada1c8bd 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -82,6 +82,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, + sb_clean_journal_seq_mismatch, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), le64_to_cpu(j->seq))) { @@ -119,6 +120,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, k1->k.u64s != k2->k.u64s || memcmp(k1, k2, bkey_bytes(&k1->k)) || l1 != l2, c, + sb_clean_btree_root_mismatch, "superblock btree root %u doesn't match journal after clean shutdown\n" "sb: l=%u %s\n" "journal: l=%u %s\n", i, @@ -140,6 +142,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); if (fsck_err_on(!sb_clean, c, + sb_clean_missing, "superblock marked clean but clean section not present")) { SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->sb.clean = false; @@ -373,7 +376,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) entry = sb_clean->start; bch2_journal_super_entries_add_common(c, &entry, 0); - entry = bch2_btree_roots_to_journal_entries(c, entry, entry); + entry = bch2_btree_roots_to_journal_entries(c, entry, 0); BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); memset(entry, 0, diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c new file mode 100644 index 000000000000..f0930ab7f036 --- /dev/null +++ b/fs/bcachefs/sb-errors.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "sb-errors.h" +#include "super-io.h" + +static const char * const bch2_sb_error_strs[] = { +#define x(t, n, ...) [n] = #t, + BCH_SB_ERRS() + NULL +}; + +static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) +{ + if (id < BCH_SB_ERR_MAX) + prt_str(out, bch2_sb_error_strs[id]); + else + prt_printf(out, "(unknown error %u)", id); +} + +static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) +{ + return e + ? 
(bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) + : 0; +} + +static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) +{ + return (sizeof(struct bch_sb_field_errors) + + sizeof(struct bch_sb_field_error_entry) * nr) / sizeof(u64); +} + +static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + + for (i = 0; i < nr; i++) { + if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[i])) { + prt_printf(err, "entry with count 0 (id "); + bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_printf(err, ")"); + return -BCH_ERR_invalid_sb_errors; + } + + if (i + 1 < nr && + BCH_SB_ERROR_ENTRY_ID(&e->entries[i]) >= + BCH_SB_ERROR_ENTRY_ID(&e->entries[i + 1])) { + prt_printf(err, "entries out of order"); + return -BCH_ERR_invalid_sb_errors; + } + } + + return 0; +} + +static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + + if (out->nr_tabstops <= 1) + printbuf_tabstop_push(out, 16); + + for (i = 0; i < nr; i++) { + bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_tab(out); + prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); + prt_tab(out); + bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); + prt_newline(out); + } +} + +const struct bch_sb_field_ops bch_sb_field_ops_errors = { + .validate = bch2_sb_errors_validate, + .to_text = bch2_sb_errors_to_text, +}; + +void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) +{ + bch_sb_errors_cpu *e = &c->fsck_error_counts; + struct bch_sb_error_entry_cpu n = { + .id = err, + .nr = 1, + .last_error_time = ktime_get_real_seconds() + }; + unsigned i; + + mutex_lock(&c->fsck_error_counts_lock); + for (i = 0; i < e->nr; i++) { + if (err == e->data[i].id) { + e->data[i].nr++; + e->data[i].last_error_time = n.last_error_time; + goto out; + } + if (err < e->data[i].id) + break; + } + + if (darray_make_room(e, 1)) + goto out; + + darray_insert_item(e, i, n); +out: + mutex_unlock(&c->fsck_error_counts_lock); +} + +void bch2_sb_errors_from_cpu(struct bch_fs *c) +{ + bch_sb_errors_cpu *src = &c->fsck_error_counts; + struct bch_sb_field_errors *dst = + bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); + unsigned i; + + if (!dst) + return; + + for (i = 0; i < src->nr; i++) { + SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); + SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); + dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); + } +} + +static int bch2_sb_errors_to_cpu(struct bch_fs *c) +{ + struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); + bch_sb_errors_cpu *dst = &c->fsck_error_counts; + unsigned i, nr = bch2_sb_field_errors_nr_entries(src); + int ret; + + if (!nr) + return 0; + + mutex_lock(&c->fsck_error_counts_lock); + ret = darray_make_room(dst, nr); + if (ret) + goto err; + + dst->nr = nr; + + for (i = 0; i < nr; i++) { + dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); + dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); + dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); + } +err: + mutex_unlock(&c->fsck_error_counts_lock); + + return ret; +} + +void bch2_fs_sb_errors_exit(struct bch_fs *c) +{ 
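+	/* frees only the in-memory counts; the errors section in the superblock itself is left intact */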
+	darray_exit(&c->fsck_error_counts); +} + +void bch2_fs_sb_errors_init_early(struct bch_fs *c) +{ + mutex_init(&c->fsck_error_counts_lock); + darray_init(&c->fsck_error_counts); +} + +int bch2_fs_sb_errors_init(struct bch_fs *c) +{ + return bch2_sb_errors_to_cpu(c); +} diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h new file mode 100644 index 000000000000..5a09a53966be --- /dev/null +++ b/fs/bcachefs/sb-errors.h @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_H +#define _BCACHEFS_SB_ERRORS_H + +#include "sb-errors_types.h" + +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0) \ + x(dirty_but_no_journal_entries, 1) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ + x(sb_clean_journal_seq_mismatch, 3) \ + x(sb_clean_btree_root_mismatch, 4) \ + x(sb_clean_missing, 5) \ + x(jset_unsupported_version, 6) \ + x(jset_unknown_csum, 7) \ + x(jset_last_seq_newer_than_seq, 8) \ + x(jset_past_bucket_end, 9) \ + x(jset_seq_blacklisted, 10) \ + x(journal_entries_missing, 11) \ + x(journal_entry_replicas_not_marked, 12) \ + x(journal_entry_past_jset_end, 13) \ + x(journal_entry_replicas_data_mismatch, 14) \ + x(journal_entry_bkey_u64s_0, 15) \ + x(journal_entry_bkey_past_end, 16) \ + x(journal_entry_bkey_bad_format, 17) \ + x(journal_entry_bkey_invalid, 18) \ + x(journal_entry_btree_root_bad_size, 19) \ + x(journal_entry_blacklist_bad_size, 20) \ + x(journal_entry_blacklist_v2_bad_size, 21) \ + x(journal_entry_blacklist_v2_start_past_end, 22) \ + x(journal_entry_usage_bad_size, 23) \ + x(journal_entry_data_usage_bad_size, 24) \ + x(journal_entry_clock_bad_size, 25) \ + x(journal_entry_clock_bad_rw, 26) \ + x(journal_entry_dev_usage_bad_size, 27) \ + x(journal_entry_dev_usage_bad_dev, 28) \ + x(journal_entry_dev_usage_bad_pad, 29) \ + x(btree_node_unreadable, 30) \ + x(btree_node_fault_injected, 31) \ + x(btree_node_bad_magic, 32) \ + x(btree_node_bad_seq, 33) \ + x(btree_node_unsupported_version, 34) \ + x(btree_node_bset_older_than_sb_min, 35) \ + x(btree_node_bset_newer_than_sb, 36) \ + x(btree_node_data_missing, 37) \ + x(btree_node_bset_after_end, 38) \ + x(btree_node_replicas_sectors_written_mismatch, 39) \ + x(btree_node_replicas_data_mismatch, 40) \ + x(bset_unknown_csum, 41) \ + x(bset_bad_csum, 42) \ + x(bset_past_end_of_btree_node, 43) \ + x(bset_wrong_sector_offset, 44) \ + x(bset_empty, 45) \ + x(bset_bad_seq, 46) \ + x(bset_blacklisted_journal_seq, 47) \ + x(first_bset_blacklisted_journal_seq, 48) \ + x(btree_node_bad_btree, 49) \ + x(btree_node_bad_level, 50) \ + x(btree_node_bad_min_key, 51) \ + x(btree_node_bad_max_key, 52) \ + x(btree_node_bad_format, 53) \ + x(btree_node_bkey_past_bset_end, 54) \ + x(btree_node_bkey_bad_format, 55) \ + x(btree_node_bad_bkey, 56) \ + x(btree_node_bkey_out_of_order, 57) \ + x(btree_root_bkey_invalid, 58) \ + x(btree_root_read_error, 59) \ + x(btree_root_bad_min_key, 60) \ + x(btree_root_bad_max_key, 61) \ + x(btree_node_read_error, 62) \ + x(btree_node_topology_bad_min_key, 63) \ + x(btree_node_topology_bad_max_key, 64) \ + x(btree_node_topology_overwritten_by_prev_node, 65) \ + x(btree_node_topology_overwritten_by_next_node, 66) \ + x(btree_node_topology_interior_node_empty, 67) \ + x(fs_usage_hidden_wrong, 68) \ + x(fs_usage_btree_wrong, 69) \ + x(fs_usage_data_wrong, 70) \ + x(fs_usage_cached_wrong, 71) \ + x(fs_usage_reserved_wrong, 72) \ + x(fs_usage_persistent_reserved_wrong, 73) \ + x(fs_usage_nr_inodes_wrong, 74) \ + x(fs_usage_replicas_wrong, 75) \ + 
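/* per-device usage counters, checked against in-memory accounting: */ \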
x(dev_usage_buckets_wrong, 76) \ + x(dev_usage_sectors_wrong, 77) \ + x(dev_usage_fragmented_wrong, 78) \ + x(dev_usage_buckets_ec_wrong, 79) \ + x(bkey_version_in_future, 80) \ + x(bkey_u64s_too_small, 81) \ + x(bkey_invalid_type_for_btree, 82) \ + x(bkey_extent_size_zero, 83) \ + x(bkey_extent_size_greater_than_offset, 84) \ + x(bkey_size_nonzero, 85) \ + x(bkey_snapshot_nonzero, 86) \ + x(bkey_snapshot_zero, 87) \ + x(bkey_at_pos_max, 88) \ + x(bkey_before_start_of_btree_node, 89) \ + x(bkey_after_end_of_btree_node, 90) \ + x(bkey_val_size_nonzero, 91) \ + x(bkey_val_size_too_small, 92) \ + x(alloc_v1_val_size_bad, 93) \ + x(alloc_v2_unpack_error, 94) \ + x(alloc_v3_unpack_error, 95) \ + x(alloc_v4_val_size_bad, 96) \ + x(alloc_v4_backpointers_start_bad, 97) \ + x(alloc_key_data_type_bad, 98) \ + x(alloc_key_empty_but_have_data, 99) \ + x(alloc_key_dirty_sectors_0, 100) \ + x(alloc_key_data_type_inconsistency, 101) \ + x(alloc_key_to_missing_dev_bucket, 102) \ + x(alloc_key_cached_inconsistency, 103) \ + x(alloc_key_cached_but_read_time_zero, 104) \ + x(alloc_key_to_missing_lru_entry, 105) \ + x(alloc_key_data_type_wrong, 106) \ + x(alloc_key_gen_wrong, 107) \ + x(alloc_key_dirty_sectors_wrong, 108) \ + x(alloc_key_cached_sectors_wrong, 109) \ + x(alloc_key_stripe_wrong, 110) \ + x(alloc_key_stripe_redundancy_wrong, 111) \ + x(bucket_sector_count_overflow, 112) \ + x(bucket_metadata_type_mismatch, 113) \ + x(need_discard_key_wrong, 114) \ + x(freespace_key_wrong, 115) \ + x(freespace_hole_missing, 116) \ + x(bucket_gens_val_size_bad, 117) \ + x(bucket_gens_key_wrong, 118) \ + x(bucket_gens_hole_wrong, 119) \ + x(bucket_gens_to_invalid_dev, 120) \ + x(bucket_gens_to_invalid_buckets, 121) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ + x(need_discard_freespace_key_bad, 124) \ + x(backpointer_pos_wrong, 125) \ + x(backpointer_to_missing_device, 126) \ + x(backpointer_to_missing_alloc, 127) \ + x(backpointer_to_missing_ptr, 128) \ + x(lru_entry_at_time_0, 129) \ + x(lru_entry_to_invalid_bucket, 130) \ + x(lru_entry_bad, 131) \ + x(btree_ptr_val_too_big, 132) \ + x(btree_ptr_v2_val_too_big, 133) \ + x(btree_ptr_has_non_ptr, 134) \ + x(extent_ptrs_invalid_entry, 135) \ + x(extent_ptrs_no_ptrs, 136) \ + x(extent_ptrs_too_many_ptrs, 137) \ + x(extent_ptrs_redundant_crc, 138) \ + x(extent_ptrs_redundant_stripe, 139) \ + x(extent_ptrs_unwritten, 140) \ + x(extent_ptrs_written_and_unwritten, 141) \ + x(ptr_to_invalid_device, 142) \ + x(ptr_to_duplicate_device, 143) \ + x(ptr_after_last_bucket, 144) \ + x(ptr_before_first_bucket, 145) \ + x(ptr_spans_multiple_buckets, 146) \ + x(ptr_to_missing_backpointer, 147) \ + x(ptr_to_missing_alloc_key, 148) \ + x(ptr_to_missing_replicas_entry, 149) \ + x(ptr_to_missing_stripe, 150) \ + x(ptr_to_incorrect_stripe, 151) \ + x(ptr_gen_newer_than_bucket_gen, 152) \ + x(ptr_too_stale, 153) \ + x(stale_dirty_ptr, 154) \ + x(ptr_bucket_data_type_mismatch, 155) \ + x(ptr_cached_and_erasure_coded, 156) \ + x(ptr_crc_uncompressed_size_too_small, 157) \ + x(ptr_crc_csum_type_unknown, 158) \ + x(ptr_crc_compression_type_unknown, 159) \ + x(ptr_crc_redundant, 160) \ + x(ptr_crc_uncompressed_size_too_big, 161) \ + x(ptr_crc_nonce_mismatch, 162) \ + x(ptr_stripe_redundant, 163) \ + x(reservation_key_nr_replicas_invalid, 164) \ + x(reflink_v_refcount_wrong, 165) \ + x(reflink_p_to_missing_reflink_v, 166) \ + x(stripe_pos_bad, 167) \ + x(stripe_val_size_bad, 168) \ + x(stripe_sector_count_wrong, 169) \ + 
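/* snapshot tree, snapshot and subvolume topology checks: */ \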
x(snapshot_tree_pos_bad, 170) \ + x(snapshot_tree_to_missing_snapshot, 171) \ + x(snapshot_tree_to_missing_subvol, 172) \ + x(snapshot_tree_to_wrong_subvol, 173) \ + x(snapshot_tree_to_snapshot_subvol, 174) \ + x(snapshot_pos_bad, 175) \ + x(snapshot_parent_bad, 176) \ + x(snapshot_children_not_normalized, 177) \ + x(snapshot_child_duplicate, 178) \ + x(snapshot_child_bad, 179) \ + x(snapshot_skiplist_not_normalized, 180) \ + x(snapshot_skiplist_bad, 181) \ + x(snapshot_should_not_have_subvol, 182) \ + x(snapshot_to_bad_snapshot_tree, 183) \ + x(snapshot_bad_depth, 184) \ + x(snapshot_bad_skiplist, 185) \ + x(subvol_pos_bad, 186) \ + x(subvol_not_master_and_not_snapshot, 187) \ + x(subvol_to_missing_root, 188) \ + x(subvol_root_wrong_bi_subvol, 189) \ + x(bkey_in_missing_snapshot, 190) \ + x(inode_pos_inode_nonzero, 191) \ + x(inode_pos_blockdev_range, 192) \ + x(inode_unpack_error, 193) \ + x(inode_str_hash_invalid, 194) \ + x(inode_v3_fields_start_bad, 195) \ + x(inode_snapshot_mismatch, 196) \ + x(inode_unlinked_but_clean, 197) \ + x(inode_unlinked_but_nlink_nonzero, 198) \ + x(inode_checksum_type_invalid, 199) \ + x(inode_compression_type_invalid, 200) \ + x(inode_subvol_root_but_not_dir, 201) \ + x(inode_i_size_dirty_but_clean, 202) \ + x(inode_i_sectors_dirty_but_clean, 203) \ + x(inode_i_sectors_wrong, 204) \ + x(inode_dir_wrong_nlink, 205) \ + x(inode_dir_multiple_links, 206) \ + x(inode_multiple_links_but_nlink_0, 207) \ + x(inode_wrong_backpointer, 208) \ + x(inode_wrong_nlink, 209) \ + x(inode_unreachable, 210) \ + x(deleted_inode_but_clean, 211) \ + x(deleted_inode_missing, 212) \ + x(deleted_inode_is_dir, 213) \ + x(deleted_inode_not_unlinked, 214) \ + x(extent_overlapping, 215) \ + x(extent_in_missing_inode, 216) \ + x(extent_in_non_reg_inode, 217) \ + x(extent_past_end_of_inode, 218) \ + x(dirent_empty_name, 219) \ + x(dirent_val_too_big, 220) \ + x(dirent_name_too_long, 221) \ + x(dirent_name_embedded_nul, 222) \ + x(dirent_name_dot_or_dotdot, 223) \ + x(dirent_name_has_slash, 224) \ + x(dirent_d_type_wrong, 225) \ + x(dirent_d_parent_subvol_wrong, 226) \ + x(dirent_in_missing_dir_inode, 227) \ + x(dirent_in_non_dir_inode, 228) \ + x(dirent_to_missing_inode, 229) \ + x(dirent_to_missing_subvol, 230) \ + x(dirent_to_itself, 231) \ + x(quota_type_invalid, 232) \ + x(xattr_val_size_too_small, 233) \ + x(xattr_val_size_too_big, 234) \ + x(xattr_invalid_type, 235) \ + x(xattr_name_invalid_chars, 236) \ + x(xattr_in_missing_inode, 237) \ + x(root_subvol_missing, 238) \ + x(root_dir_missing, 239) \ + x(root_inode_not_dir, 240) \ + x(dir_loop, 241) \ + x(hash_table_key_duplicate, 242) \ + x(hash_table_key_wrong_offset, 243) + +enum bch_sb_error_id { +#define x(t, n) BCH_FSCK_ERR_##t = n, + BCH_SB_ERRS() +#undef x + BCH_SB_ERR_MAX +}; + +extern const struct bch_sb_field_ops bch_sb_field_ops_errors; + +void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); + +void bch2_sb_errors_from_cpu(struct bch_fs *); + +void bch2_fs_sb_errors_exit(struct bch_fs *); +void bch2_fs_sb_errors_init_early(struct bch_fs *); +int bch2_fs_sb_errors_init(struct bch_fs *); + +#endif /* _BCACHEFS_SB_ERRORS_H */ diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h new file mode 100644 index 000000000000..b1c099843a39 --- /dev/null +++ b/fs/bcachefs/sb-errors_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_TYPES_H +#define _BCACHEFS_SB_ERRORS_TYPES_H + +#include "darray.h" + +struct bch_sb_error_entry_cpu { + u64 id:16, + nr:48; + 
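/* realtime seconds of the most recent occurrence, from ktime_get_real_seconds() */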
u64 last_error_time; +}; + +typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu; + +#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */ + diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 6dd85bb996fe..bed0f857fe5b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -7,21 +7,28 @@ #include "sb-members.h" #include "super-io.h" -/* Code for bch_sb_field_members_v1: */ +#define x(t, n, ...) [n] = #t, +static const char * const bch2_iops_measurements[] = { + BCH_IOPS_MEASUREMENTS() + NULL +}; -static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i) -{ - return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); -} +char * const bch2_member_error_strs[] = { + BCH_MEMBER_ERROR_TYPES() + NULL +}; +#undef x + +/* Code for bch_sb_field_members_v1: */ struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) { - return members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); + return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); } static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) { - struct bch_member ret, *p = members_v2_get_mut(mi, i); + struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); return ret; @@ -36,7 +43,8 @@ static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int { struct bch_member ret, *p = members_v1_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); - memcpy(&ret, p, min_t(size_t, sizeof(struct bch_member), sizeof(ret))); return ret; + memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); + return ret; } struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) @@ -62,7 +70,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); - memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); + memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); memset(dst + le16_to_cpu(mi->member_bytes), 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); } @@ -71,7 +79,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) return 0; } -int bch2_members_v2_init(struct bch_fs *c) +int bch2_sb_members_v2_init(struct bch_fs *c) { struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -91,7 +99,7 @@ int bch2_members_v2_init(struct bch_fs *c) return sb_members_v2_resize_entries(c); } -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) { struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -105,7 +113,7 @@ int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) - memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); + memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); return 0; } @@ -155,6 +163,8 @@ static void member_to_text(struct printbuf *out, u64 bucket_size = le16_to_cpu(m.bucket_size); u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; + if (!bch2_member_exists(&m)) + return; prt_printf(out, "Device:"); prt_tab(out); @@ -163,6 +173,21 @@ static void member_to_text(struct printbuf *out, printbuf_indent_add(out, 2); + 
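/* label resolves through the disk_groups section via BCH_MEMBER_GROUP() */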
prt_printf(out, "Label:"); + prt_tab(out); + if (BCH_MEMBER_GROUP(&m)) { + unsigned idx = BCH_MEMBER_GROUP(&m) - 1; + + if (idx < disk_groups_nr(gi)) + prt_printf(out, "%s (%u)", + gi->entries[idx].label, idx); + else + prt_printf(out, "(bad disk labels section)"); + } else { + prt_printf(out, "(none)"); + } + prt_newline(out); + prt_printf(out, "UUID:"); prt_tab(out); pr_uuid(out, m.uuid.b); @@ -173,6 +198,13 @@ static void member_to_text(struct printbuf *out, prt_units_u64(out, device_size << 9); prt_newline(out); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s errors:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, le64_to_cpu(m.errors[i])); + prt_newline(out); + } + for (unsigned i = 0; i < BCH_IOPS_NR; i++) { prt_printf(out, "%s iops:", bch2_iops_measurements[i]); prt_tab(out); @@ -198,7 +230,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Last mount:"); prt_tab(out); if (m.last_mount) - pr_time(out, le64_to_cpu(m.last_mount)); + bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); else prt_printf(out, "(never)"); prt_newline(out); @@ -211,21 +243,6 @@ static void member_to_text(struct printbuf *out, : "unknown"); prt_newline(out); - prt_printf(out, "Label:"); - prt_tab(out); - if (BCH_MEMBER_GROUP(&m)) { - unsigned idx = BCH_MEMBER_GROUP(&m) - 1; - - if (idx < disk_groups_nr(gi)) - prt_printf(out, "%s (%u)", - gi->entries[idx].label, idx); - else - prt_printf(out, "(bad disk labels section)"); - } else { - prt_printf(out, "(none)"); - } - prt_newline(out); - prt_printf(out, "Data allowed:"); prt_tab(out); if (BCH_MEMBER_DATA_ALLOWED(&m)) @@ -262,8 +279,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); unsigned i; - if ((void *) members_v1_get_mut(mi, sb->nr_devices) > - vstruct_end(&mi->field)) { + if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { prt_printf(err, "too many devices for section size"); return -BCH_ERR_invalid_sb_members; } @@ -286,10 +302,8 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v1_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v1_get(mi, i), gi, sb, i); } const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { @@ -304,10 +318,8 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v2_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v2_get(mi, i), gi, sb, i); } static int bch2_sb_members_v2_validate(struct bch_sb *sb, @@ -315,7 +327,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct printbuf *err) { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); - size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) - + size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - (void *) mi; if (mi_bytes > vstruct_bytes(&mi->field)) { @@ -337,3 +349,72 @@ const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { .validate = bch2_sb_members_v2_validate, .to_text = bch2_sb_members_v2_to_text, }; + +void 
bch2_sb_members_from_cpu(struct bch_fs *c) +{ + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + struct bch_dev *ca; + unsigned i, e; + + rcu_read_lock(); + for_each_member_device_rcu(ca, c, i, NULL) { + struct bch_member *m = __bch2_members_v2_get_mut(mi, i); + + for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) + m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); + } + rcu_read_unlock(); +} + +void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member m; + + mutex_lock(&ca->fs->sb_lock); + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); + mutex_unlock(&ca->fs->sb_lock); + + printbuf_tabstop_push(out, 12); + + prt_str(out, "IO errors since filesystem creation"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i])); + prt_newline(out); + } + printbuf_indent_sub(out, 2); + + prt_str(out, "IO errors since "); + bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); + prt_str(out, " ago"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); + prt_newline(out); + } + printbuf_indent_sub(out, 2); +} + +void bch2_dev_errors_reset(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member *m; + + mutex_lock(&c->sb_lock); + m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) + m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); + m->errors_reset_time = ktime_get_real_seconds(); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 430f3457bfd4..03613e3eb8e3 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -2,8 +2,16 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H -int bch2_members_v2_init(struct bch_fs *c); -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); +extern char * const bch2_member_error_strs[]; + +static inline struct bch_member * +__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) +{ + return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); +} + +int bch2_sb_members_v2_init(struct bch_fs *c); +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); @@ -179,4 +187,41 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1; extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; +static inline bool bch2_member_exists(struct bch_member *m) +{ + return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); +} + +static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev) +{ + if (dev < sb->nr_devices) { + struct bch_member m = bch2_sb_member_get(sb, dev); + return bch2_member_exists(&m); + } + return false; +} + +static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) +{ + return (struct bch_member_cpu) { + .nbuckets = le64_to_cpu(mi->nbuckets), + .first_bucket = 
le16_to_cpu(mi->first_bucket), + .bucket_size = le16_to_cpu(mi->bucket_size), + .group = BCH_MEMBER_GROUP(mi), + .state = BCH_MEMBER_STATE(mi), + .discard = BCH_MEMBER_DISCARD(mi), + .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), + .durability = BCH_MEMBER_DURABILITY(mi) + ? BCH_MEMBER_DURABILITY(mi) - 1 + : 1, + .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), + .valid = bch2_member_exists(mi), + }; +} + +void bch2_sb_members_from_cpu(struct bch_fs *); + +void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); +void bch2_dev_errors_reset(struct bch_dev *); + #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index b684b9f00c1b..b775cf0fb7cb 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -11,6 +11,8 @@ #include <linux/sched/task.h> #include <linux/slab.h> +#include <trace/events/lock.h> + #include "six.h" #ifdef DEBUG @@ -462,11 +464,12 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, smp_mb__after_atomic(); } + trace_contention_begin(lock, 0); + lock_contended(&lock->dep_map, ip); + if (six_optimistic_spin(lock, type)) goto out; - lock_contended(&lock->dep_map, ip); - wait->task = current; wait->lock_want = type; wait->lock_acquired = false; @@ -546,6 +549,7 @@ out: six_clear_bitmask(lock, SIX_LOCK_HELD_write); six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); } + trace_contention_end(lock, 0); return ret; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4982468bfe11..e9af77b384c7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -30,17 +30,18 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_tree_pos_bad, + "bad pos"); +fsck_err: + return ret; } int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, @@ -202,68 +203,60 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_snapshot s; u32 i, id; + int ret = 0; - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_pos_bad, + "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - if (id && id <= k.k->p.offset) { - prt_printf(err, "bad parent node (%u <= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, + snapshot_parent_bad, + "bad parent node (%u <= %llu)", + id, k.k->p.offset); - if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { - prt_printf(err, "children not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, + 
snapshot_children_not_normalized, + "children not normalized"); - if (s.v->children[0] && - s.v->children[0] == s.v->children[1]) { - prt_printf(err, "duplicate child nodes"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, + snapshot_child_duplicate, + "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - if (id >= k.k->p.offset) { - prt_printf(err, "bad child node (%u >= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id >= k.k->p.offset, c, err, + snapshot_child_bad, + "bad child node (%u >= %llu)", + id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { - if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { - prt_printf(err, "skiplist not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, + snapshot_skiplist_not_normalized, + "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - if ((id && !s.v->parent) || - (id && id <= k.k->p.offset)) { - prt_printf(err, "bad skiplist node %u", id); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, + snapshot_skiplist_bad, + "bad skiplist node %u", id); } } - - return 0; +fsck_err: + return ret; } static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) @@ -325,8 +318,9 @@ int bch2_mark_snapshot(struct btree_trans *trans, __set_is_ancestor_bitmap(c, id); if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) + bch2_delete_dead_snapshots_async(c); } } else { memset(t, 0, sizeof(*t)); @@ -529,7 +523,7 @@ static int check_snapshot_tree(struct btree_trans *trans, if (fsck_err_on(ret || root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), - c, + c, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n %s", (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -541,17 +535,20 @@ static int check_snapshot_tree(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, + c, snapshot_tree_to_missing_subvol, "snapshot tree points to missing subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || fsck_err_on(!bch2_snapshot_is_ancestor_early(c, le32_to_cpu(subvol.snapshot), - root_id), c, + root_id), + c, snapshot_tree_to_wrong_subvol, "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, + fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), + c, snapshot_tree_to_snapshot_subvol, "snapshot tree points to snapshot subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { @@ -787,7 +784,9 @@ static int check_snapshot(struct btree_trans *trans, goto err; } } else { - if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", + if 
(fsck_err_on(s.subvol, + c, snapshot_should_not_have_subvol, + "snapshot should not point to subvol:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -803,7 +802,8 @@ static int check_snapshot(struct btree_trans *trans, if (ret < 0) goto err; - if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", + if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree, + "snapshot points to missing/incorrect tree:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) @@ -815,7 +815,8 @@ static int check_snapshot(struct btree_trans *trans, if (le32_to_cpu(s.depth) != real_depth && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", + fsck_err(c, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n %s", real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -832,7 +833,8 @@ static int check_snapshot(struct btree_trans *trans, if (!ret && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with bad skiplist field:\n %s", + fsck_err(c, snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -1251,13 +1253,7 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans, return 0; } -/* - * For a given snapshot, if it doesn't have a subvolume that points to it, and - * it doesn't have child snapshot nodes - it's now redundant and we can mark it - * as deleted. - */ -static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) +static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) { struct bkey_s_c_snapshot snap; u32 children[2]; @@ -1278,10 +1274,21 @@ static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btre bch2_snapshot_live(trans, children[1]); if (ret < 0) return ret; + return !ret; +} - if (!ret) - return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); - return 0; +/* + * For a given snapshot, if it doesn't have a subvolume that points to it, and + * it doesn't have child snapshot nodes - it's now redundant and we can mark it + * as deleted. + */ +static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) +{ + int ret = bch2_snapshot_needs_delete(trans, k); + + return ret <= 0 + ? ret + : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); } static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, @@ -1342,12 +1349,12 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, u32 id = le32_to_cpu(s->v.skip[j]); if (snapshot_list_has_id(deleted, id)) { - id = depth > 1 - ? bch2_snapshot_nth_parent_skip(c, + id = bch2_snapshot_nth_parent_skip(c, parent, - get_random_u32_below(depth - 1), - deleted) - : parent; + depth > 1 + ? 
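/* at depth 1 the parent is the only candidate, otherwise pick a random ancestor */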
get_random_u32_below(depth - 1) + : 0, + deleted); s->v.skip[j] = cpu_to_le32(id); } } @@ -1369,6 +1376,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) u32 *i, id; int ret = 0; + if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) + return 0; + if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { @@ -1386,7 +1396,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, NULL, NULL, 0, - bch2_delete_redundant_snapshot(trans, &iter, k)); + bch2_delete_redundant_snapshot(trans, k)); if (ret) { bch_err_msg(c, ret, "deleting redundant snapshots"); goto err; @@ -1427,6 +1437,15 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!btree_type_has_snapshots(id)) continue; + /* + * deleted inodes btree is maintained by a trigger on the inodes + * btree - no work for us to do here, and it's not safe to scan + * it because we'll see out of date keys due to the btree write + * buffer: + */ + if (id == BTREE_ID_deleted_inodes) + continue; + ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, @@ -1447,6 +1466,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) } } + bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); for_each_btree_key(trans, iter, BTREE_ID_snapshots, @@ -1491,8 +1511,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err_create_lock; } } - - clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); err_create_lock: up_write(&c->snapshot_create_lock); err: @@ -1508,8 +1526,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); - if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) - bch2_delete_dead_snapshots(c); + bch2_delete_dead_snapshots(c); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } @@ -1520,20 +1537,6 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } -int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, - struct btree_trans_commit_hook *h) -{ - struct bch_fs *c = trans->c; - - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) - return 0; - - bch2_delete_dead_snapshots_async(c); - return 0; -} - int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, enum btree_id id, struct bpos pos) @@ -1664,6 +1667,26 @@ again: return ret ?: trans_was_restarted(trans, restart_count); } +static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c_snapshot snap; + int ret = 0; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v) || + bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || + (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + return 0; + } + + return ret; +} + int bch2_snapshots_read(struct bch_fs *c) { struct btree_iter iter; @@ -1674,7 +1697,8 @@ int bch2_snapshots_read(struct bch_fs *c) for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(trans, k)) ?: + bch2_snapshot_set_equiv(trans, k) ?: + bch2_check_snapshot_needs_deletion(trans, k)) ?: for_each_btree_key2(trans, iter, 
BTREE_ID_snapshots, POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index de215d9d1252..f09a22f44239 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,7 +5,7 @@ enum bkey_invalid_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ @@ -19,7 +19,7 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); @@ -244,8 +244,6 @@ int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); -int bch2_delete_dead_snapshots_hook(struct btree_trans *, - struct btree_trans_commit_hook *); void bch2_delete_dead_snapshots_work(struct work_struct *); int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index caf2dd7dafff..fccd25aa3242 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -62,7 +62,8 @@ static int check_subvol(struct btree_trans *trans, if (ret) return ret; - if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c, + if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + c, subvol_not_master_and_not_snapshot, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = @@ -97,16 +98,17 @@ int bch2_check_subvols(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX)) { - prt_printf(err, "invalid pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || + bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, + subvol_pos_bad, + "invalid pos"); +fsck_err: + return ret; } void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, @@ -230,7 +232,6 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; - struct btree_trans_commit_hook *h; u32 snapid; int ret = 0; @@ -246,22 +247,8 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) snapid = le32_to_cpu(subvol.v->snapshot); - ret = bch2_btree_delete_at(trans, &iter, 0); - if (ret) - goto err; - - ret = bch2_snapshot_node_set_deleted(trans, snapid); - if (ret) - goto err; - - h = bch2_trans_kmalloc(trans, sizeof(*h)); - ret = PTR_ERR_OR_ZERO(h); - if (ret) - goto err; - - h->fn = bch2_delete_dead_snapshots_hook; - bch2_trans_commit_hook(trans, h); -err: + ret = bch2_btree_delete_at(trans, &iter, 0) ?: 
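+		/* the commit hook is gone: dead snapshot cleanup is now kicked off from bch2_mark_snapshot() */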
+ bch2_snapshot_node_set_deleted(trans, snapid); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index bb14f92e8687..a1003d30ab0a 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -9,7 +9,7 @@ enum bkey_invalid_flags; int bch2_check_subvols(struct bch_fs *); -int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 332d41e1c0a3..f4cad903f4d6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -13,6 +13,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "super-io.h" #include "super.h" @@ -720,7 +721,7 @@ retry: if (opt_defined(*opts, sb)) goto err; - printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", path, err.buf); printbuf_reset(&err); @@ -782,7 +783,7 @@ got_super: ret = bch2_sb_validate(sb, &err, READ); if (ret) { - printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", + printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); goto err_no_print; } @@ -790,7 +791,7 @@ out: printbuf_exit(&err); return ret; err: - printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n", path, err.buf); err_no_print: bch2_free_super(sb); @@ -805,7 +806,12 @@ static void write_super_endio(struct bio *bio) /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "superblock %s error: %s", + bio_data_dir(bio) ? 
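/* the same endio serves superblock read-back, so report the bio's actual direction */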
"write" : "read", bch2_blk_status_to_str(bio->bi_status))) ca->sb_write_error = 1; @@ -892,7 +898,9 @@ int bch2_write_super(struct bch_fs *c) SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); bch2_sb_counters_from_cpu(c); - bch_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_members_from_cpu(c); + bch2_sb_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_errors_from_cpu(c); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -1175,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "Created:"); prt_tab(out); if (sb->time_base_lo) - pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); + bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); else prt_printf(out, "(not set)"); prt_newline(out); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index b0d8584f475f..f5abd102bff7 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -23,6 +23,11 @@ u64 bch2_upgrade_recovery_passes(struct bch_fs *c, unsigned, unsigned); +static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) +{ + return le32_to_cpu(f->u64s) * sizeof(u64); +} + #define field_to_type(_f, _name) \ container_of_or_null(_f, struct bch_sb_field_##_name, field) @@ -78,41 +83,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -/* BCH_SB_FIELD_members_v1: */ - -static inline bool bch2_member_exists(struct bch_member *m) -{ - return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); -} - -static inline bool bch2_dev_exists(struct bch_sb *sb, - unsigned dev) -{ - if (dev < sb->nr_devices) { - struct bch_member m = bch2_sb_member_get(sb, dev); - return bch2_member_exists(&m); - } - return false; -} - -static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -{ - return (struct bch_member_cpu) { - .nbuckets = le64_to_cpu(mi->nbuckets), - .first_bucket = le16_to_cpu(mi->first_bucket), - .bucket_size = le16_to_cpu(mi->bucket_size), - .group = BCH_MEMBER_GROUP(mi), - .state = BCH_MEMBER_STATE(mi), - .discard = BCH_MEMBER_DISCARD(mi), - .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), - .durability = BCH_MEMBER_DURABILITY(mi) - ? 
BCH_MEMBER_DURABILITY(mi) - 1 - : 1, - .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), - .valid = bch2_member_exists(mi), - }; -} - void bch2_sb_maybe_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0e85c22672be..24672bb31cbe 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -49,6 +49,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "snapshot.h" #include "subvolume.h" @@ -400,7 +401,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) goto err; @@ -481,6 +482,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_time_stats_exit(&c->times[i]); bch2_free_pending_node_rewrites(c); + bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); bch2_fs_quota_exit(c); @@ -713,6 +715,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_quota_init(c); bch2_fs_ec_init_early(c); bch2_fs_move_init(c); + bch2_fs_sb_errors_init_early(c); INIT_LIST_HEAD(&c->list); @@ -729,8 +732,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->journal_iters); - INIT_LIST_HEAD(&c->fsck_errors); - mutex_init(&c->fsck_error_lock); + INIT_LIST_HEAD(&c->fsck_error_msgs); + mutex_init(&c->fsck_error_msgs_lock); seqcount_init(&c->gc_pos_lock); @@ -840,6 +843,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) } ret = bch2_fs_counters_init(c) ?: + bch2_fs_sb_errors_init(c) ?: bch2_io_clock_init(&c->io_clock[READ]) ?: bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: @@ -942,16 +946,13 @@ int bch2_fs_start(struct bch_fs *c) mutex_lock(&c->sb_lock); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) { mutex_unlock(&c->sb_lock); goto err; } for_each_online_member(ca, c, i) - bch2_sb_from_fs(c, ca); - - for_each_online_member(ca, c, i) bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now); mutex_unlock(&c->sb_lock); @@ -960,12 +961,6 @@ int bch2_fs_start(struct bch_fs *c) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - for (i = 0; i < BCH_TRANSACTIONS_NR; i++) { - mutex_lock(&c->btree_transaction_stats[i].lock); - bch2_time_stats_init(&c->btree_transaction_stats[i].lock_hold_times); - mutex_unlock(&c->btree_transaction_stats[i].lock); - } - ret = BCH_SB_INITIALIZED(c->disk_sb.sb) ? 
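/* initialized superblock: run recovery; brand new filesystem: initialize */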
bch2_fs_recovery(c) : bch2_fs_initialize(c); @@ -1140,6 +1135,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, struct bch_member *member) { struct bch_dev *ca; + unsigned i; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) @@ -1157,6 +1153,10 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, bch2_time_stats_init(&ca->io_latency[WRITE]); ca->mi = bch2_mi_to_cpu(member); + + for (i = 0; i < ARRAY_SIZE(member->errors); i++) + atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); + ca->uuid = member->uuid; ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, @@ -1591,7 +1591,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); if (BCH_MEMBER_GROUP(&dev_mi)) { - bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); + bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); if (label.allocation_failure) { ret = -ENOMEM; goto err; @@ -1631,16 +1631,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } - mi = bch2_sb_field_get(ca->disk_sb.sb, members_v2); - - if (!bch2_sb_field_resize(&ca->disk_sb, members_v2, - le32_to_cpu(mi->field.u64s) + - sizeof(dev_mi) / sizeof(u64))) { - ret = -BCH_ERR_ENOSPC_sb_members; - bch_err_msg(c, ret, "setting up new superblock"); - goto err_unlock; - } - if (dynamic_fault("bcachefs:add:no_slot")) goto no_slot; @@ -1654,6 +1644,8 @@ no_slot: have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); + + mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); @@ -1689,13 +1681,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err_msg(c, ret, "marking new superblock"); + bch_err_msg(ca, ret, "marking new superblock"); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err_msg(c, ret, "initializing free space"); + bch_err_msg(ca, ret, "initializing free space"); goto err_late; } @@ -1763,19 +1755,26 @@ int bch2_dev_online(struct bch_fs *c, const char *path) if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - mutex_lock(&c->sb_lock); - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + if (!ca->mi.freespace_initialized) { + ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err; + } - m->last_mount = - cpu_to_le64(ktime_get_real_seconds()); + if (!ca->journal.nr) { + ret = bch2_dev_journal_alloc(ca); + bch_err_msg(ca, ret, "allocating journal"); + if (ret) + goto err; + } + mutex_lock(&c->sb_lock); + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_fs_freespace_init(c); - if (ret) - bch_err_msg(c, ret, "initializing free space"); - up_write(&c->state_lock); return 0; err: @@ -1886,9 +1885,9 @@ found: struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_opts opts) { - struct bch_sb_handle *sb = NULL; + DARRAY(struct bch_sb_handle) sbs = { 0 }; struct bch_fs *c = NULL; - unsigned i, best_sb = 0; + struct bch_sb_handle *sb, *best = NULL; struct printbuf errbuf = PRINTBUF; int ret = 0; @@ -1900,49 +1899,46 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } - sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); - if (!sb) { - ret = -ENOMEM; + ret = 
darray_make_room(&sbs, nr_devices); + if (ret) goto err; - } - for (i = 0; i < nr_devices; i++) { - ret = bch2_read_super(devices[i], &opts, &sb[i]); + for (unsigned i = 0; i < nr_devices; i++) { + struct bch_sb_handle sb = { NULL }; + + ret = bch2_read_super(devices[i], &opts, &sb); if (ret) goto err; + BUG_ON(darray_push(&sbs, sb)); } - for (i = 1; i < nr_devices; i++) - if (le64_to_cpu(sb[i].sb->seq) > - le64_to_cpu(sb[best_sb].sb->seq)) - best_sb = i; - - i = 0; - while (i < nr_devices) { - if (i != best_sb && - !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) { - pr_info("%pg has been removed, skipping", sb[i].bdev); - bch2_free_super(&sb[i]); - array_remove_item(sb, nr_devices, i); + darray_for_each(sbs, sb) + if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) + best = sb; + + darray_for_each_reverse(sbs, sb) { + if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) { + pr_info("%pg has been removed, skipping", sb->bdev); + bch2_free_super(sb); + darray_remove_item(&sbs, sb); + best -= best > sb; continue; } - ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); + ret = bch2_dev_in_fs(best->sb, sb->sb); if (ret) goto err_print; - i++; } - c = bch2_fs_alloc(sb[best_sb].sb, opts); - if (IS_ERR(c)) { - ret = PTR_ERR(c); + c = bch2_fs_alloc(best->sb, opts); + ret = PTR_ERR_OR_ZERO(c); + if (ret) goto err; - } down_write(&c->state_lock); - for (i = 0; i < nr_devices; i++) { - ret = bch2_dev_attach_bdev(c, &sb[i]); + darray_for_each(sbs, sb) { + ret = bch2_dev_attach_bdev(c, sb); if (ret) { up_write(&c->state_lock); goto err; @@ -1961,7 +1957,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } out: - kfree(sb); + darray_for_each(sbs, sb) + bch2_free_super(sb); + darray_exit(&sbs); printbuf_exit(&errbuf); module_put(THIS_MODULE); return c; @@ -1971,9 +1969,6 @@ err_print: err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); - if (sb) - for (i = 0; i < nr_devices; i++) - bch2_free_super(&sb[i]); c = ERR_PTR(ret); goto out; } diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 78d6138db62d..7dda4985b99f 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -37,16 +37,4 @@ struct bch_member_cpu { u8 valid; }; -struct bch_disk_group_cpu { - bool deleted; - u16 parent; - struct bch_devs_mask devs; -}; - -struct bch_disk_groups_cpu { - struct rcu_head rcu; - unsigned nr; - struct bch_disk_group_cpu entries[] __counted_by(nr); -}; - #endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 397116966a7c..ab743115f169 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -149,7 +149,9 @@ read_attribute(bucket_size); read_attribute(first_bucket); read_attribute(nbuckets); rw_attribute(durability); -read_attribute(iodone); +read_attribute(io_done); +read_attribute(io_errors); +write_attribute(io_errors_reset); read_attribute(io_latency_read); read_attribute(io_latency_write); @@ -212,7 +214,7 @@ read_attribute(copy_gc_wait); rw_attribute(rebalance_enabled); sysfs_pd_controller_attribute(rebalance); -read_attribute(rebalance_work); +read_attribute(rebalance_status); rw_attribute(promote_whole_extents); read_attribute(new_stripes); @@ -341,7 +343,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) { - prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]); + prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); bch2_bpos_to_text(out, 
c->gc_gens_pos); prt_printf(out, "\n"); } @@ -386,8 +388,8 @@ SHOW(bch2_fs) if (attr == &sysfs_copy_gc_wait) bch2_copygc_wait_to_text(out, c); - if (attr == &sysfs_rebalance_work) - bch2_rebalance_work_to_text(out, c); + if (attr == &sysfs_rebalance_status) + bch2_rebalance_status_to_text(out, c); sysfs_print(promote_whole_extents, c->promote_whole_extents); @@ -646,7 +648,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_copy_gc_wait, &sysfs_rebalance_enabled, - &sysfs_rebalance_work, + &sysfs_rebalance_status, sysfs_pd_controller_files(rebalance), &sysfs_moving_ctxts, @@ -707,10 +709,8 @@ STORE(bch2_fs_opts_dir) bch2_opt_set_by_id(&c->opts, id, v); if ((id == Opt_background_target || - id == Opt_background_compression) && v) { - bch2_rebalance_add_work(c, S64_MAX); - rebalance_wakeup(c); - } + id == Opt_background_compression) && v) + bch2_set_rebalance_needs_scan(c, 0); ret = size; err: @@ -882,7 +882,7 @@ static const char * const bch2_rw[] = { NULL }; -static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) +static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca) { int rw, i; @@ -910,13 +910,8 @@ SHOW(bch2_dev) sysfs_print(discard, ca->mi.discard); if (attr == &sysfs_label) { - if (ca->mi.group) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, - ca->mi.group - 1); - mutex_unlock(&c->sb_lock); - } - + if (ca->mi.group) + bch2_disk_path_to_text(out, c, ca->mi.group - 1); prt_char(out, '\n'); } @@ -930,8 +925,11 @@ SHOW(bch2_dev) prt_char(out, '\n'); } - if (attr == &sysfs_iodone) - dev_iodone_to_text(out, ca); + if (attr == &sysfs_io_done) + dev_io_done_to_text(out, ca); + + if (attr == &sysfs_io_errors) + bch2_dev_io_errors_to_text(out, ca); sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); @@ -998,6 +996,9 @@ STORE(bch2_dev) return ret; } + if (attr == &sysfs_io_errors_reset) + bch2_dev_errors_reset(ca); + return size; } SYSFS_OPS(bch2_dev); @@ -1015,7 +1016,9 @@ struct attribute *bch2_dev_files[] = { &sysfs_label, &sysfs_has_data, - &sysfs_iodone, + &sysfs_io_done, + &sysfs_io_errors, + &sysfs_io_errors_reset, &sysfs_io_latency_read, &sysfs_io_latency_write, diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index 33efa6005c6f..dc48b52b01b4 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -7,6 +7,7 @@ #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" +#include "move_types.h" #include "opts.h" #include "six.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 19264492151b..893304a1f06e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(btree_node, TP_printk("%d,%d %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->level, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); @@ -461,7 +461,7 @@ TRACE_EVENT(btree_path_relock_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s held %u:%u lock count %u:%u iter seq %u lock seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -522,7 +522,7 @@ TRACE_EVENT(btree_path_upgrade_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u", 
__entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -767,25 +767,36 @@ DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, ); TRACE_EVENT(move_data, - TP_PROTO(struct bch_fs *c, u64 sectors_moved, - u64 keys_moved), - TP_ARGS(c, sectors_moved, keys_moved), + TP_PROTO(struct bch_fs *c, + struct bch_move_stats *stats), + TP_ARGS(c, stats), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(u64, sectors_moved ) + __field(dev_t, dev ) __field(u64, keys_moved ) + __field(u64, keys_raced ) + __field(u64, sectors_seen ) + __field(u64, sectors_moved ) + __field(u64, sectors_raced ) ), TP_fast_assign( - __entry->dev = c->dev; - __entry->sectors_moved = sectors_moved; - __entry->keys_moved = keys_moved; + __entry->dev = c->dev; + __entry->keys_moved = atomic64_read(&stats->keys_moved); + __entry->keys_raced = atomic64_read(&stats->keys_raced); + __entry->sectors_seen = atomic64_read(&stats->sectors_seen); + __entry->sectors_moved = atomic64_read(&stats->sectors_moved); + __entry->sectors_raced = atomic64_read(&stats->sectors_raced); ), - TP_printk("%d,%d sectors_moved %llu keys_moved %llu", + TP_printk("%d,%d keys moved %llu raced %llu " + "sectors seen %llu moved %llu raced %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->sectors_moved, __entry->keys_moved) + __entry->keys_moved, + __entry->keys_raced, + __entry->sectors_seen, + __entry->sectors_moved, + __entry->sectors_raced) ); TRACE_EVENT(evacuate_bucket, @@ -1012,7 +1023,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, TP_printk("%s %pS btree %s pos %llu:%llu:%u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) @@ -1032,13 +1043,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, TP_ARGS(trans, caller_ip, path) ); +struct get_locks_fail; + TRACE_EVENT(trans_restart_upgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, struct btree_path *path, unsigned old_locks_want, - unsigned new_locks_want), - TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want), + unsigned new_locks_want, + struct get_locks_fail *f), + TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f), TP_STRUCT__entry( __array(char, trans_fn, 32 ) @@ -1046,6 +1060,11 @@ TRACE_EVENT(trans_restart_upgrade, __field(u8, btree_id ) __field(u8, old_locks_want ) __field(u8, new_locks_want ) + __field(u8, level ) + __field(u32, path_seq ) + __field(u32, node_seq ) + __field(u32, path_alloc_seq ) + __field(u32, downgrade_seq) TRACE_BPOS_entries(pos) ), @@ -1055,18 +1074,28 @@ TRACE_EVENT(trans_restart_upgrade, __entry->btree_id = path->btree_id; __entry->old_locks_want = old_locks_want; __entry->new_locks_want = new_locks_want; + __entry->level = f->l; + __entry->path_seq = path->l[f->l].lock_seq; + __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 
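/* f->b may be NULL or an ERR_PTR when no node was found at the failed level */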
0 : f->b->c.lock.seq; + __entry->path_alloc_seq = path->alloc_seq; + __entry->downgrade_seq = path->downgrade_seq; TRACE_BPOS_assign(pos, path->pos) ), - TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, __entry->old_locks_want, - __entry->new_locks_want) + __entry->new_locks_want, + __entry->level, + __entry->path_seq, + __entry->node_seq, + __entry->path_alloc_seq, + __entry->downgrade_seq) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, @@ -1219,7 +1248,7 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -1227,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, __entry->new_u64s) ); +TRACE_EVENT(path_downgrade, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, + struct btree_path *path), + TP_ARGS(trans, caller_ip, path), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + ), + + TP_printk("%s %pS", + __entry->trans_fn, + (void *) __entry->caller_ip) +); + DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 08bac0ba8d0b..84b142fcc3df 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -467,6 +467,24 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) prt_printf(out, "%s", u->name); } +#ifndef __KERNEL__ +#include <time.h> +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + time_t t = sec; + char buf[64]; + ctime_r(&t, buf); + prt_str(out, buf); +} +#else +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%ptT", &sec); + prt_str(out, buf); +} +#endif + #define TABSTOP_SIZE 12 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 849a37ae497c..2984b57b2958 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -245,26 +245,7 @@ do { \ #define prt_bitflags(...)
bch2_prt_bitflags(__VA_ARGS__) void bch2_pr_time_units(struct printbuf *, u64); - -#ifdef __KERNEL__ -static inline void pr_time(struct printbuf *out, u64 time) -{ - prt_printf(out, "%llu", time); -} -#else -#include <time.h> -static inline void pr_time(struct printbuf *out, u64 _time) -{ - char time_str[64]; - time_t time = _time; - struct tm *tm = localtime(&time); - size_t err = strftime(time_str, sizeof(time_str), "%c", tm); - if (!err) - prt_printf(out, "(formatting error)"); - else - prt_printf(out, "%s", time_str); -} -#endif +void bch2_prt_datetime(struct printbuf *, time64_t); #ifdef __KERNEL__ static inline void uuid_unparse_lower(u8 *uuid, char *out) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index b069b1a62e25..a39ff0c296ec 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,46 +70,38 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - const struct xattr_handler *handler; struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); + unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len)); + int ret = 0; - if (bkey_val_u64s(k.k) < - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))) { - prt_printf(err, "value too small (%zu < %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, + xattr_val_size_too_small, + "value too small (%zu < %u)", + bkey_val_u64s(k.k), val_u64s); /* XXX why +4 ? */ - if (bkey_val_u64s(k.k) > - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)); - return -BCH_ERR_invalid_bkey; - } - - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (!handler) { - prt_printf(err, "invalid type (%u)", xattr.v->x_type); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) { - prt_printf(err, "xattr name has invalid characters"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len) + 4); + + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, + xattr_val_size_too_big, + "value too big (%zu > %u)", + bkey_val_u64s(k.k), val_u64s); + + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, + xattr_invalid_type, + "invalid type (%u)", xattr.v->x_type); + + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, + xattr_name_invalid_chars, + "xattr name has invalid characters"); +fsck_err: + return ret; } void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, @@ -590,7 +582,7 @@ err: if (value && (opt_id == Opt_background_compression || opt_id == Opt_background_target)) - bch2_rebalance_add_work(c, inode->v.i_blocks); + bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); return bch2_err_class(ret); } diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index f5a52e3a6016..1337f31a5c49 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,7 +6,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c, +int 
bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9acdec56f626..a93d76df8ed8 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -96,6 +96,7 @@ static const struct address_space_operations befs_symlink_aops = { }; static const struct export_operations befs_export_operations = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = befs_fh_to_dentry, .fh_to_parent = befs_fh_to_parent, .get_parent = befs_get_parent, diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index c53a1d220622..1564eacc253d 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include "super.h" +#include "mds_client.h" static inline void ceph_set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) @@ -31,6 +32,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) { + struct ceph_client *cl = ceph_inode_to_client(inode); int size; unsigned int retry_cnt = 0; const char *name; @@ -72,8 +74,8 @@ retry: } else if (size == -ENODATA || size == 0) { acl = NULL; } else { - pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", - ceph_vinop(inode), size); + pr_err_ratelimited_client(cl, "%llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); } @@ -105,7 +107,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; if (acl) { - ret = posix_acl_update_mode(&nop_mnt_idmap, inode, + ret = posix_acl_update_mode(idmap, inode, &new_mode, &acl); if (ret) goto out; @@ -140,7 +142,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - ret = __ceph_setattr(inode, &newattrs, NULL); + ret = __ceph_setattr(idmap, inode, &newattrs, NULL); if (ret) goto out_free; } @@ -151,7 +153,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - __ceph_setattr(inode, &newattrs, NULL); + __ceph_setattr(idmap, inode, &newattrs, NULL); } goto out_free; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 936b9e0b351d..85be3bf18cdf 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -79,18 +79,18 @@ static inline struct ceph_snap_context *page_snap_context(struct page *page) */ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) { - struct inode *inode; + struct inode *inode = mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci; struct ceph_snap_context *snapc; if (folio_test_dirty(folio)) { - dout("%p dirty_folio %p idx %lu -- already dirty\n", - mapping->host, folio, folio->index); + doutc(cl, "%llx.%llx %p idx %lu -- already dirty\n", + ceph_vinop(inode), folio, folio->index); VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } - inode = mapping->host; ci = ceph_inode(inode); /* dirty the head */ @@ -111,12 +111,12 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; - dout("%p dirty_folio %p idx %lu head %d/%d -> %d/%d " - "snapc %p seq %lld (%d snaps)\n", - mapping->host, folio, folio->index, - 
ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, "%llx.%llx %p idx %lu head %d/%d -> %d/%d " + "snapc %p seq %lld (%d snaps)\n", + ceph_vinop(inode), folio, folio->index, + ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, + ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, + snapc, snapc->seq, snapc->num_snaps); spin_unlock(&ci->i_ceph_lock); /* @@ -137,23 +137,22 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) static void ceph_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode; - struct ceph_inode_info *ci; + struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - inode = folio->mapping->host; - ci = ceph_inode(inode); if (offset != 0 || length != folio_size(folio)) { - dout("%p invalidate_folio idx %lu partial dirty page %zu~%zu\n", - inode, folio->index, offset, length); + doutc(cl, "%llx.%llx idx %lu partial dirty page %zu~%zu\n", + ceph_vinop(inode), folio->index, offset, length); return; } WARN_ON(!folio_test_locked(folio)); if (folio_test_private(folio)) { - dout("%p invalidate_folio idx %lu full dirty page\n", - inode, folio->index); + doutc(cl, "%llx.%llx idx %lu full dirty page\n", + ceph_vinop(inode), folio->index); snapc = folio_detach_private(folio); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); @@ -166,10 +165,10 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, static bool ceph_release_folio(struct folio *folio, gfp_t gfp) { struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("%llx:%llx release_folio idx %lu (%sdirty)\n", - ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); + doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), + folio->index, folio_test_dirty(folio) ? 
"" : "not "); if (folio_test_private(folio)) return false; @@ -229,7 +228,7 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) { struct inode *inode = subreq->rreq->inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; u32 xlen; @@ -244,7 +243,8 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; @@ -254,8 +254,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, osd_data->length, err); - dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result, - subreq->len, i_size_read(req->r_inode)); + doutc(cl, "result %d subreq->len=%zu i_size=%lld\n", req->r_result, + subreq->len, i_size_read(req->r_inode)); /* no object means success but no data */ if (err == -ENOENT) @@ -348,7 +348,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; @@ -383,7 +384,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) goto out; } - dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); + doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", + ceph_vinop(inode), subreq->start, subreq->len, len); iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); @@ -400,8 +402,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); if (err < 0) { - dout("%s: iov_ter_get_pages_alloc returned %d\n", - __func__, err); + doutc(cl, "%llx.%llx failed to allocate pages, %d\n", + ceph_vinop(inode), err); goto out; } @@ -429,12 +431,13 @@ out: ceph_osdc_put_request(req); if (err) netfs_subreq_terminated(subreq, err, false); - dout("%s: result %d\n", __func__, err); + doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); } static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int got = 0, want = CEPH_CAP_FILE_CACHE; struct ceph_netfs_request_data *priv; int ret = 0; @@ -466,12 +469,12 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) */ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { - dout("start_read %p, error getting cap\n", inode); + doutc(cl, "%llx.%llx, error getting cap\n", ceph_vinop(inode)); goto out; } if (!(got & want)) { - dout("start_read %p, no cache cap\n", inode); + doutc(cl, 
"%llx.%llx, no cache cap\n", ceph_vinop(inode)); ret = -EACCES; goto out; } @@ -563,13 +566,14 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, struct ceph_snap_context *page_snapc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *snapc = NULL; struct ceph_cap_snap *capsnap = NULL; spin_lock(&ci->i_ceph_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, - capsnap->context, capsnap->dirty_pages); + doutc(cl, " capsnap %p snapc %p has %d dirty pages\n", + capsnap, capsnap->context, capsnap->dirty_pages); if (!capsnap->dirty_pages) continue; @@ -601,8 +605,8 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, } if (!snapc && ci->i_wrbuffer_ref_head) { snapc = ceph_get_snap_context(ci->i_head_snapc); - dout(" head snapc %p has %d dirty pages\n", - snapc, ci->i_wrbuffer_ref_head); + doutc(cl, " head snapc %p has %d dirty pages\n", snapc, + ci->i_wrbuffer_ref_head); if (ctl) { ctl->i_size = i_size_read(inode); ctl->truncate_size = ci->i_truncate_size; @@ -658,7 +662,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct folio *folio = page_folio(page); struct inode *inode = page->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); int err; @@ -670,7 +675,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) bool caching = ceph_is_cache_enabled(inode); struct page *bounce_page = NULL; - dout("writepage %p idx %lu\n", page, page->index); + doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page, + page->index); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -678,13 +684,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { - dout("writepage %p page %p not dirty?\n", inode, page); + doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode), + page); return 0; } oldest = get_oldest_context(inode, &ceph_wbc, snapc); if (snapc->seq > oldest->seq) { - dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, snapc); + doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n", + ceph_vinop(inode), page, snapc); /* we should only noop if called by kswapd */ WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); @@ -695,8 +702,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* is this a partial page at end of file? */ if (page_off >= ceph_wbc.i_size) { - dout("folio at %lu beyond eof %llu\n", folio->index, - ceph_wbc.i_size); + doutc(cl, "%llx.%llx folio at %lu beyond eof %llu\n", + ceph_vinop(inode), folio->index, ceph_wbc.i_size); folio_invalidate(folio, 0, folio_size(folio)); return 0; } @@ -705,8 +712,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = ceph_wbc.i_size - page_off; wlen = IS_ENCRYPTED(inode) ? 
round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; - dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", - inode, page, page->index, page_off, wlen, snapc, snapc->seq); + doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n", + ceph_vinop(inode), page, page->index, page_off, wlen, snapc, + snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) @@ -747,8 +755,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osd_req_op_extent_osd_data_pages(req, 0, bounce_page ? &bounce_page : &page, wlen, 0, false, false); - dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n", - page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not "); + doutc(cl, "%llx.%llx %llu~%llu (%llu bytes, %sencrypted)\n", + ceph_vinop(inode), page_off, len, wlen, + IS_ENCRYPTED(inode) ? "" : "not "); req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(osdc, req); @@ -767,19 +776,21 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) wbc = &tmp_wbc; if (err == -ERESTARTSYS) { /* killed by SIGKILL */ - dout("writepage interrupted page %p\n", page); + doutc(cl, "%llx.%llx interrupted page %p\n", + ceph_vinop(inode), page); redirty_page_for_writepage(wbc, page); end_page_writeback(page); return err; } if (err == -EBLOCKLISTED) fsc->blocklisted = true; - dout("writepage setting page/mapping error %d %p\n", - err, page); + doutc(cl, "%llx.%llx setting page/mapping error %d %p\n", + ceph_vinop(inode), err, page); mapping_set_error(&inode->i_data, err); wbc->pages_skipped++; } else { - dout("writepage cleaned page %p\n", page); + doutc(cl, "%llx.%llx cleaned page %p\n", + ceph_vinop(inode), page); err = 0; /* vfs expects us to return 0 */ } oldest = detach_page_private(page); @@ -803,7 +814,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_client(inode)->write_congested) + ceph_inode_to_fs_client(inode)->write_congested) return AOP_WRITEPAGE_ACTIVATE; wait_on_page_fscache(page); @@ -829,6 +840,7 @@ static void writepages_finish(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_osd_data *osd_data; struct page *page; int num_pages, total_pages = 0; @@ -836,11 +848,11 @@ static void writepages_finish(struct ceph_osd_request *req) int rc = req->r_result; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); unsigned int len = 0; bool remove_page; - dout("writepages_finish %p rc %d\n", inode, rc); + doutc(cl, "%llx.%llx rc %d\n", ceph_vinop(inode), rc); if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); @@ -862,8 +874,10 @@ static void writepages_finish(struct ceph_osd_request *req) /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { - pr_warn("%s incorrect op %d req %p index %d tid %llu\n", - __func__, req->r_ops[i].op, req, i, req->r_tid); + pr_warn_client(cl, + "%llx.%llx incorrect op %d req %p index %d tid %llu\n", + ceph_vinop(inode), req->r_ops[i].op, req, i, + req->r_tid); break; } @@ -890,7 +904,7 @@ static void writepages_finish(struct ceph_osd_request *req) 
ceph_put_snap_context(detach_page_private(page)); end_page_writeback(page); - dout("unlocking %p\n", page); + doutc(cl, "unlocking %p\n", page); if (remove_page) generic_error_remove_page(inode->i_mapping, @@ -898,8 +912,9 @@ static void writepages_finish(struct ceph_osd_request *req) unlock_page(page); } - dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", - inode, osd_data->length, rc >= 0 ? num_pages : 0); + doutc(cl, "%llx.%llx wrote %llu bytes cleaned %d pages\n", + ceph_vinop(inode), osd_data->length, + rc >= 0 ? num_pages : 0); release_pages(osd_data->pages, num_pages); } @@ -926,7 +941,8 @@ static int ceph_writepages_start(struct address_space *mapping, { struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_vino vino = ceph_vino(inode); pgoff_t index, start_index, end = -1; struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; @@ -944,15 +960,15 @@ static int ceph_writepages_start(struct address_space *mapping, fsc->write_congested) return 0; - dout("writepages_start %p (mode=%s)\n", inode, - wbc->sync_mode == WB_SYNC_NONE ? "NONE" : - (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); + doutc(cl, "%llx.%llx (mode=%s)\n", ceph_vinop(inode), + wbc->sync_mode == WB_SYNC_NONE ? "NONE" : + (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); if (ceph_inode_is_shutdown(inode)) { if (ci->i_wrbuffer_ref > 0) { - pr_warn_ratelimited( - "writepage_start %p %lld forced umount\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + "%llx.%llx %lld forced umount\n", + ceph_vinop(inode), ceph_ino(inode)); } mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ @@ -976,11 +992,11 @@ retry: if (!snapc) { /* hmm, why does writepages get called when there is no dirty data? */ - dout(" no snap context with dirty data?\n"); + doutc(cl, " no snap context with dirty data?\n"); goto out; } - dout(" oldest snapc is %p seq %lld (%d snaps)\n", - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, " oldest snapc is %p seq %lld (%d snaps)\n", snapc, + snapc->seq, snapc->num_snaps); should_loop = false; if (ceph_wbc.head_snapc && snapc != last_snapc) { @@ -990,13 +1006,13 @@ retry: end = -1; if (index > 0) should_loop = true; - dout(" cyclic, start at %lu\n", index); + doutc(cl, " cyclic, start at %lu\n", index); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = true; - dout(" not cyclic, %lu to %lu\n", index, end); + doutc(cl, " not cyclic, %lu to %lu\n", index, end); } } else if (!ceph_wbc.head_snapc) { /* Do not respect wbc->range_{start,end}. Dirty pages @@ -1005,7 +1021,7 @@ retry: * associated with 'snapc' get written */ if (index > 0) should_loop = true; - dout(" non-head snapc, range whole\n"); + doutc(cl, " non-head snapc, range whole\n"); } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) @@ -1028,12 +1044,12 @@ retry: get_more_pages: nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); - dout("pagevec_lookup_range_tag got %d\n", nr_folios); + doutc(cl, "pagevec_lookup_range_tag got %d\n", nr_folios); if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { page = &fbatch.folios[i]->page; - dout("? 
%p idx %lu\n", page, page->index); + doutc(cl, "? %p idx %lu\n", page, page->index); if (locked_pages == 0) lock_page(page); /* first page */ else if (!trylock_page(page)) @@ -1042,15 +1058,15 @@ get_more_pages: /* only dirty pages, or our accounting breaks */ if (unlikely(!PageDirty(page)) || unlikely(page->mapping != mapping)) { - dout("!dirty or !mapping %p\n", page); + doutc(cl, "!dirty or !mapping %p\n", page); unlock_page(page); continue; } /* only if matching snap context */ pgsnapc = page_snap_context(page); if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); + doutc(cl, "page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); if (!should_loop && !ceph_wbc.head_snapc && wbc->sync_mode != WB_SYNC_NONE) @@ -1061,8 +1077,8 @@ get_more_pages: if (page_offset(page) >= ceph_wbc.i_size) { struct folio *folio = page_folio(page); - dout("folio at %lu beyond eof %llu\n", - folio->index, ceph_wbc.i_size); + doutc(cl, "folio at %lu beyond eof %llu\n", + folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || folio_pos(folio) >= i_size_read(inode)) && folio_clear_dirty_for_io(folio)) @@ -1072,23 +1088,23 @@ get_more_pages: continue; } if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + doutc(cl, "end of strip unit %p\n", page); unlock_page(page); break; } if (PageWriteback(page) || PageFsCache(page)) { if (wbc->sync_mode == WB_SYNC_NONE) { - dout("%p under writeback\n", page); + doutc(cl, "%p under writeback\n", page); unlock_page(page); continue; } - dout("waiting on writeback %p\n", page); + doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); wait_on_page_fscache(page); } if (!clear_page_dirty_for_io(page)) { - dout("%p !clear_page_dirty_for_io\n", page); + doutc(cl, "%p !clear_page_dirty_for_io\n", page); unlock_page(page); continue; } @@ -1143,8 +1159,8 @@ get_more_pages: } /* note position of first page in fbatch */ - dout("%p will write page %p idx %lu\n", - inode, page, page->index); + doutc(cl, "%llx.%llx will write page %p idx %lu\n", + ceph_vinop(inode), page, page->index); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( @@ -1158,8 +1174,9 @@ get_more_pages: locked_pages ? GFP_NOWAIT : GFP_NOFS); if (IS_ERR(pages[locked_pages])) { if (PTR_ERR(pages[locked_pages]) == -EINVAL) - pr_err("%s: inode->i_blkbits=%hhu\n", - __func__, inode->i_blkbits); + pr_err_client(cl, + "inode->i_blkbits=%hhu\n", + inode->i_blkbits); /* better not fail on first page! 
*/ BUG_ON(locked_pages == 0); pages[locked_pages] = NULL; @@ -1193,7 +1210,7 @@ get_more_pages: if (nr_folios && i == nr_folios && locked_pages < max_pages) { - dout("reached end fbatch, trying for more\n"); + doutc(cl, "reached end fbatch, trying for more\n"); folio_batch_release(&fbatch); goto get_more_pages; } @@ -1254,8 +1271,8 @@ new_request: /* Start a new extent */ osd_req_op_extent_dup_last(req, op_idx, cur_offset - offset); - dout("writepages got pages at %llu~%llu\n", - offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, + len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1288,12 +1305,13 @@ new_request: if (IS_ENCRYPTED(inode)) len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE); - dout("writepages got pages at %llu~%llu\n", offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, len); if (IS_ENCRYPTED(inode) && ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK)) - pr_warn("%s: bad encrypted write offset=%lld len=%llu\n", - __func__, offset, len); + pr_warn_client(cl, + "bad encrypted write offset=%lld len=%llu\n", + offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1345,14 +1363,14 @@ new_request: done = true; release_folios: - dout("folio_batch release on %d folios (%p)\n", (int)fbatch.nr, - fbatch.nr ? fbatch.folios[0] : NULL); + doutc(cl, "folio_batch release on %d folios (%p)\n", + (int)fbatch.nr, fbatch.nr ? fbatch.folios[0] : NULL); folio_batch_release(&fbatch); } if (should_loop && !done) { /* more to do; loop back to beginning of file */ - dout("writepages looping back to beginning of file\n"); + doutc(cl, "looping back to beginning of file\n"); end = start_index - 1; /* OK even when start_index == 0 */ /* to write dirty pages associated with next snapc, @@ -1390,7 +1408,8 @@ release_folios: out: ceph_osdc_put_request(req); ceph_put_snap_context(last_snapc); - dout("writepages dend - startone, rc = %d\n", rc); + doutc(cl, "%llx.%llx dend - startone, rc = %d\n", ceph_vinop(inode), + rc); return rc; } @@ -1424,11 +1443,12 @@ static struct ceph_snap_context * ceph_find_incompatible(struct page *page) { struct inode *inode = page->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (ceph_inode_is_shutdown(inode)) { - dout(" page %p %llx:%llx is shutdown\n", page, - ceph_vinop(inode)); + doutc(cl, " %llx.%llx page %p is shutdown\n", + ceph_vinop(inode), page); return ERR_PTR(-ESTALE); } @@ -1449,13 +1469,15 @@ ceph_find_incompatible(struct page *page) if (snapc->seq > oldest->seq) { /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); - dout(" page %p snapc %p not current or oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n", + ceph_vinop(inode), page, snapc); return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ - dout(" page %p snapc %p not current, but oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n", + ceph_vinop(inode), page, snapc); if (clear_page_dirty_for_io(page)) { int r = writepage_nounlock(page, NULL); if (r < 0) @@ -1524,10 +1546,11 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, { struct folio *folio = page_folio(subpage); struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); bool check_cap = false; - dout("write_end file %p 
inode %p folio %p %d~%d (%d)\n", file, - inode, folio, (int)pos, (int)copied, (int)len); + doutc(cl, "%llx.%llx file %p folio %p %d~%d (%d)\n", ceph_vinop(inode), + file, folio, (int)pos, (int)copied, (int)len); if (!folio_test_uptodate(folio)) { /* just return that nothing was copied on a short copy */ @@ -1587,6 +1610,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi = vma->vm_file->private_data; loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; @@ -1598,8 +1622,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_block_sigs(&oldset); - dout("filemap_fault %p %llx.%llx %llu trying to get caps\n", - inode, ceph_vinop(inode), off); + doutc(cl, "%llx.%llx %llu trying to get caps\n", + ceph_vinop(inode), off); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -1610,8 +1634,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) if (err < 0) goto out_restore; - dout("filemap_fault %p %llu got cap refs on %s\n", - inode, off, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu got cap refs on %s\n", ceph_vinop(inode), + off, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || !ceph_has_inline_data(ci)) { @@ -1619,8 +1643,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); - dout("filemap_fault %p %llu drop cap refs %s ret %x\n", - inode, off, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu drop cap refs %s ret %x\n", + ceph_vinop(inode), off, ceph_cap_string(got), ret); } else err = -EAGAIN; @@ -1661,8 +1685,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: filemap_invalidate_unlock_shared(mapping); - dout("filemap_fault %p %llu read inline data ret %x\n", - inode, off, ret); + doutc(cl, "%llx.%llx %llu read inline data ret %x\n", + ceph_vinop(inode), off, ret); } out_restore: ceph_restore_sigs(&oldset); @@ -1676,6 +1700,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct ceph_cap_flush *prealloc_cf; @@ -1702,8 +1727,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) else len = offset_in_thp(page, size); - dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n", - inode, ceph_vinop(inode), off, len, size); + doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n", + ceph_vinop(inode), off, len, size); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; else @@ -1714,8 +1739,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) if (err < 0) goto out_free; - dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", - inode, off, len, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode), + off, len, ceph_cap_string(got)); /* Update time before taking page lock */ file_update_time(vma->vm_file); @@ -1763,8 +1788,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); 
} - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", - inode, off, len, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu~%zd dropping cap refs on %s ret %x\n", + ceph_vinop(inode), off, len, ceph_cap_string(got), ret); ceph_put_cap_refs_async(ci, got); out_free: ceph_restore_sigs(&oldset); @@ -1778,6 +1803,7 @@ out_free: void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct address_space *mapping = inode->i_mapping; struct page *page; @@ -1798,8 +1824,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } - dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", - inode, ceph_vinop(inode), len, locked_page); + doutc(cl, "%p %llx.%llx len %zu locked_page %p\n", inode, + ceph_vinop(inode), len, locked_page); if (len > 0) { void *kaddr = kmap_atomic(page); @@ -1823,7 +1849,8 @@ int ceph_uninline_data(struct file *file) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; @@ -1836,8 +1863,8 @@ int ceph_uninline_data(struct file *file) inline_version = ci->i_inline_version; spin_unlock(&ci->i_ceph_lock); - dout("uninline_data %p %llx.%llx inline_version %llu\n", - inode, ceph_vinop(inode), inline_version); + doutc(cl, "%llx.%llx inline_version %llu\n", ceph_vinop(inode), + inline_version); if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -1949,8 +1976,8 @@ out_unlock: } out: ceph_free_cap_flush(prealloc_cf); - dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", - inode, ceph_vinop(inode), inline_version, err); + doutc(cl, "%llx.%llx inline_version %llu = %d\n", + ceph_vinop(inode), inline_version, err); return err; } @@ -1977,8 +2004,9 @@ enum { static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool, struct ceph_string *pool_ns) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->netfs.inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(&ci->netfs.inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; struct rb_node **p, *parent; struct ceph_pool_perm *perm; @@ -2013,10 +2041,10 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n", - pool, (int)pool_ns->len, pool_ns->str); + doutc(cl, "pool %lld ns %.*s no perm cached\n", pool, + (int)pool_ns->len, pool_ns->str); else - dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); + doutc(cl, "pool %lld no perm cached\n", pool); down_write(&mdsc->pool_perm_rwsem); p = &mdsc->pool_perm_tree.rb_node; @@ -2141,15 +2169,16 @@ out: if (!err) err = have; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n", - pool, (int)pool_ns->len, pool_ns->str, err); + doutc(cl, "pool %lld ns %.*s result = %d\n", pool, + (int)pool_ns->len, pool_ns->str, err); else - dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); + doutc(cl, "pool %lld result = %d\n", pool, err); return err; } int ceph_pool_perm_check(struct inode *inode, int need) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = 
ceph_inode(inode); struct ceph_string *pool_ns; s64 pool; @@ -2168,7 +2197,7 @@ int ceph_pool_perm_check(struct inode *inode, int need) return 0; } - if (ceph_test_mount_opt(ceph_inode_to_client(inode), + if (ceph_test_mount_opt(ceph_inode_to_fs_client(inode), NOPOOLPERM)) return 0; @@ -2179,13 +2208,11 @@ int ceph_pool_perm_check(struct inode *inode, int need) check: if (flags & CEPH_I_POOL_PERM) { if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { - dout("ceph_pool_perm_check pool %lld no read perm\n", - pool); + doutc(cl, "pool %lld no read perm\n", pool); return -EPERM; } if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { - dout("ceph_pool_perm_check pool %lld no write perm\n", - pool); + doutc(cl, "pool %lld no write perm\n", pool); return -EPERM; } return 0; diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index de1dee46d3df..930fbd54d2c8 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -15,7 +15,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); /* No caching for filesystem? */ if (!fsc->fscache) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a104669fcf4c..2c0b8dc3dd0d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -186,10 +186,10 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) mdsc->caps_avail_count += nr_caps; } - dout("%s: caps %d = %d used + %d resv + %d avail\n", - __func__, - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(mdsc->fsc->client, + "caps %d = %d used + %d resv + %d avail\n", + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); @@ -202,6 +202,7 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { + struct ceph_client *cl = mdsc->fsc->client; int i, j; struct ceph_cap *cap; int have; @@ -212,7 +213,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *s; LIST_HEAD(newcaps); - dout("reserve caps ctx=%p need=%d\n", ctx, need); + doutc(cl, "ctx=%p need=%d\n", ctx, need); /* first reserve any caps that are already allocated */ spin_lock(&mdsc->caps_list_lock); @@ -272,8 +273,8 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, continue; } - pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", - ctx, need, have + alloc); + pr_warn_client(cl, "ctx=%p ENOMEM need=%d got=%d\n", ctx, need, + have + alloc); err = -ENOMEM; break; } @@ -298,20 +299,21 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, spin_unlock(&mdsc->caps_list_lock); - dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", - ctx, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p %d = %d used + %d resv + %d avail\n", ctx, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); return err; } void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; bool reclaim = false; if (!ctx->count) return; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); + doutc(cl, "ctx=%p count=%d\n", ctx, 
ctx->count); spin_lock(&mdsc->caps_list_lock); __ceph_unreserve_caps(mdsc, ctx->count); ctx->count = 0; @@ -328,6 +330,7 @@ void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ @@ -359,9 +362,9 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, } spin_lock(&mdsc->caps_list_lock); - dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", - ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p (%d) %d = %d used + %d resv + %d avail\n", ctx, + ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(!ctx->count); BUG_ON(ctx->count > mdsc->caps_reserve_count); BUG_ON(list_empty(&mdsc->caps_list)); @@ -382,10 +385,12 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) { + struct ceph_client *cl = mdsc->fsc->client; + spin_lock(&mdsc->caps_list_lock); - dout("put_cap %p %d = %d used + %d resv + %d avail\n", - cap, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "%p %d = %d used + %d resv + %d avail\n", cap, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); mdsc->caps_use_count--; /* * Keep some preallocated caps around (ceph_min_count), to @@ -491,11 +496,13 @@ static void __insert_cap_node(struct ceph_inode_info *ci, static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; struct ceph_mount_options *opt = mdsc->fsc->mount_options; + ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ); - dout("__cap_set_timeouts %p %lu\n", &ci->netfs.inode, - ci->i_hold_caps_max - jiffies); + doutc(mdsc->fsc->client, "%p %llx.%llx %lu\n", inode, + ceph_vinop(inode), ci->i_hold_caps_max - jiffies); } /* @@ -509,8 +516,11 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->netfs.inode, - ci->i_ceph_flags, ci->i_hold_caps_max); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx flags 0x%lx at %lu\n", + inode, ceph_vinop(inode), ci->i_ceph_flags, + ci->i_hold_caps_max); if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { @@ -533,7 +543,9 @@ no_change: static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue_front %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) @@ -550,7 +562,9 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, static void __cap_delay_cancel(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_cancel %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if 
(list_empty(&ci->i_cap_delay_list)) return; spin_lock(&mdsc->cap_delay_lock); @@ -562,6 +576,9 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc, static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, unsigned issued) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + unsigned had = __ceph_caps_issued(ci, NULL); lockdep_assert_held(&ci->i_ceph_lock); @@ -586,7 +603,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, if (issued & CEPH_CAP_FILE_SHARED) atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->netfs.inode.i_mode)) { - dout(" marking %p NOT complete\n", &ci->netfs.inode); + doutc(cl, " marking %p NOT complete\n", inode); __ceph_dir_clear_complete(ci); } } @@ -635,7 +652,8 @@ void ceph_add_cap(struct inode *inode, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap **new_cap) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; int mds = session->s_mds; @@ -644,8 +662,9 @@ void ceph_add_cap(struct inode *inode, lockdep_assert_held(&ci->i_ceph_lock); - dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, - session->s_mds, cap_id, ceph_cap_string(issued), seq); + doutc(cl, "%p %llx.%llx mds%d cap %llx %s seq %d\n", inode, + ceph_vinop(inode), session->s_mds, cap_id, + ceph_cap_string(issued), seq); gen = atomic_read(&session->s_cap_gen); @@ -723,9 +742,9 @@ void ceph_add_cap(struct inode *inode, actual_wanted = __ceph_caps_wanted(ci); if ((wanted & ~actual_wanted) || (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) { - dout(" issued %s, mds wanted %s, actual %s, queueing\n", - ceph_cap_string(issued), ceph_cap_string(wanted), - ceph_cap_string(actual_wanted)); + doutc(cl, "issued %s, mds wanted %s, actual %s, queueing\n", + ceph_cap_string(issued), ceph_cap_string(wanted), + ceph_cap_string(actual_wanted)); __cap_delay_requeue(mdsc, ci); } @@ -742,9 +761,9 @@ void ceph_add_cap(struct inode *inode, WARN_ON(ci->i_auth_cap == cap); } - dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", - inode, ceph_vinop(inode), cap, ceph_cap_string(issued), - ceph_cap_string(issued|cap->issued), seq, mds); + doutc(cl, "inode %p %llx.%llx cap %p %s now %s seq %d mds%d\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(issued), + ceph_cap_string(issued|cap->issued), seq, mds); cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; @@ -766,6 +785,8 @@ void ceph_add_cap(struct inode *inode, */ static int __cap_is_valid(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; + struct ceph_client *cl = cap->session->s_mdsc->fsc->client; unsigned long ttl; u32 gen; @@ -773,9 +794,9 @@ static int __cap_is_valid(struct ceph_cap *cap) ttl = cap->session->s_cap_ttl; if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { - dout("__cap_is_valid %p cap %p issued %s " - "but STALE (gen %u vs %u)\n", &cap->ci->netfs.inode, - cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); + doutc(cl, "%p %llx.%llx cap %p issued %s but STALE (gen %u vs %u)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), cap->cap_gen, gen); return 0; } @@ -789,6 +810,8 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { + struct inode *inode = &ci->netfs.inode; + 
struct ceph_client *cl = ceph_inode_to_client(inode); int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -799,8 +822,8 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) cap = rb_entry(p, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; - dout("__ceph_caps_issued %p cap %p issued %s\n", - &ci->netfs.inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode, + ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); have |= cap->issued; if (implemented) *implemented |= cap->implemented; @@ -843,16 +866,18 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap) */ static void __touch_cap(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; struct ceph_mds_session *s = cap->session; + struct ceph_client *cl = s->s_mdsc->fsc->client; spin_lock(&s->s_cap_lock); if (!s->s_cap_iterator) { - dout("__touch_cap %p cap %p mds%d\n", &cap->ci->netfs.inode, cap, - s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d\n", inode, + ceph_vinop(inode), cap, s->s_mds); list_move_tail(&cap->session_caps, &s->s_caps); } else { - dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n", - &cap->ci->netfs.inode, cap, s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d NOP, iterating over caps\n", + inode, ceph_vinop(inode), cap, s->s_mds); } spin_unlock(&s->s_cap_lock); } @@ -864,15 +889,16 @@ static void __touch_cap(struct ceph_cap *cap) */ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct rb_node *p; int have = ci->i_snap_caps; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(have), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx snap issued %s (mask %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(mask)); return 1; } @@ -881,10 +907,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) if (!__cap_is_valid(cap)) continue; if ((cap->issued & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), cap, - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) __touch_cap(cap); return 1; @@ -893,10 +919,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) /* does a combination of caps satisfy mask? 
*/ have |= cap->issued; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx combo issued %s (mask %s)\n", + inode, ceph_vinop(inode), + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) { struct rb_node *q; @@ -922,7 +948,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask, int touch) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); int r; r = __ceph_caps_issued_mask(ci, mask, touch); @@ -954,13 +980,14 @@ int __ceph_caps_revoking_other(struct ceph_inode_info *ci, int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret; spin_lock(&ci->i_ceph_lock); ret = __ceph_caps_revoking_other(ci, NULL, mask); spin_unlock(&ci->i_ceph_lock); - dout("ceph_caps_revoking %p %s = %d\n", inode, - ceph_cap_string(mask), ret); + doutc(cl, "%p %llx.%llx %s = %d\n", inode, ceph_vinop(inode), + ceph_cap_string(mask), ret); return ret; } @@ -996,7 +1023,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR); const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ; unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ; @@ -1107,21 +1134,23 @@ int ceph_is_any_caps(struct inode *inode) void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_inode_info *ci = cap->ci; + struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc; int removed = 0; /* 'ci' being NULL means the remove has already occurred */ if (!ci) { - dout("%s: cap inode is NULL\n", __func__); + doutc(cl, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); - dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode); + doutc(cl, "%p from %p %llx.%llx\n", cap, inode, ceph_vinop(inode)); - mdsc = ceph_inode_to_client(&ci->netfs.inode)->mdsc; + mdsc = ceph_inode_to_fs_client(&ci->netfs.inode)->mdsc; /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); @@ -1132,8 +1161,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ - dout("__ceph_remove_cap delaying %p removal from session %p\n", - cap, cap->session); + doutc(cl, "delaying %p removal from session %p\n", cap, + cap->session); } else { list_del_init(&cap->session_caps); session->s_nr_caps--; @@ -1178,20 +1207,21 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } } -void ceph_remove_cap(struct ceph_cap *cap, bool queue_release) +void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release) { struct ceph_inode_info *ci = cap->ci; struct ceph_fs_client *fsc; /* 'ci' being NULL means the remove has already occurred */ if (!ci) { - dout("%s: cap inode is
NULL\n", __func__); + doutc(mdsc->fsc->client, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); - fsc = ceph_inode_to_client(&ci->netfs.inode); + fsc = ceph_inode_to_fs_client(&ci->netfs.inode); WARN_ON_ONCE(ci->i_auth_cap == cap && !list_empty(&ci->i_dirty_item) && !fsc->blocklisted && @@ -1227,15 +1257,19 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; void *p; - struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; - - dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n", - __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino, - ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), - ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, - arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, - arg->size, arg->max_size, arg->xattr_version, - arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); + struct ceph_mds_client *mdsc = arg->session->s_mdsc; + struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; + + doutc(mdsc->fsc->client, + "%s %llx %llx caps %s wanted %s dirty %s seq %u/%u" + " tid %llu/%llu mseq %u follows %lld size %llu/%llu" + " xattr_ver %llu xattr_len %d\n", + ceph_cap_op_name(arg->op), arg->cid, arg->ino, + ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), + ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, + arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, + arg->size, arg->max_size, arg->xattr_version, + arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); msg->hdr.version = cpu_to_le16(12); msg->hdr.tid = cpu_to_le64(arg->flush_tid); @@ -1342,6 +1376,8 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) */ void __ceph_remove_caps(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) @@ -1351,7 +1387,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); } spin_unlock(&ci->i_ceph_lock); } @@ -1370,6 +1406,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int held, revoking; lockdep_assert_held(&ci->i_ceph_lock); @@ -1378,10 +1415,10 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, revoking = cap->implemented & ~cap->issued; retain &= ~revoking; - dout("%s %p cap %p session %p %s -> %s (revoking %s)\n", - __func__, inode, cap, cap->session, - ceph_cap_string(held), ceph_cap_string(held & retain), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx cap %p session %p %s -> %s (revoking %s)\n", + inode, ceph_vinop(inode), cap, cap->session, + ceph_cap_string(held), ceph_cap_string(held & retain), + ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); ci->i_ceph_flags &= ~CEPH_I_FLUSH; @@ -1497,13 +1534,16 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { struct ceph_msg *msg; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS, false); if (!msg) { 
- pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", - ceph_vinop(inode), ceph_cap_string(arg->dirty), - arg->flush_tid); + pr_err_client(cl, + "error allocating cap msg: ino (%llx.%llx)" + " flushing %s tid %llu, requeuing cap.\n", + ceph_vinop(inode), ceph_cap_string(arg->dirty), + arg->flush_tid); spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); @@ -1592,11 +1632,13 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; u64 oldest_flush_tid = 0; u64 first_tid = 1, last_tid = 0; - dout("__flush_snaps %p session %p\n", inode, session); + doutc(cl, "%p %llx.%llx session %p\n", inode, ceph_vinop(inode), + session); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { /* @@ -1611,7 +1653,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, /* only flush each capsnap once */ if (capsnap->cap_flush.tid > 0) { - dout(" already flushed %p, skipping\n", capsnap); + doutc(cl, "already flushed %p, skipping\n", capsnap); continue; } @@ -1643,8 +1685,8 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, int ret; if (!(cap && cap->session == session)) { - dout("__flush_snaps %p auth cap %p not mds%d, " - "stop\n", inode, cap, session->s_mds); + doutc(cl, "%p %llx.%llx auth cap %p not mds%d, stop\n", + inode, ceph_vinop(inode), cap, session->s_mds); break; } @@ -1665,15 +1707,17 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); - dout("__flush_snaps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", inode, + ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("__flush_snaps: error sending cap flushsnap, " - "ino (%llx.%llx) tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap, " + "ino (%llx.%llx) tid %llu follows %llu\n", + ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -1685,28 +1729,29 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession) { struct inode *inode = &ci->netfs.inode; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_mds_session *session = NULL; bool need_put = false; int mds; - dout("ceph_flush_snaps %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (psession) session = *psession; retry: spin_lock(&ci->i_ceph_lock); if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { - dout(" no capsnap needs flush, doing nothing\n"); + doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } if (!ci->i_auth_cap) { - dout(" no auth cap (migrating?), doing nothing\n"); + doutc(cl, " no auth cap (migrating?), doing nothing\n"); goto out; } mds = ci->i_auth_cap->session->s_mds; if (session && session->s_mds != mds) { - dout(" oops, wrong session %p mutex\n", session); + doutc(cl, " oops, wrong session %p mutex\n", session); ceph_put_mds_session(session); session = NULL; } @@ -1750,23 +1795,25 @@ int 
__ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->netfs.inode.i_sb)->mdsc; + ceph_sb_to_fs_client(ci->netfs.inode.i_sb)->mdsc; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int was = ci->i_dirty_caps; int dirty = 0; lockdep_assert_held(&ci->i_ceph_lock); if (!ci->i_auth_cap) { - pr_warn("__mark_dirty_caps %p %llx mask %s, " - "but no auth cap (session was closed?)\n", - inode, ceph_ino(inode), ceph_cap_string(mask)); + pr_warn_client(cl, "%p %llx.%llx mask %s, " + "but no auth cap (session was closed?)\n", + inode, ceph_vinop(inode), + ceph_cap_string(mask)); return 0; } - dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->netfs.inode, - ceph_cap_string(mask), ceph_cap_string(was), - ceph_cap_string(was | mask)); + doutc(cl, "%p %llx.%llx %s dirty %s -> %s\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), + ceph_cap_string(was), ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { struct ceph_mds_session *session = ci->i_auth_cap->session; @@ -1779,8 +1826,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); } - dout(" inode %p now dirty snapc %p auth cap %p\n", - &ci->netfs.inode, ci->i_head_snapc, ci->i_auth_cap); + doutc(cl, "%p %llx.%llx now dirty snapc %p auth cap %p\n", + inode, ceph_vinop(inode), ci->i_head_snapc, + ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &session->s_cap_dirty); @@ -1873,7 +1921,8 @@ static u64 __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session, bool wake, u64 *oldest_flush_tid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *cf = NULL; int flushing; @@ -1884,13 +1933,13 @@ static u64 __mark_caps_flushing(struct inode *inode, BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; - dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", - ceph_cap_string(flushing), - ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps | flushing)); + doutc(cl, "flushing %s, flushing_caps %s -> %s\n", + ceph_cap_string(flushing), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps | flushing)); ci->i_flushing_caps |= flushing; ci->i_dirty_caps = 0; - dout(" inode %p now !dirty\n", inode); + doutc(cl, "%p %llx.%llx now !dirty\n", inode, ceph_vinop(inode)); swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; @@ -1921,6 +1970,7 @@ static int try_nonblocking_invalidate(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; @@ -1932,12 +1982,13 @@ static int try_nonblocking_invalidate(struct inode *inode) if (inode->i_data.nrpages == 0 && invalidating_gen == ci->i_rdcache_gen) { /* success. 
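__ceph_mark_dirty_caps() above logs the transition "dirty %s -> %s", that is, the old mask and old-mask|new-mask rendered by ceph_cap_string(). A compact sketch of that bookkeeping; the four cap bits and their two-letter tags are invented stand-ins for the kernel's real cap set:

    #include <stdio.h>
    #include <string.h>

    enum { CAP_AUTH_SHARED = 1, CAP_LINK_SHARED = 2,
           CAP_XATTR_SHARED = 4, CAP_FILE_WR = 8 };

    /* crude ceph_cap_string() substitute: render set bits as short tags */
    static const char *cap_string(unsigned caps, char buf[16])
    {
            buf[0] = '\0';
            if (caps & CAP_AUTH_SHARED)  strcat(buf, "As");
            if (caps & CAP_LINK_SHARED)  strcat(buf, "Ls");
            if (caps & CAP_XATTR_SHARED) strcat(buf, "Xs");
            if (caps & CAP_FILE_WR)      strcat(buf, "Fw");
            return buf[0] ? buf : "-";
    }

    int main(void)
    {
            unsigned was  = CAP_AUTH_SHARED;  /* i_dirty_caps before    */
            unsigned mask = CAP_FILE_WR;      /* caps being dirtied now */
            char a[16], b[16];

            printf("dirty %s -> %s\n",
                   cap_string(was, a), cap_string(was | mask, b));
            /* prints: dirty As -> AsFw */
            return 0;
    }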
*/ - dout("try_nonblocking_invalidate %p success\n", inode); + doutc(cl, "%p %llx.%llx success\n", inode, + ceph_vinop(inode)); /* save any racing async invalidate some trouble */ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; return 0; } - dout("try_nonblocking_invalidate %p failed\n", inode); + doutc(cl, "%p %llx.%llx failed\n", inode, ceph_vinop(inode)); return -1; } @@ -1969,6 +2020,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; @@ -2043,9 +2095,9 @@ retry: } } - dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s" - " issued %s revoking %s retain %s %s%s%s\n", ceph_vinop(inode), - ceph_cap_string(file_wanted), + doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " + "flushing %s issued %s revoking %s retain %s %s%s%s\n", + inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(issued), ceph_cap_string(revoking), @@ -2066,10 +2118,10 @@ retry: (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { - dout("check_caps trying to invalidate on %llx.%llx\n", - ceph_vinop(inode)); + doutc(cl, "trying to invalidate on %p %llx.%llx\n", + inode, ceph_vinop(inode)); if (try_nonblocking_invalidate(inode) < 0) { - dout("check_caps queuing invalidate\n"); + doutc(cl, "queuing invalidate\n"); queue_invalidate = true; ci->i_rdcache_revoking = ci->i_rdcache_gen; } @@ -2097,35 +2149,35 @@ retry: cap_used &= ~ci->i_auth_cap->issued; revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", - cap->mds, cap, ceph_cap_string(cap_used), - ceph_cap_string(cap->issued), - ceph_cap_string(cap->implemented), - ceph_cap_string(revoking)); + doutc(cl, " mds%d cap %p used %s issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap_used), + ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ if (ci->i_wanted_max_size > ci->i_max_size && ci->i_wanted_max_size > ci->i_requested_max_size) { - dout("requesting new max_size\n"); + doutc(cl, "requesting new max_size\n"); goto ack; } /* approaching file_max? */ if (__ceph_should_report_size(ci)) { - dout("i_size approaching max_size\n"); + doutc(cl, "i_size approaching max_size\n"); goto ack; } } /* flush anything dirty? */ if (cap == ci->i_auth_cap) { if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) { - dout("flushing dirty caps\n"); + doutc(cl, "flushing dirty caps\n"); goto ack; } if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { - dout("flushing snap caps\n"); + doutc(cl, "flushing snap caps\n"); goto ack; } } @@ -2133,7 +2185,7 @@ retry: /* completed revocation? going down and there are no caps? 
*/ if (revoking) { if ((revoking & cap_used) == 0) { - dout("completed revocation of %s\n", + doutc(cl, "completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; } @@ -2232,7 +2284,7 @@ ack: */ static int try_flush_caps(struct inode *inode, u64 *ptid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing = 0; u64 flush_tid = 0, oldest_flush_tid = 0; @@ -2310,7 +2362,8 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) */ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; @@ -2400,8 +2453,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) kfree(sessions); } - dout("%s %p wait on tid %llu %llu\n", __func__, - inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + doutc(cl, "%p %llx.%llx wait on tid %llu %llu\n", inode, + ceph_vinop(inode), req1 ? req1->r_tid : 0ULL, + req2 ? req2->r_tid : 0ULL); if (req1) { ret = !wait_for_completion_timeout(&req1->r_safe_completion, ceph_timeout_jiffies(req1->r_timeout)); @@ -2427,11 +2481,13 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int ret, err; int dirty; - dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); + doutc(cl, "%p %llx.%llx%s\n", inode, ceph_vinop(inode), + datasync ? " datasync" : ""); ret = file_write_and_wait_range(file, start, end); if (datasync) @@ -2442,7 +2498,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) goto out; dirty = try_flush_caps(inode, &flush_tid); - dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + doutc(cl, "dirty caps are %s\n", ceph_cap_string(dirty)); err = flush_mdlog_and_wait_inode_unsafe_requests(inode); @@ -2463,7 +2519,8 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; out: - dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); + doutc(cl, "%p %llx.%llx%s result=%d\n", inode, ceph_vinop(inode), + datasync ? 
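ceph_check_caps() above derives revoking = cap->implemented & ~cap->issued, the caps the MDS granted earlier and now wants back, and treats the revocation as complete only once none of those bits are still in use. The mask logic in isolation, with arbitrary example values:

    #include <stdio.h>

    int main(void)
    {
            unsigned implemented = 0x5;  /* caps the client may still exercise */
            unsigned issued      = 0x1;  /* caps currently granted by the MDS  */
            unsigned used        = 0x4;  /* caps in active use                 */

            unsigned revoking = implemented & ~issued;   /* here: 0x4 */

            if ((revoking & used) == 0)
                    printf("completed revocation of %x\n", revoking);
            else
                    printf("still using %x, ack deferred\n", revoking & used);
            return 0;
    }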
" datasync" : "", ret); return ret; } @@ -2476,12 +2533,13 @@ out: int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int err = 0; int dirty; int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); - dout("write_inode %p wait=%d\n", inode, wait); + doutc(cl, "%p %llx.%llx wait=%d\n", inode, ceph_vinop(inode), wait); ceph_fscache_unpin_writeback(inode, wbc); if (wait) { err = ceph_wait_on_async_create(inode); @@ -2493,7 +2551,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + ceph_sb_to_fs_client(inode->i_sb)->mdsc; spin_lock(&ci->i_ceph_lock); if (__ceph_caps_dirty(ci)) @@ -2511,6 +2569,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, __acquires(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap; struct ceph_cap_flush *cf; int ret; @@ -2536,8 +2595,8 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - inode, cap, session->s_mds); + pr_err_client(cl, "%p auth cap %p not mds%d ???\n", + inode, cap, session->s_mds); break; } @@ -2546,8 +2605,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, if (!cf->is_capsnap) { struct cap_msg_args arg; - dout("kick_flushing_caps %p cap %p tid %llu %s\n", - inode, cap, cf->tid, ceph_cap_string(cf->caps)); + doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n", + inode, ceph_vinop(inode), cap, cf->tid, + ceph_cap_string(cf->caps)); __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, (cf->tid < last_snap_flush ? 
CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0), @@ -2561,9 +2621,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); - dout("kick_flushing_caps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, - ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", + inode, ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); @@ -2571,11 +2631,10 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("kick_flushing_caps: error sending " - "cap flushsnap, ino (%llx.%llx) " - "tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, - capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap," + " %p %llx.%llx tid %llu follows %llu\n", + inode, ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -2588,22 +2647,26 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; - dout("early_kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2636,24 +2699,28 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; lockdep_assert_held(&session->s_mutex); - dout("kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2670,11 +2737,13 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, { struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_cap *cap = ci->i_auth_cap; + struct inode *inode = &ci->netfs.inode; lockdep_assert_held(&ci->i_ceph_lock); - dout("%s %p flushing %s\n", __func__, &ci->netfs.inode, - ceph_cap_string(ci->i_flushing_caps)); + doutc(mdsc->fsc->client, "%p %llx.%llx flushing %s\n", + inode, ceph_vinop(inode), + ceph_cap_string(ci->i_flushing_caps)); if (!list_empty(&ci->i_cap_flush_list)) 
{ u64 oldest_flush_tid; @@ -2696,6 +2765,9 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, bool snap_rwsem_locked) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + lockdep_assert_held(&ci->i_ceph_lock); if (got & CEPH_CAP_PIN) @@ -2716,10 +2788,10 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) - ihold(&ci->netfs.inode); + ihold(inode); ci->i_wb_ref++; - dout("%s %p wb %d -> %d (?)\n", __func__, - &ci->netfs.inode, ci->i_wb_ref-1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref-1, ci->i_wb_ref); } } @@ -2746,20 +2818,23 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, loff_t endoff, int flags, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; int have, implemented; bool snap_rwsem_locked = false; - dout("get_cap_refs %p need %s want %s\n", inode, - ceph_cap_string(need), ceph_cap_string(want)); + doutc(cl, "%p %llx.%llx need %s want %s\n", inode, + ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(want)); again: spin_lock(&ci->i_ceph_lock); if ((flags & CHECK_FILELOCK) && (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { - dout("try_get_cap_refs %p error filelock\n", inode); + doutc(cl, "%p %llx.%llx error filelock\n", inode, + ceph_vinop(inode)); ret = -EIO; goto out_unlock; } @@ -2779,8 +2854,8 @@ again: if (have & need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { - dout("get_cap_refs %p endoff %llu > maxsize %llu\n", - inode, endoff, ci->i_max_size); + doutc(cl, "%p %llx.%llx endoff %llu > maxsize %llu\n", + inode, ceph_vinop(inode), endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN; goto out_unlock; @@ -2790,7 +2865,8 @@ again: * can get a final snapshot value for size+mtime. 
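ceph_take_cap_refs() above prints the writeback refcount as a transition, "wb %d -> %d", using (i_wb_ref-1, i_wb_ref) after the increment, and pins the inode (ihold()) only on the 0 -> 1 edge. A single-threaded sketch of that pattern; the kernel does all of this under i_ceph_lock:

    #include <stdio.h>

    struct inode_state {
            int wb_ref;   /* stands in for ci->i_wb_ref    */
            int pinned;   /* stands in for the ihold() ref */
    };

    static void take_wb_ref(struct inode_state *s)
    {
            if (s->wb_ref == 0)
                    s->pinned++;            /* ihold() on the first taker */
            s->wb_ref++;
            printf("wb %d -> %d\n", s->wb_ref - 1, s->wb_ref);
    }

    int main(void)
    {
            struct inode_state s = { 0, 0 };

            take_wb_ref(&s);   /* wb 0 -> 1, inode pinned */
            take_wb_ref(&s);   /* wb 1 -> 2               */
            return 0;
    }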
*/ if (__ceph_have_pending_cap_snap(ci)) { - dout("get_cap_refs %p cap_snap_pending\n", inode); + doutc(cl, "%p %llx.%llx cap_snap_pending\n", inode, + ceph_vinop(inode)); goto out_unlock; } } @@ -2808,9 +2884,9 @@ again: int not = want & ~(have & need); int revoking = implemented & ~have; int exclude = revoking & not; - dout("get_cap_refs %p have %s but not %s (revoking %s)\n", - inode, ceph_cap_string(have), ceph_cap_string(not), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx have %s but not %s (revoking %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(not), ceph_cap_string(revoking)); if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) { if (!snap_rwsem_locked && !ci->i_head_snapc && @@ -2850,28 +2926,31 @@ again: spin_unlock(&s->s_cap_lock); } if (session_readonly) { - dout("get_cap_refs %p need %s but mds%d readonly\n", - inode, ceph_cap_string(need), ci->i_auth_cap->mds); + doutc(cl, "%p %llx.%llx need %s but mds%d readonly\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ci->i_auth_cap->mds); ret = -EROFS; goto out_unlock; } if (ceph_inode_is_shutdown(inode)) { - dout("get_cap_refs %p inode is shutdown\n", inode); + doutc(cl, "%p %llx.%llx inode is shutdown\n", + inode, ceph_vinop(inode)); ret = -ESTALE; goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~mds_wanted) { - dout("get_cap_refs %p need %s > mds_wanted %s\n", - inode, ceph_cap_string(need), - ceph_cap_string(mds_wanted)); + doutc(cl, "%p %llx.%llx need %s > mds_wanted %s\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(mds_wanted)); ret = -EUCLEAN; goto out_unlock; } - dout("get_cap_refs %p have %s need %s\n", inode, - ceph_cap_string(have), ceph_cap_string(need)); + doutc(cl, "%p %llx.%llx have %s need %s\n", inode, + ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(need)); } out_unlock: @@ -2886,8 +2965,8 @@ out_unlock: else if (ret == 1) ceph_update_cap_hit(&mdsc->metric); - dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + doutc(cl, "%p %llx.%llx ret %d got %s\n", inode, + ceph_vinop(inode), ret, ceph_cap_string(*got)); return ret; } @@ -2899,13 +2978,14 @@ out_unlock: static void check_max_size(struct inode *inode, loff_t endoff) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int check = 0; /* do we need to explicitly request a larger max_size? 
*/ spin_lock(&ci->i_ceph_lock); if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) { - dout("write %p at large endoff %llu, req max_size\n", - inode, endoff); + doutc(cl, "write %p %llx.%llx at large endoff %llu, req max_size\n", + inode, ceph_vinop(inode), endoff); ci->i_wanted_max_size = endoff; } /* duplicate ceph_check_caps()'s logic */ @@ -2964,7 +3044,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, int want, loff_t endoff, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret, _got, flags; ret = ceph_pool_perm_check(inode, need); @@ -3115,10 +3195,12 @@ void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + if (!capsnap->need_flush && !capsnap->writing && !capsnap->dirty_pages) { - dout("dropping cap_snap %p follows %llu\n", - capsnap, capsnap->follows); + doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) @@ -3150,6 +3232,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, enum put_cap_refs_mode mode) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int last = 0, put = 0, flushsnaps = 0, wake = 0; bool check_flushsnaps = false; @@ -3172,8 +3255,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, put++; check_flushsnaps = true; } - dout("put_cap_refs %p wb %d -> %d (?)\n", - inode, ci->i_wb_ref+1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref+1, ci->i_wb_ref); } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { @@ -3213,8 +3296,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, } spin_unlock(&ci->i_ceph_lock); - dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), - last ? " last" : "", put ? " put" : ""); + doutc(cl, "%p %llx.%llx had %s%s%s\n", inode, ceph_vinop(inode), + ceph_cap_string(had), last ? " last" : "", put ? " put" : ""); switch (mode) { case PUT_CAP_REFS_SYNC: @@ -3264,6 +3347,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_snap *capsnap = NULL, *iter; int put = 0; bool last = false; @@ -3287,11 +3371,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } - dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n", - inode, - ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - last ? " LAST" : ""); + doutc(cl, "on %p %llx.%llx head %d/%d -> %d/%d %s\n", + inode, ceph_vinop(inode), ci->i_wrbuffer_ref+nr, + ci->i_wrbuffer_ref_head+nr, ci->i_wrbuffer_ref, + ci->i_wrbuffer_ref_head, last ? 
" LAST" : ""); } else { list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->context == snapc) { @@ -3321,13 +3404,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, } } } - dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s\n", - inode, capsnap, capsnap->context->seq, - ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, - ci->i_wrbuffer_ref, capsnap->dirty_pages, - last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : ""); + doutc(cl, "%p %llx.%llx cap_snap %p snap %lld %d/%d -> %d/%d %s%s\n", + inode, ceph_vinop(inode), capsnap, capsnap->context->seq, + ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, + ci->i_wrbuffer_ref, capsnap->dirty_pages, + last ? " (wrbuffer last)" : "", + complete_capsnap ? " (complete capsnap)" : ""); } unlock: @@ -3350,9 +3432,10 @@ unlock: */ static void invalidate_aliases(struct inode *inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn, *prev = NULL; - dout("invalidate_aliases inode %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); d_prune_aliases(inode); /* * For non-directory inode, d_find_alias() only returns @@ -3411,6 +3494,7 @@ static void handle_cap_grant(struct inode *inode, __releases(ci->i_ceph_lock) __releases(session->s_mdsc->snap_rwsem) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); @@ -3434,10 +3518,11 @@ static void handle_cap_grant(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, session->s_mds, seq, ceph_cap_string(newcaps)); - dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, - i_size_read(inode)); + doutc(cl, "%p %llx.%llx cap %p mds%d seq %d %s\n", inode, + ceph_vinop(inode), cap, session->s_mds, seq, + ceph_cap_string(newcaps)); + doutc(cl, " size %llu max_size %llu, i_size %llu\n", size, + max_size, i_size_read(inode)); /* @@ -3497,15 +3582,17 @@ static void handle_cap_grant(struct inode *inode, inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); ci->i_btime = extra_info->btime; - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len || memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth, ci->fscrypt_auth_len)) - pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", - __func__, ci->fscrypt_auth_len, + pr_warn_ratelimited_client(cl, + "cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", + ci->fscrypt_auth_len, extra_info->fscrypt_auth_len); #endif } @@ -3523,8 +3610,8 @@ static void handle_cap_grant(struct inode *inode, u64 version = le64_to_cpu(grant->xattr_version); if (version > ci->i_xattrs.version) { - dout(" got new xattrs v%llu on %p len %d\n", - version, inode, len); + doutc(cl, " got new xattrs v%llu on %p %llx.%llx len %d\n", + version, inode, ceph_vinop(inode), len); if (ci->i_xattrs.blob) 
ceph_buffer_put(ci->i_xattrs.blob); ci->i_xattrs.blob = ceph_buffer_get(xattr_buf); @@ -3575,8 +3662,8 @@ static void handle_cap_grant(struct inode *inode, if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) { if (max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", - ci->i_max_size, max_size); + doutc(cl, "max_size %lld -> %llu\n", ci->i_max_size, + max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { ci->i_wanted_max_size = 0; /* reset */ @@ -3590,10 +3677,9 @@ static void handle_cap_grant(struct inode *inode, wanted = __ceph_caps_wanted(ci); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout(" my wanted = %s, used = %s, dirty %s\n", - ceph_cap_string(wanted), - ceph_cap_string(used), - ceph_cap_string(dirty)); + doutc(cl, " my wanted = %s, used = %s, dirty %s\n", + ceph_cap_string(wanted), ceph_cap_string(used), + ceph_cap_string(dirty)); if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) && (wanted & ~(cap->mds_wanted | newcaps))) { @@ -3614,10 +3700,9 @@ static void handle_cap_grant(struct inode *inode, if (cap->issued & ~newcaps) { int revoking = cap->issued & ~newcaps; - dout("revocation: %s -> %s (revoking %s)\n", - ceph_cap_string(cap->issued), - ceph_cap_string(newcaps), - ceph_cap_string(revoking)); + doutc(cl, "revocation: %s -> %s (revoking %s)\n", + ceph_cap_string(cap->issued), ceph_cap_string(newcaps), + ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && (revoking & used & CEPH_CAP_FILE_BUFFER)) writeback = true; /* initiate writeback; will delay ack */ @@ -3635,11 +3720,12 @@ static void handle_cap_grant(struct inode *inode, cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { - dout("caps unchanged: %s -> %s\n", - ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); + doutc(cl, "caps unchanged: %s -> %s\n", + ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); } else { - dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), - ceph_cap_string(newcaps)); + doutc(cl, "grant: %s -> %s\n", ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); /* non-auth MDS is revoking the newly grant caps ? 
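handle_cap_grant() above classifies an MDS message by comparing the currently issued mask with the newly granted one: bits present before but absent now mean a revocation, an identical mask means no change, and anything else is a widening grant. The same three-way split, reduced to a sketch:

    #include <stdio.h>

    static void classify(unsigned issued, unsigned newcaps)
    {
            unsigned revoking = issued & ~newcaps;

            if (revoking)
                    printf("revocation: %x -> %x (revoking %x)\n",
                           issued, newcaps, revoking);
            else if (issued == newcaps)
                    printf("caps unchanged: %x -> %x\n", issued, newcaps);
            else
                    printf("grant: %x -> %x\n", issued, newcaps);
    }

    int main(void)
    {
            classify(0x7, 0x3);   /* revocation */
            classify(0x3, 0x3);   /* unchanged  */
            classify(0x3, 0x7);   /* grant      */
            return 0;
    }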
*/ if (cap == ci->i_auth_cap && __ceph_caps_revoking_other(ci, cap, newcaps)) @@ -3727,7 +3813,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_flush *cf, *tmp_cf; LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); @@ -3764,11 +3851,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, } } - dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," - " flushing %s -> %s\n", - inode, session->s_mds, seq, ceph_cap_string(dirty), - ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps & ~cleaned)); + doutc(cl, "%p %llx.%llx mds%d seq %d on %s cleaned %s, flushing %s -> %s\n", + inode, ceph_vinop(inode), session->s_mds, seq, + ceph_cap_string(dirty), ceph_cap_string(cleaned), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps & ~cleaned)); if (list_empty(&to_remove) && !cleaned) goto out; @@ -3784,18 +3871,21 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, if (list_empty(&ci->i_cap_flush_list)) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) { - dout(" mds%d still flushing cap on %p\n", - session->s_mds, - &list_first_entry(&session->s_cap_flushing, - struct ceph_inode_info, - i_flushing_item)->netfs.inode); + struct inode *inode = + &list_first_entry(&session->s_cap_flushing, + struct ceph_inode_info, + i_flushing_item)->netfs.inode; + doutc(cl, " mds%d still flushing cap on %p %llx.%llx\n", + session->s_mds, inode, ceph_vinop(inode)); } } mdsc->num_cap_flushing--; - dout(" inode %p now !flushing\n", inode); + doutc(cl, " %p %llx.%llx now !flushing\n", inode, + ceph_vinop(inode)); if (ci->i_dirty_caps == 0) { - dout(" inode %p now clean\n", inode); + doutc(cl, " %p %llx.%llx now clean\n", inode, + ceph_vinop(inode)); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = true; if (ci->i_wr_ref == 0 && @@ -3833,12 +3923,14 @@ void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, bool *wake_ci, bool *wake_mdsc) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; bool ret; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); + doutc(cl, "removing capsnap %p, %p %llx.%llx ci %p\n", capsnap, + inode, ceph_vinop(inode), ci); list_del_init(&capsnap->ci_item); ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); @@ -3877,29 +3969,31 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; bool wake_mdsc = false; - dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", - inode, ci, session->s_mds, follows); + doutc(cl, "%p %llx.%llx ci %p mds%d follows 
%lld\n", inode, + ceph_vinop(inode), ci, session->s_mds, follows); spin_lock(&ci->i_ceph_lock); list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->follows == follows) { if (iter->cap_flush.tid != flush_tid) { - dout(" cap_snap %p follows %lld tid %lld !=" - " %lld\n", iter, follows, - flush_tid, iter->cap_flush.tid); + doutc(cl, " cap_snap %p follows %lld " + "tid %lld != %lld\n", iter, + follows, flush_tid, + iter->cap_flush.tid); break; } capsnap = iter; break; } else { - dout(" skipping cap_snap %p follows %lld\n", - iter, iter->follows); + doutc(cl, " skipping cap_snap %p follows %lld\n", + iter, iter->follows); } } if (capsnap) @@ -3928,6 +4022,7 @@ static bool handle_cap_trunc(struct inode *inode, struct cap_extra_info *extra_info) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int mds = session->s_mds; int seq = le32_to_cpu(trunc->seq); u32 truncate_seq = le32_to_cpu(trunc->truncate_seq); @@ -3950,8 +4045,8 @@ static bool handle_cap_trunc(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n", - __func__, inode, mds, seq, truncate_size, truncate_seq); + doutc(cl, "%p %llx.%llx mds%d seq %d to %lld truncate seq %d\n", + inode, ceph_vinop(inode), mds, seq, truncate_size, truncate_seq); queue_trunc = ceph_fill_file_size(inode, issued, truncate_seq, truncate_size, size); return queue_trunc; @@ -3969,7 +4064,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *tsession = NULL; struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); @@ -3989,8 +4085,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, target = -1; } - dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n", - inode, ci, mds, mseq, target); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d target %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, target); retry: down_read(&mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); @@ -3999,7 +4095,7 @@ retry: goto out_unlock; if (target < 0) { - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4010,12 +4106,13 @@ retry: issued = cap->issued; if (issued != cap->implemented) - pr_err_ratelimited("handle_cap_export: issued != implemented: " - "ino (%llx.%llx) mds%d seq %d mseq %d " - "issued %s implemented %s\n", - ceph_vinop(inode), mds, cap->seq, cap->mseq, - ceph_cap_string(issued), - ceph_cap_string(cap->implemented)); + pr_err_ratelimited_client(cl, "issued != implemented: " + "%p %llx.%llx mds%d seq %d mseq %d" + " issued %s implemented %s\n", + inode, ceph_vinop(inode), mds, + cap->seq, cap->mseq, + ceph_cap_string(issued), + ceph_cap_string(cap->implemented)); tcap = __get_cap_for_mds(ci, target); @@ -4023,7 +4120,8 @@ retry: /* already have caps from the target */ if (tcap->cap_id == t_cap_id && ceph_seq_cmp(tcap->seq, t_seq) < 0) { - dout(" updating import cap %p mds%d\n", tcap, target); + doutc(cl, " updating import cap %p mds%d\n", tcap, + target); tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; @@ -4034,7 +4132,7 @@ retry: change_auth_cap_ses(ci, tcap->session); } } - 
ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { /* add placeholder for the export target */ @@ -4051,7 +4149,7 @@ retry: spin_unlock(&mdsc->cap_dirty_lock); } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4104,6 +4202,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, struct ceph_cap **target_cap, int *old_issued) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; int issued; @@ -4124,8 +4223,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, peer = -1; } - dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", - inode, ci, mds, mseq, peer); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d peer %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, peer); retry: cap = __get_cap_for_mds(ci, mds); if (!cap) { @@ -4151,20 +4250,20 @@ retry: ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; if (ocap && ocap->cap_id == p_cap_id) { - dout(" remove export cap %p mds%d flags %d\n", - ocap, peer, ph->flags); + doutc(cl, " remove export cap %p mds%d flags %d\n", + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != le32_to_cpu(ph->seq) || ocap->mseq != le32_to_cpu(ph->mseq))) { - pr_err_ratelimited("handle_cap_import: " - "mismatched seq/mseq: ino (%llx.%llx) " - "mds%d seq %d mseq %d importer mds%d " - "has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, ocap->seq, - ocap->mseq, mds, le32_to_cpu(ph->seq), + pr_err_ratelimited_client(cl, "mismatched seq/mseq: " + "%p %llx.%llx mds%d seq %d mseq %d" + " importer mds%d has peer seq %d mseq %d\n", + inode, ceph_vinop(inode), peer, + ocap->seq, ocap->mseq, mds, + le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } *old_issued = issued; @@ -4227,6 +4326,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_cap *cap; @@ -4245,7 +4345,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, bool close_sessions = false; bool do_cap_release = false; - dout("handle_caps from mds%d\n", session->s_mds); + doutc(cl, "from mds%d\n", session->s_mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4347,15 +4447,15 @@ void ceph_handle_caps(struct ceph_mds_session, /* lookup ino */ inode = ceph_find_inode(mdsc->fsc->sb, vino); - dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, - vino.snap, inode); + doutc(cl, " op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), + vino.ino, vino.snap, inode); mutex_lock(&session->s_mutex); - dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, - (unsigned)seq); + doutc(cl, " mds%d seq %lld cap seq %u\n", session->s_mds, + session->s_seq, (unsigned)seq); if (!inode) { - dout(" i don't have ino %llx\n", vino.ino); + doutc(cl, " i don't have ino %llx\n", vino.ino); switch (op) { case CEPH_CAP_OP_IMPORT: @@ -4410,9 +4510,9 @@ void ceph_handle_caps(struct ceph_mds_session, spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { - dout(" no cap on %p ino %llx.%llx from mds%d\n", - inode, ceph_ino(inode), ceph_snap(inode), - 
session->s_mds); + doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n", + inode, ceph_ino(inode), ceph_snap(inode), + session->s_mds); spin_unlock(&ci->i_ceph_lock); switch (op) { case CEPH_CAP_OP_REVOKE: @@ -4450,8 +4550,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, default: spin_unlock(&ci->i_ceph_lock); - pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, - ceph_cap_op_name(op)); + pr_err_client(cl, "unknown cap op %d %s\n", op, + ceph_cap_op_name(op)); } done: @@ -4492,7 +4592,7 @@ flush_cap_releases: goto done; bad: - pr_err("ceph_handle_caps: corrupt message\n"); + pr_err_client(cl, "corrupt message\n"); ceph_msg_dump(msg); goto out; } @@ -4506,6 +4606,7 @@ bad: */ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_mount_options *opt = mdsc->fsc->mount_options; @@ -4513,14 +4614,14 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) unsigned long loop_start = jiffies; unsigned long delay = 0; - dout("check_delayed_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { - dout("%s caps added recently. Exiting loop", __func__); + doutc(cl, "caps added recently. Exiting loop"); delay = ci->i_hold_caps_max; break; } @@ -4532,13 +4633,15 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); - dout("check_delayed_caps on %p\n", inode); + doutc(cl, "on %p %llx.%llx\n", inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); iput(inode); spin_lock(&mdsc->cap_delay_lock); } } spin_unlock(&mdsc->cap_delay_lock); + doutc(cl, "done\n"); return delay; } @@ -4549,17 +4652,18 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) static void flush_dirty_session_caps(struct ceph_mds_session *s) { struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; - dout("flush_dirty_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_dirty_lock); while (!list_empty(&s->s_cap_dirty)) { ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info, i_dirty_item); inode = &ci->netfs.inode; ihold(inode); - dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_unlock(&mdsc->cap_dirty_lock); ceph_wait_on_async_create(inode); ceph_check_caps(ci, CHECK_CAPS_FLUSH); @@ -4567,7 +4671,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s) spin_lock(&mdsc->cap_dirty_lock); } spin_unlock(&mdsc->cap_dirty_lock); - dout("flush_dirty_caps done\n"); + doutc(cl, "done\n"); } void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) @@ -4672,7 +4776,7 @@ int ceph_drop_caps_for_unlink(struct inode *inode) if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = - ceph_inode_to_client(inode)->mdsc; + ceph_inode_to_fs_client(inode)->mdsc; __cap_delay_requeue_front(mdsc, ci); } } @@ -4692,6 +4796,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int used, dirty; @@ 
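The default branch above reports an unhandled operation with both its numeric value and its ceph_cap_op_name() rendering, so even unknown codes produce a readable line. A sketch of that lookup-with-fallback; the table entries here are invented placeholders, not the real cap op list:

    #include <stdio.h>

    static const char *const op_names[] = {
            "grant", "revoke", "trunc", "export", "import",  /* illustrative */
    };

    static const char *cap_op_name(unsigned op)
    {
            if (op < sizeof(op_names) / sizeof(op_names[0]))
                    return op_names[op];
            return "???";   /* fallback for codes we do not know */
    }

    int main(void)
    {
            printf("unknown cap op %d %s\n", 42, cap_op_name(42));
            return 0;
    }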
-4701,9 +4806,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", - inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), - ceph_cap_string(unless)); + doutc(cl, "%p %llx.%llx mds%d used|dirty %s drop %s unless %s\n", + inode, ceph_vinop(inode), mds, ceph_cap_string(used|dirty), + ceph_cap_string(drop), ceph_cap_string(unless)); /* only drop unused, clean caps */ drop &= ~(used | dirty); @@ -4725,12 +4830,13 @@ int ceph_encode_inode_release(void **p, struct inode *inode, if (force || (cap->issued & drop)) { if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); - dout("encode_inode_release %p cap %p " - "%s -> %s, wanted %s -> %s\n", inode, cap, - ceph_cap_string(cap->issued), - ceph_cap_string(cap->issued & ~drop), - ceph_cap_string(cap->mds_wanted), - ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p %s -> %s, " + "wanted %s -> %s\n", inode, + ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(cap->issued & ~drop), + ceph_cap_string(cap->mds_wanted), + ceph_cap_string(wanted)); cap->issued &= ~drop; cap->implemented &= ~drop; @@ -4739,9 +4845,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, !(wanted & CEPH_CAP_ANY_FILE_WR)) ci->i_requested_max_size = 0; } else { - dout("encode_inode_release %p cap %p %s" - " (force)\n", inode, cap, - ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (force)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } rel->ino = cpu_to_le64(ceph_ino(inode)); @@ -4756,8 +4862,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, *p += sizeof(*rel); ret = 1; } else { - dout("encode_inode_release %p cap %p %s (noop)\n", - inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (noop)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } } spin_unlock(&ci->i_ceph_lock); @@ -4783,6 +4890,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); + struct ceph_client *cl; int force = 0; int ret; @@ -4804,10 +4912,11 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); dput(parent); + cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { - dout("encode_dentry_release %p mds%d seq %d\n", - dentry, mds, (int)di->lease_seq); + doutc(cl, "%p mds%d seq %d\n", dentry, mds, + (int)di->lease_seq); rel->dname_seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); spin_unlock(&dentry->d_lock); @@ -4833,12 +4942,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; int capsnap_release = 0; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); + doutc(cl, "removing capsnaps, ci is %p, %p %llx.%llx\n", + ci, inode, ceph_vinop(inode)); while (!list_empty(&ci->i_cap_snaps)) { capsnap = list_first_entry(&ci->i_cap_snaps, @@ -4855,8 +4966,9 @@ static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) 
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); bool is_auth; bool dirty_dropped = false; @@ -4864,8 +4976,8 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali lockdep_assert_held(&ci->i_ceph_lock); - dout("removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, "removing cap %p, ci is %p, %p %llx.%llx\n", + cap, ci, inode, ceph_vinop(inode)); is_auth = (cap == ci->i_auth_cap); __ceph_remove_cap(cap, false); @@ -4892,19 +5004,19 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali } if (!list_empty(&ci->i_dirty_item)) { - pr_warn_ratelimited( - " dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_dirty_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_dirty_caps = 0; list_del_init(&ci->i_dirty_item); dirty_dropped = true; } if (!list_empty(&ci->i_flushing_item)) { - pr_warn_ratelimited( - " dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty+flushing %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_flushing_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_flushing_caps = 0; list_del_init(&ci->i_flushing_item); mdsc->num_cap_flushing--; @@ -4927,8 +5039,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; - pr_warn_ratelimited(" dropping file locks for %p %lld\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + " dropping file locks for %p %llx.%llx\n", + inode, ceph_vinop(inode)); } if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e3b1c3fab412..3b3c4d8d401e 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -113,7 +113,7 @@ static int ceph_crypt_set_context(struct inode *inode, const void *ctx, cia.fscrypt_auth = cfa; - ret = __ceph_setattr(inode, &attr, &cia); + ret = __ceph_setattr(&nop_mnt_idmap, inode, &attr, &cia); if (ret == 0) inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); kfree(cia.fscrypt_auth); @@ -129,7 +129,7 @@ static bool ceph_crypt_empty_dir(struct inode *inode) static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) { - return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy; + return ceph_sb_to_fs_client(sb)->fsc_dummy_enc_policy.policy; } static struct fscrypt_operations ceph_fscrypt_ops = { @@ -212,6 +212,7 @@ void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, static struct inode *parse_longname(const struct inode *parent, const char *name, int *name_len) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = NULL; struct ceph_vino vino = { .snap = CEPH_NOSNAP }; char *inode_number; @@ -223,12 +224,12 @@ static struct inode *parse_longname(const struct inode *parent, name++; name_end = strrchr(name, '_'); if (!name_end) { - dout("Failed to parse long snapshot name: %s\n", name); + doutc(cl, "failed to parse long snapshot name: %s\n", name); return ERR_PTR(-EIO); } *name_len = (name_end - name); if 
(*name_len <= 0) { - pr_err("Failed to parse long snapshot name\n"); + pr_err_client(cl, "failed to parse long snapshot name\n"); return ERR_PTR(-EIO); } @@ -240,7 +241,7 @@ static struct inode *parse_longname(const struct inode *parent, return ERR_PTR(-ENOMEM); ret = kstrtou64(inode_number, 10, &vino.ino); if (ret) { - dout("Failed to parse inode number: %s\n", name); + doutc(cl, "failed to parse inode number: %s\n", name); dir = ERR_PTR(ret); goto out; } @@ -251,7 +252,7 @@ static struct inode *parse_longname(const struct inode *parent, /* This can happen if we're not mounting cephfs on the root */ dir = ceph_get_inode(parent->i_sb, vino, NULL); if (IS_ERR(dir)) - dout("Can't find inode %s (%s)\n", inode_number, name); + doutc(cl, "can't find inode %s (%s)\n", inode_number, name); } out: @@ -262,6 +263,7 @@ out: int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, char *buf) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = parent; struct qstr iname; u32 len; @@ -330,7 +332,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, /* base64 encode the encrypted name */ elen = ceph_base64_encode(cryptbuf, len, buf); - dout("base64-encoded ciphertext name = %.*s\n", elen, buf); + doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, buf); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ WARN_ON(elen > 240); @@ -505,7 +507,10 @@ int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num); } @@ -514,7 +519,10 @@ int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num, gfp_flags); } @@ -583,6 +591,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, u64 off, struct ceph_sparse_extent *map, u32 ext_cnt) { + struct ceph_client *cl = ceph_inode_to_client(inode); int i, ret = 0; struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; @@ -590,7 +599,8 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, /* Nothing to do for empty array */ if (ext_cnt == 0) { - dout("%s: empty array, ret 0\n", __func__); + doutc(cl, "%p %llx.%llx empty array, ret 0\n", inode, + ceph_vinop(inode)); return 0; } @@ -604,14 +614,17 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, int fret; if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) { - pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n", - __func__, i, ext->off, ext->len); + pr_warn_client(cl, + "%p %llx.%llx bad encrypted sparse extent " + "idx %d off %llx len %llx\n", + inode, ceph_vinop(inode), i, ext->off, + ext->len); return -EIO; } fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx], off + pgsoff, ext->len); - dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i, - ext->off, ext->len, fret); + doutc(cl, "%p 
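parse_longname() above splits encrypted long snapshot names of the form _<name>_<inode-number>: skip the leading underscore, treat everything up to the last underscore as the name, and parse the decimal tail as the inode number (the kernel uses kstrtou64 with base 10). A userspace rendering of that split, with error handling reduced to return codes:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int parse_longname(const char *s, char *name, size_t cap,
                              unsigned long long *ino)
    {
            const char *start, *name_end;
            size_t len;

            if (s[0] != '_')
                    return -1;
            start = s + 1;                    /* skip the leading '_'    */
            name_end = strrchr(start, '_');   /* last '_' starts the ino */
            if (!name_end || name_end == start)
                    return -1;                /* empty name: reject      */

            len = (size_t)(name_end - start);
            if (len + 1 > cap)
                    return -1;
            memcpy(name, start, len);
            name[len] = '\0';

            *ino = strtoull(name_end + 1, NULL, 10);
            return 0;
    }

    int main(void)
    {
            char name[64];
            unsigned long long ino;

            if (!parse_longname("_mysnap_1099511627776", name, sizeof(name), &ino))
                    printf("name '%s' ino %llu\n", name, ino);
            return 0;
    }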
%llx.%llx [%d] 0x%llx~0x%llx fret %d\n", inode, + ceph_vinop(inode), i, ext->off, ext->len, fret); if (fret < 0) { if (ret == 0) ret = fret; @@ -619,7 +632,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, } ret = pgsoff + fret; } - dout("%s: ret %d\n", __func__, ret); + doutc(cl, "ret %d\n", ret); return ret; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..24c08078f5aa 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -398,7 +398,7 @@ DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) { - dout("ceph_fs_debugfs_cleanup\n"); + doutc(fsc->client, "begin\n"); debugfs_remove(fsc->debugfs_bdi); debugfs_remove(fsc->debugfs_congestion_kb); debugfs_remove(fsc->debugfs_mdsmap); @@ -407,13 +407,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); debugfs_remove_recursive(fsc->debugfs_metrics_dir); + doutc(fsc->client, "done\n"); } void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - dout("ceph_fs_debugfs_init\n"); + doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, @@ -469,6 +470,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &metrics_size_fops); debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc, &metrics_caps_fops); + doutc(fsc->client, "done\n"); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..91709934c8b1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -109,7 +109,9 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. 
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..24c08078f5aa 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -398,7 +398,7 @@ DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) { - dout("ceph_fs_debugfs_cleanup\n"); + doutc(fsc->client, "begin\n"); debugfs_remove(fsc->debugfs_bdi); debugfs_remove(fsc->debugfs_congestion_kb); debugfs_remove(fsc->debugfs_mdsmap); @@ -407,13 +407,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); debugfs_remove_recursive(fsc->debugfs_metrics_dir); + doutc(fsc->client, "done\n"); } void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - dout("ceph_fs_debugfs_init\n"); + doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, @@ -469,6 +470,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &metrics_size_fops); debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc, &metrics_caps_fops); + doutc(fsc->client, "done\n"); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..91709934c8b1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -109,7 +109,9 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. */ -static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, +static int note_last_dentry(struct ceph_fs_client *fsc, + struct ceph_dir_file_info *dfi, + const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); @@ -120,7 +122,7 @@ static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, memcpy(dfi->last_name, name, len); dfi->last_name[len] = 0; dfi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", dfi->last_name); + doutc(fsc->client, "'%s'\n", dfi->last_name); return 0; } @@ -130,6 +132,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, struct ceph_readdir_cache_control *cache_ctl) { struct inode *dir = d_inode(parent); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry; unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1; loff_t ptr_pos = idx * sizeof(struct dentry *); @@ -142,7 +145,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, ceph_readdir_cache_release(cache_ctl); cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); if (!cache_ctl->page) { - dout(" page %lu not found\n", ptr_pgoff); + doutc(cl, " page %lu not found\n", ptr_pgoff); return ERR_PTR(-EAGAIN); } /* reading/filling the cache are serialized by @@ -185,13 +188,16 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(dir); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; struct ceph_readdir_cache_control cache_ctl = {}; u64 idx = 0; int err = 0; - dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos); + doutc(cl, "%p %llx.%llx v%u at %llx\n", dir, ceph_vinop(dir), + (unsigned)shared_gen, ctx->pos); /* search start position */ if (ctx->pos > 2) { @@ -221,7 +227,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, dput(dentry); } - dout("__dcache_readdir %p cache idx %llu\n", dir, idx); + doutc(cl, "%p %llx.%llx cache idx %llu\n", dir, + ceph_vinop(dir), idx); } @@ -257,8 +264,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, spin_unlock(&dentry->d_lock); if (emit_dentry) { - dout(" %llx dentry %p %pd %p\n", di->offset, - dentry, dentry, d_inode(dentry)); + doutc(cl, " %llx dentry %p %pd %p\n", di->offset, + dentry, dentry, d_inode(dentry)); ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_present_inode(d_inode(dentry)), @@ -281,7 +288,8 @@ out: if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(fsc, dfi, last->d_name.name, + last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; @@ -310,20 +318,23 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; int i; int err; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; - dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); + doutc(cl, "%p %llx.%llx file %p pos
%llx\n", inode, + ceph_vinop(inode), file, ctx->pos); if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ if (ctx->pos == 0) { - dout("readdir off 0 -> '.'\n"); + doutc(cl, "%p %llx.%llx off 0 -> '.'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode), inode->i_mode >> 12)) return 0; @@ -337,7 +348,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ino = ceph_present_inode(dentry->d_parent->d_inode); spin_unlock(&dentry->d_lock); - dout("readdir off 1 -> '..'\n"); + doutc(cl, "%p %llx.%llx off 1 -> '..'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12)) return 0; ctx->pos = 2; @@ -391,8 +403,8 @@ more: frag = fpos_frag(ctx->pos); } - dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, dfi->last_name); + doutc(cl, "fetching %p %llx.%llx frag %x offset '%s'\n", + inode, ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -446,12 +458,12 @@ more: ceph_mdsc_put_request(req); return err; } - dout("readdir got and parsed readdir result=%d on " - "frag %x, end=%d, complete=%d, hash_order=%d\n", - err, frag, - (int)req->r_reply_info.dir_end, - (int)req->r_reply_info.dir_complete, - (int)req->r_reply_info.hash_order); + doutc(cl, "%p %llx.%llx got and parsed readdir result=%d" + "on frag %x, end=%d, complete=%d, hash_order=%d\n", + inode, ceph_vinop(inode), err, frag, + (int)req->r_reply_info.dir_end, + (int)req->r_reply_info.dir_complete, + (int)req->r_reply_info.hash_order); rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -481,7 +493,8 @@ more: dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate\n"); + doutc(cl, "%p %llx.%llx !did_prepopulate\n", inode, + ceph_vinop(inode)); /* disable readdir cache */ dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ @@ -494,8 +507,8 @@ more: rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(dfi, rde->name, rde->name_len, - next_offset); + err = note_last_dentry(fsc, dfi, rde->name, + rde->name_len, next_offset); if (err) { ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; @@ -508,9 +521,9 @@ more: } rinfo = &dfi->last_readdir->r_reply_info; - dout("readdir frag %x num %d pos %llx chunk first %llx\n", - dfi->frag, rinfo->dir_nr, ctx->pos, - rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); + doutc(cl, "%p %llx.%llx frag %x num %d pos %llx chunk first %llx\n", + inode, ceph_vinop(inode), dfi->frag, rinfo->dir_nr, ctx->pos, + rinfo->dir_nr ? 
rinfo->dir_entries[0].offset : 0LL); i = 0; /* search start position */ @@ -530,8 +543,9 @@ more: struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; if (rde->offset < ctx->pos) { - pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n", - __func__, rde->offset, ctx->pos); + pr_warn_client(cl, + "%p %llx.%llx rde->offset 0x%llx ctx->pos 0x%llx\n", + inode, ceph_vinop(inode), rde->offset, ctx->pos); return -EIO; } @@ -539,9 +553,9 @@ more: return -EIO; ctx->pos = rde->offset; - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", - i, rinfo->dir_nr, ctx->pos, - rde->name_len, rde->name, &rde->inode.in); + doutc(cl, "%p %llx.%llx (%d/%d) -> %llx '%.*s' %p\n", inode, + ceph_vinop(inode), i, rinfo->dir_nr, ctx->pos, + rde->name_len, rde->name, &rde->inode.in); if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), @@ -552,7 +566,7 @@ more: * doesn't have enough memory, etc. So for next readdir * it will continue. */ - dout("filldir stopping us...\n"); + doutc(cl, "filldir stopping us...\n"); return 0; } @@ -583,7 +597,8 @@ more: kfree(dfi->last_name); dfi->last_name = NULL; } - dout("readdir next frag is %x\n", frag); + doutc(cl, "%p %llx.%llx next frag is %x\n", inode, + ceph_vinop(inode), frag); goto more; } dfi->file_info.flags |= CEPH_F_ATEND; @@ -598,20 +613,23 @@ more: spin_lock(&ci->i_ceph_lock); if (dfi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { - dout(" marking %p complete and ordered\n", inode); + doutc(cl, " marking %p %llx.%llx complete and ordered\n", + inode, ceph_vinop(inode)); /* use i_size to track number of entries in * readdir cache */ BUG_ON(dfi->readdir_cache_idx < 0); i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { - dout(" marking %p complete\n", inode); + doutc(cl, " marking %llx.%llx complete\n", + ceph_vinop(inode)); } __ceph_dir_set_complete(ci, dfi->dir_release_count, dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } - dout("readdir %p file %p done.\n", inode, file); + doutc(cl, "%p %llx.%llx file %p done.\n", inode, ceph_vinop(inode), + file); return 0; } @@ -657,6 +675,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); loff_t retval; inode_lock(inode); @@ -676,7 +695,8 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset >= 0) { if (need_reset_readdir(dfi, offset)) { - dout("dir_llseek dropping %p content\n", file); + doutc(cl, "%p %llx.%llx dropping %p content\n", + inode, ceph_vinop(inode), file); reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek @@ -703,8 +723,9 @@ out: struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */ + struct ceph_client *cl = ceph_inode_to_client(parent); /* .snap dir? */ if (ceph_snap(parent) == CEPH_NOSNAP && @@ -713,8 +734,9 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct inode *inode = ceph_get_snapdir(parent); res = d_splice_alias(inode, dentry); - dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. 
Spliced dentry %p\n", - dentry, dentry, inode, res); + doutc(cl, "ENOENT on snapdir %p '%pd', linking to " + "snapdir %p %llx.%llx. Spliced dentry %p\n", + dentry, dentry, inode, ceph_vinop(inode), res); if (res) dentry = res; } @@ -735,12 +757,15 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { + struct ceph_client *cl = req->r_mdsc->fsc->client; + if (err == -ENOENT) { /* no trace? */ err = 0; if (!req->r_reply_info.head->is_dentry) { - dout("ENOENT and no trace, dentry %p inode %p\n", - dentry, d_inode(dentry)); + doutc(cl, + "ENOENT and no trace, dentry %p inode %llx.%llx\n", + dentry, ceph_vinop(d_inode(dentry))); if (d_really_is_positive(dentry)) { d_drop(dentry); err = -ENOENT; @@ -771,15 +796,16 @@ static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_request *req; int op; int mask; int err; - dout("lookup %p dentry %p '%pd'\n", - dir, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p\n", dir, ceph_vinop(dir), + dentry, dentry); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -802,7 +828,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&ci->i_ceph_lock); - dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags); + doutc(cl, " dir %llx.%llx flags are 0x%lx\n", + ceph_vinop(dir), ci->i_ceph_flags); if (strncmp(dentry->d_name.name, fsc->mount_options->snapdir_name, dentry->d_name.len) && @@ -812,7 +839,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); spin_unlock(&ci->i_ceph_lock); - dout(" dir %p complete, -ENOENT\n", dir); + doutc(cl, " dir %llx.%llx complete, -ENOENT\n", + ceph_vinop(dir)); d_add(dentry, NULL); di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; @@ -850,7 +878,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, } dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ - dout("lookup result=%p\n", dentry); + doutc(cl, "result=%p\n", dentry); return dentry; } @@ -885,6 +913,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -901,8 +930,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", - dir, dentry, mode, rdev); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p mode 0%ho rdev %d\n", + dir, ceph_vinop(dir), dentry, dentry, mode, rdev); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -924,6 +953,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + 
req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -993,6 +1023,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *dest) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; umode_t mode = S_IFLNK | 0777; @@ -1010,7 +1041,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); + doutc(cl, "%p %llx.%llx/'%pd' to '%s'\n", dir, ceph_vinop(dir), dentry, + dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1040,6 +1072,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, } set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -1064,6 +1097,7 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -1076,10 +1110,11 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (ceph_snap(dir) == CEPH_SNAPDIR) { /* mkdir .snap/foo is a MKSNAP */ op = CEPH_MDS_OP_MKSNAP; - dout("mksnap dir %p snap '%pd' dn %p\n", dir, - dentry, dentry); + doutc(cl, "mksnap %llx.%llx/'%pd' dentry %p\n", + ceph_vinop(dir), dentry, dentry); } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); + doutc(cl, "mkdir %llx.%llx/'%pd' dentry %p mode 0%ho\n", + ceph_vinop(dir), dentry, dentry, mode); op = CEPH_MDS_OP_MKDIR; } else { err = -EROFS; @@ -1117,6 +1152,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + if (op == CEPH_MDS_OP_MKDIR) + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | CEPH_CAP_XATTR_EXCL; @@ -1144,6 +1181,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; @@ -1161,8 +1199,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n", - dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' to '%pd'\n", dir, ceph_vinop(dir), + old_dentry, dentry); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); if (IS_ERR(req)) { d_drop(dentry); @@ -1199,14 +1237,16 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct dentry *dentry = req->r_dentry; - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct ceph_dentry_info *di = 
ceph_dentry(dentry); int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, + "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); spin_lock(&fsc->async_unlink_conflict_lock); hash_del_rcu(&di->hnode); @@ -1226,7 +1266,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &base, 0); /* mark error on parent + clear complete */ @@ -1240,8 +1280,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* mark inode itself for an error (since metadata is bogus) */ mapping_set_error(req->r_old_inode->i_mapping, result); - pr_warn("async unlink failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<<bad>>" : path, result); + pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n", + base, IS_ERR(path) ? "<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); } out: @@ -1290,7 +1330,8 @@ static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; @@ -1300,11 +1341,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ - dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry); + doutc(cl, "rmsnap %llx.%llx/'%pd' dn\n", ceph_vinop(dir), + dentry); op = CEPH_MDS_OP_RMSNAP; } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("unlink/rmdir dir %p dn %p inode %p\n", - dir, dentry, inode); + doutc(cl, "unlink/rmdir %llx.%llx/'%pd' inode %llx.%llx\n", + ceph_vinop(dir), dentry, ceph_vinop(inode)); op = d_is_dir(dentry) ? 
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else @@ -1327,9 +1369,9 @@ retry: (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { struct ceph_dentry_info *di = ceph_dentry(dentry); - dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), - dentry->d_name.len, dentry->d_name.name, - ceph_cap_string(req->r_dir_caps)); + doutc(cl, "async unlink on %llx.%llx/'%pd' caps=%s", + ceph_vinop(dir), dentry, + ceph_cap_string(req->r_dir_caps)); set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags); req->r_callback = ceph_async_unlink_cb; req->r_old_inode = d_inode(dentry); @@ -1384,6 +1426,7 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *new_dentry, unsigned int flags) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; @@ -1413,8 +1456,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) return err; - dout("rename dir %p dentry %p to dir %p dentry %p\n", - old_dir, old_dentry, new_dir, new_dentry); + doutc(cl, "%llx.%llx/'%pd' to %llx.%llx/'%pd'\n", + ceph_vinop(old_dir), old_dentry, ceph_vinop(new_dir), + new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -1459,9 +1503,10 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn); + doutc(cl, "%p %p '%pd'\n", di, dn, dn); di->flags |= CEPH_DENTRY_LEASE_LIST; if (di->flags & CEPH_DENTRY_SHRINK_LIST) { @@ -1469,7 +1514,6 @@ void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_move_tail(&di->lease_list, &mdsc->dentry_leases); spin_unlock(&mdsc->dentry_list_lock); @@ -1493,10 +1537,10 @@ static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc, void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n", - di, dn, dn, di->offset); + doutc(cl, "%p %p '%pd' (offset 0x%llx)\n", di, dn, dn, di->offset); if (!list_empty(&di->lease_list)) { if (di->flags & CEPH_DENTRY_LEASE_LIST) { @@ -1516,7 +1560,6 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); __dentry_dir_lease_touch(mdsc, di); spin_unlock(&mdsc->dentry_list_lock); @@ -1530,7 +1573,7 @@ static void __dentry_lease_unlist(struct ceph_dentry_info *di) if (list_empty(&di->lease_list)) return; - mdsc = ceph_sb_to_client(di->dentry->d_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(di->dentry->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_del_init(&di->lease_list); spin_unlock(&mdsc->dentry_list_lock); @@ -1757,6 +1800,8 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) { struct ceph_dentry_info *di; struct ceph_mds_session *session = NULL; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl =
mdsc->fsc->client; u32 seq = 0; int valid = 0; @@ -1789,7 +1834,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } - dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); + doutc(cl, "dentry %p = %d\n", dentry, valid); return valid; } @@ -1832,6 +1877,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc) { struct ceph_inode_info *ci = ceph_inode(dir); + struct ceph_client *cl = mdsc->fsc->client; int valid; int shared_gen; @@ -1853,8 +1899,9 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, valid = 0; spin_unlock(&dentry->d_lock); } - dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n", - dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); + doutc(cl, "dir %p %llx.%llx v%u dentry %p '%pd' = %d\n", dir, + ceph_vinop(dir), (unsigned)atomic_read(&ci->i_shared_gen), + dentry, dentry, valid); return valid; } @@ -1863,10 +1910,11 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, */ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; int valid = 0; struct dentry *parent; struct inode *dir, *inode; - struct ceph_mds_client *mdsc; valid = fscrypt_d_revalidate(dentry, flags); if (valid <= 0) @@ -1884,16 +1932,16 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); } - dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry, - dentry, inode, ceph_dentry(dentry)->offset, - !!(dentry->d_flags & DCACHE_NOKEY_NAME)); + doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", + dentry, dentry, inode, ceph_dentry(dentry)->offset, + !!(dentry->d_flags & DCACHE_NOKEY_NAME)); - mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc; /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { - dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, - dentry, inode); + doutc(cl, "%p '%pd' inode %p is SNAPPED\n", dentry, + dentry, inode); valid = 1; } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { valid = 1; @@ -1948,14 +1996,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) break; } ceph_mdsc_put_request(req); - dout("d_revalidate %p lookup result=%d\n", - dentry, err); + doutc(cl, "%p '%pd', lookup result=%d\n", dentry, + dentry, err); } } else { percpu_counter_inc(&mdsc->metric.d_lease_hit); } - dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); + doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? 
"valid" : "invalid"); if (!valid) ceph_dir_clear_complete(dir); @@ -1995,9 +2043,9 @@ static int ceph_d_delete(const struct dentry *dentry) static void ceph_d_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); - dout("d_release %p\n", dentry); + doutc(fsc->client, "dentry %p '%pd'\n", dentry, dentry); atomic64_dec(&fsc->mdsc->metric.total_dentries); @@ -2018,10 +2066,12 @@ static void ceph_d_release(struct dentry *dentry) */ static void ceph_d_prune(struct dentry *dentry) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *dir_ci; struct ceph_dentry_info *di; - dout("ceph_d_prune %pd %p\n", dentry, dentry); + doutc(cl, "dentry %p '%pd'\n", dentry, dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) @@ -2064,7 +2114,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, int left; const int bufsize = 1024; - if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_mount_opt(ceph_sb_to_fs_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!dfi->dir_info) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8559990a59a5..726af69d4d62 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -36,6 +36,7 @@ struct ceph_nfs_snapfh { static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int snap_handle_length = sizeof(struct ceph_nfs_snapfh) >> 2; struct ceph_nfs_snapfh *sfh = (void *)rawfh; @@ -79,13 +80,14 @@ static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = snap_handle_length; ret = FILEID_BTRFS_WITH_PARENT; out: - dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret); return ret; } static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int handle_length = sizeof(struct ceph_nfs_fh) >> 2; static const int connected_handle_length = @@ -105,15 +107,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, if (parent_inode) { struct ceph_nfs_confh *cfh = (void *)rawfh; - dout("encode_fh %llx with parent %llx\n", - ceph_ino(inode), ceph_ino(parent_inode)); + doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode, + ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode)); cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { struct ceph_nfs_fh *fh = (void *)rawfh; - dout("encode_fh %llx\n", ceph_ino(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; type = FILEID_INO32_GEN; @@ -123,7 +125,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct inode *inode; struct ceph_vino vino; int err; @@ -205,7 +207,8 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, struct ceph_nfs_snapfh *sfh, bool want_parent) { - struct 
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8559990a59a5..726af69d4d62 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -36,6 +36,7 @@ struct ceph_nfs_snapfh { static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int snap_handle_length = sizeof(struct ceph_nfs_snapfh) >> 2; struct ceph_nfs_snapfh *sfh = (void *)rawfh; @@ -79,13 +80,14 @@ static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = snap_handle_length; ret = FILEID_BTRFS_WITH_PARENT; out: - dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret); return ret; } static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int handle_length = sizeof(struct ceph_nfs_fh) >> 2; static const int connected_handle_length = @@ -105,15 +107,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, if (parent_inode) { struct ceph_nfs_confh *cfh = (void *)rawfh; - dout("encode_fh %llx with parent %llx\n", - ceph_ino(inode), ceph_ino(parent_inode)); + doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode, + ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode)); cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { struct ceph_nfs_fh *fh = (void *)rawfh; - dout("encode_fh %llx\n", ceph_ino(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; type = FILEID_INO32_GEN; @@ -123,7 +125,7 @@ static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct inode *inode; struct ceph_vino vino; int err; @@ -205,7 +207,8 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, struct ceph_nfs_snapfh *sfh, bool want_parent) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct inode *inode; struct ceph_vino vino; @@ -278,11 +281,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, ceph_mdsc_put_request(req); if (want_parent) { - dout("snapfh_to_parent %llx.%llx\n err=%d\n", - vino.ino, vino.snap, err); + doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err); } else { - dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d", - vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); + doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, + vino.snap, sfh->parent_ino, sfh->hash, err); } if (IS_ERR(inode)) return ERR_CAST(inode); @@ -297,6 +299,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_fh *fh = (void *)fid->raw; if (fh_type == FILEID_BTRFS_WITH_PARENT) { @@ -310,14 +313,14 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, if (fh_len < sizeof(*fh) / 4) return NULL; - dout("fh_to_dentry %llx\n", fh->ino); + doutc(fsc->client, "%llx\n", fh->ino); return __fh_to_dentry(sb, fh->ino); } static struct dentry *__get_parent(struct super_block *sb, struct dentry *child, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct ceph_mds_request *req; struct inode *inode; int mask; @@ -363,6 +366,7 @@ static struct dentry *__get_parent(struct super_block *sb, static struct dentry *ceph_get_parent(struct dentry *child) { struct inode *inode = d_inode(child); + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn; if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -402,8 +406,8 @@ static struct dentry *ceph_get_parent(struct dentry *child) dn = __get_parent(child->d_sb, child, 0); } out: - dout("get_parent %p ino %llx.%llx err=%ld\n", - child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); + doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode, + ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); return dn; } @@ -414,6 +418,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_confh *cfh = (void *)fid->raw; struct dentry *dentry; @@ -427,7 +432,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, if (fh_len < sizeof(*cfh) / 4) return NULL; - dout("fh_to_parent %llx\n", cfh->parent_ino); + doutc(fsc->client, "%llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); @@ -439,7 +444,7 @@ static int __get_snap_name(struct dentry *parent, char *name, { struct inode *inode = d_inode(child); struct inode *dir = d_inode(parent); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_request *req = NULL; char *last_name = NULL; unsigned next_offset = 2; @@ -526,8 +531,8 @@ out: if (req) ceph_mdsc_put_request(req); kfree(last_name); - dout("get_snap_name %p ino %llx.%llx err=%d\n", - child, ceph_vinop(inode), err); + doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child, + inode, ceph_vinop(inode), err); return err; } @@ -544,7 +549,7 @@ static int
ceph_get_name(struct dentry *parent, char *name, if (ceph_snap(inode) != CEPH_NOSNAP) return __get_snap_name(parent, name, child); - mdsc = ceph_inode_to_client(inode)->mdsc; + mdsc = ceph_inode_to_fs_client(inode)->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) @@ -588,9 +593,9 @@ static int ceph_get_name(struct dentry *parent, char *name, ceph_fname_free_buffer(dir, &oname); } out: - dout("get_name %p ino %llx.%llx err %d %s%s\n", - child, ceph_vinop(inode), err, - err ? "" : "name ", err ? "" : name); + doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n", + child, inode, ceph_vinop(inode), err, err ? "" : "name ", + err ? "" : name); ceph_mdsc_put_request(req); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 649600d0a7b6..3b5aae29e944 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -19,8 +19,9 @@ #include "io.h" #include "metric.h" -static __le32 ceph_flags_sys2wire(u32 flags) +static __le32 ceph_flags_sys2wire(struct ceph_mds_client *mdsc, u32 flags) { + struct ceph_client *cl = mdsc->fsc->client; u32 wire_flags = 0; switch (flags & O_ACCMODE) { @@ -48,7 +49,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x\n", flags); + doutc(cl, "unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -189,7 +190,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); - req->r_args.open.flags = ceph_flags_sys2wire(flags); + req->r_args.open.flags = ceph_flags_sys2wire(mdsc, flags); req->r_args.open.mode = cpu_to_le32(create_mode); out: return req; @@ -200,12 +201,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi; int ret; - dout("%s %p %p 0%o (%s)\n", __func__, inode, file, - inode->i_mode, isdir ? "dir" : "regular"); + doutc(cl, "%p %llx.%llx %p 0%o (%s)\n", inode, ceph_vinop(inode), + file, inode->i_mode, isdir ? 
"dir" : "regular"); BUG_ON(inode->i_fop->release != ceph_release); if (isdir) { @@ -234,7 +236,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + fi->filp_gen = READ_ONCE(ceph_inode_to_fs_client(inode)->filp_gen); if ((file->f_mode & FMODE_WRITE) && ceph_has_inline_data(ci)) { ret = ceph_uninline_data(file); @@ -259,6 +261,7 @@ error: */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -271,13 +274,13 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) break; case S_IFLNK: - dout("init_file %p %p 0%o (symlink)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (symlink)\n", inode, + ceph_vinop(inode), file, inode->i_mode); break; default: - dout("init_file %p %p 0%o (special)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (special)\n", inode, + ceph_vinop(inode), file, inode->i_mode); /* * we need to drop the open ref now, since we don't * have .release set to ceph_release. @@ -296,6 +299,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) int ceph_renew_caps(struct inode *inode, int fmode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -307,8 +311,9 @@ int ceph_renew_caps(struct inode *inode, int fmode) (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); - dout("renew caps %p want %s issued %s updating mds_wanted\n", - inode, ceph_cap_string(wanted), ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx want %s issued %s updating mds_wanted\n", + inode, ceph_vinop(inode), ceph_cap_string(wanted), + ceph_cap_string(issued)); ceph_check_caps(ci, 0); return 0; } @@ -339,7 +344,8 @@ int ceph_renew_caps(struct inode *inode, int fmode) err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); out: - dout("renew caps %p open result=%d\n", inode, err); + doutc(cl, "%p %llx.%llx open result=%d\n", inode, ceph_vinop(inode), + err); return err < 0 ? 
err : 0; } @@ -352,7 +358,8 @@ out: int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *fi = file->private_data; @@ -360,7 +367,7 @@ int ceph_open(struct inode *inode, struct file *file) int flags, fmode, wanted; if (fi) { - dout("open file %p is already opened\n", file); + doutc(cl, "file %p is already opened\n", file); return 0; } @@ -374,8 +381,8 @@ int ceph_open(struct inode *inode, struct file *file) return err; } - dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode, - ceph_vinop(inode), file, flags, file->f_flags); + doutc(cl, "%p %llx.%llx file %p flags %d (%d)\n", inode, + ceph_vinop(inode), file, flags, file->f_flags); fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); @@ -399,9 +406,9 @@ int ceph_open(struct inode *inode, struct file *file) int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); - dout("open %p fmode %d want %s issued %s using existing\n", - inode, fmode, ceph_cap_string(wanted), - ceph_cap_string(issued)); + doutc(cl, "open %p fmode %d want %s issued %s using existing\n", + inode, fmode, ceph_cap_string(wanted), + ceph_cap_string(issued)); __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -421,7 +428,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_unlock(&ci->i_ceph_lock); - dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); + doutc(cl, "open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); req = prepare_open_request(inode->i_sb, flags, 0); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -435,7 +442,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); - dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); + doutc(cl, "open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); out: return err; } @@ -515,6 +522,7 @@ no_async: static void restore_deleg_ino(struct inode *dir, u64 ino) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_mds_session *s = NULL; @@ -525,7 +533,8 @@ static void restore_deleg_ino(struct inode *dir, u64 ino) if (s) { int err = ceph_restore_deleg_ino(s, ino); if (err) - pr_warn("ceph: unable to restore delegated ino 0x%llx to session: %d\n", + pr_warn_client(cl, + "unable to restore delegated ino 0x%llx to session: %d\n", ino, err); ceph_put_mds_session(s); } @@ -557,6 +566,7 @@ static void wake_async_create_waiters(struct inode *inode, static void ceph_async_create_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *dentry = req->r_dentry; struct inode *dinode = d_inode(dentry); struct inode *tinode = req->r_target_inode; @@ -574,10 +584,11 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); - pr_warn("async create failure path=(%llx)%s result=%d!\n", + pr_warn_client(cl, + "async create failure path=(%llx)%s result=%d!\n", base, IS_ERR(path) ? 
"<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); @@ -596,14 +607,15 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, u64 ino = ceph_vino(tinode).ino; if (req->r_deleg_ino != ino) - pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", - __func__, req->r_err, req->r_deleg_ino, ino); + pr_warn_client(cl, + "inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", + req->r_err, req->r_deleg_ino, ino); mapping_set_error(tinode->i_mapping, result); wake_async_create_waiters(tinode, req->r_session); } else if (!result) { - pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__, - req->r_deleg_ino); + pr_warn_client(cl, "no req->r_target_inode for 0x%llx\n", + req->r_deleg_ino); } out: ceph_mdsc_release_dir_caps(req); @@ -625,6 +637,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, struct timespec64 now; struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -655,7 +668,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); - in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); + in.uid = cpu_to_le32(from_kuid(&init_user_ns, + mapped_fsuid(req->r_mnt_idmap, + &init_user_ns))); if (dir->i_mode & S_ISGID) { in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); @@ -663,7 +678,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, if (S_ISDIR(mode)) mode |= S_ISGID; } else { - in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + in.gid = cpu_to_le32(from_kgid(&init_user_ns, + mapped_fsgid(req->r_mnt_idmap, + &init_user_ns))); } in.mode = cpu_to_le32((u32)mode); @@ -683,7 +700,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, req->r_fmode, NULL); up_read(&mdsc->snap_rwsem); if (ret) { - dout("%s failed to fill inode: %d\n", __func__, ret); + doutc(cl, "failed to fill inode: %d\n", ret); ceph_dir_clear_complete(dir); if (!d_unhashed(dentry)) d_drop(dentry); @@ -691,8 +708,8 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, } else { struct dentry *dn; - dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__, - vino.ino, ceph_ino(dir), dentry->d_name.name); + doutc(cl, "d_adding new inode 0x%llx to 0x%llx/%s\n", + vino.ino, ceph_ino(dir), dentry->d_name.name); ceph_dir_clear_ordered(dir); ceph_init_inode_acls(inode, as_ctx); if (inode->i_state & I_NEW) { @@ -730,7 +747,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct mnt_idmap *idmap = file_mnt_idmap(file); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct inode *new_inode = NULL; @@ -740,9 +759,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, int mask; int err; - dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", - dir, dentry, dentry, - d_unhashed(dentry) ? 
"unhashed" : "hashed", flags, mode); + doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", + dir, ceph_vinop(dir), dentry, dentry, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); if (dentry->d_name.len > NAME_MAX) return -ENAMETOOLONG; @@ -788,6 +807,8 @@ retry: mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); req->r_parent = dir; + if (req->r_op == CEPH_MDS_OP_CREATE) + req->r_mnt_idmap = mnt_idmap_get(idmap); ihold(dir); if (IS_ENCRYPTED(dir)) { set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); @@ -880,17 +901,18 @@ retry: goto out_req; if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) { /* make vfs retry on splice, ENOENT, or symlink */ - dout("atomic_open finish_no_open on dn %p\n", dn); + doutc(cl, "finish_no_open on dn %p\n", dn); err = finish_no_open(file, dn); } else { if (IS_ENCRYPTED(dir) && !fscrypt_has_permitted_context(dir, d_inode(dentry))) { - pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", + pr_warn_client(cl, + "Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", ceph_vinop(dir), ceph_vinop(d_inode(dentry))); goto out_req; } - dout("atomic_open finish_open on dn %p\n", dn); + doutc(cl, "finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { struct inode *newino = d_inode(dentry); @@ -905,17 +927,19 @@ out_req: iput(new_inode); out_ctx: ceph_release_acl_sec_ctx(&as_ctx); - dout("atomic_open result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_release(struct inode *inode, struct file *file) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (S_ISDIR(inode->i_mode)) { struct ceph_dir_file_info *dfi = file->private_data; - dout("release inode %p dir file %p\n", inode, file); + doutc(cl, "%p %llx.%llx dir file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); ceph_put_fmode(ci, dfi->file_info.fmode, 1); @@ -927,7 +951,8 @@ int ceph_release(struct inode *inode, struct file *file) kmem_cache_free(ceph_dir_file_cachep, dfi); } else { struct ceph_file_info *fi = file->private_data; - dout("release inode %p regular file %p\n", inode, file); + doutc(cl, "%p %llx.%llx regular file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&fi->rw_contexts)); ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); @@ -962,7 +987,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, u64 *last_objver) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; ssize_t ret; u64 off = *ki_pos; @@ -971,7 +997,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 objver = 0; - dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len); + doutc(cl, "on inode %p %llx.%llx %llx~%llx\n", inode, + ceph_vinop(inode), *ki_pos, len); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -1005,8 +1032,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); - dout("sync_read orig %llu~%llu reading %llu~%llu", - off, len, read_off, read_len); + doutc(cl, "orig %llu~%llu reading 
%llu~%llu", off, len, + read_off, read_len); req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, read_off, &read_len, 0, 1, @@ -1059,8 +1086,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, objver = req->r_version; i_size = i_size_read(inode); - dout("sync_read %llu~%llu got %zd i_size %llu%s\n", - off, len, ret, i_size, (more ? " MORE" : "")); + doutc(cl, "%llu~%llu got %zd i_size %llu%s\n", off, len, + ret, i_size, (more ? " MORE" : "")); /* Fix it to go to end of extent map */ if (sparse && ret >= 0) @@ -1101,8 +1128,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; - dout("sync_read zero gap %llu~%llu\n", - off + ret, off + ret + zlen); + doutc(cl, "zero gap %llu~%llu\n", off + ret, + off + ret + zlen); ceph_zero_page_vector_range(zoff, zlen, pages); ret += zlen; } @@ -1151,7 +1178,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, if (last_objver) *last_objver = objver; } - dout("sync_read result %zd retry_op %d\n", ret, *retry_op); + doutc(cl, "result %zd retry_op %d\n", ret, *retry_op); return ret; } @@ -1160,9 +1187,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos, - iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); + doutc(cl, "on file %p %llx~%zx %s\n", file, iocb->ki_pos, + iov_iter_count(to), + (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL); } @@ -1190,6 +1219,7 @@ static void ceph_aio_retry_work(struct work_struct *work); static void ceph_aio_complete(struct inode *inode, struct ceph_aio_request *aio_req) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int ret; @@ -1203,7 +1233,7 @@ static void ceph_aio_complete(struct inode *inode, if (!ret) ret = aio_req->total_len; - dout("ceph_aio_complete %p rc %d\n", inode, ret); + doutc(cl, "%p %llx.%llx rc %d\n", inode, ceph_vinop(inode), ret); if (ret >= 0 && aio_req->write) { int dirty; @@ -1242,11 +1272,13 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; unsigned int len = osd_data->bvec_pos.iter.bi_size; bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); + struct ceph_client *cl = ceph_inode_to_client(inode); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); - dout("ceph_aio_complete_req %p rc %d bytes %u\n", inode, rc, len); + doutc(cl, "req %p inode %p %llx.%llx, rc %d bytes %u\n", req, + inode, ceph_vinop(inode), rc, len); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -1256,7 +1288,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; - queue_work(ceph_inode_to_client(inode)->inode_wq, + queue_work(ceph_inode_to_fs_client(inode)->inode_wq, &aio_work->work); return; } @@ -1386,7 +1418,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct 
ceph_client *cl = fsc->client; struct ceph_client_metric *metric = &fsc->mdsc->metric; struct ceph_vino vino; struct ceph_osd_request *req; @@ -1405,9 +1438,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", - (write ? "write" : "read"), file, pos, (unsigned)count, - snapc, snapc ? snapc->seq : 0); + doutc(cl, "sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", + (write ? "write" : "read"), file, pos, (unsigned)count, + snapc, snapc ? snapc->seq : 0); if (write) { int ret2; @@ -1418,7 +1451,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, pos >> PAGE_SHIFT, (pos + count - 1) >> PAGE_SHIFT); if (ret2 < 0) - dout("invalidate_inode_pages2_range returned %d\n", ret2); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret2); flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE; } else { @@ -1610,7 +1644,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; struct page **pages; @@ -1625,8 +1660,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_write on file %p %lld~%u snapc %p seq %lld\n", - file, pos, (unsigned)count, snapc, snapc->seq); + doutc(cl, "on file %p %lld~%u snapc %p seq %lld\n", file, pos, + (unsigned)count, snapc, snapc->seq); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count - 1); @@ -1670,9 +1705,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, last = (pos + len) != (write_pos + write_len); rmw = first || last; - dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", - ci->i_vino.ino, pos, len, write_pos, write_len, - rmw ? "" : "no "); + doutc(cl, "ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", + ci->i_vino.ino, pos, len, write_pos, write_len, + rmw ? "" : "no "); /* * The data is emplaced into the page as it would be if it were @@ -1881,7 +1916,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, left -= ret; } if (ret < 0) { - dout("sync_write write failed with %d\n", ret); + doutc(cl, "write failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1891,7 +1926,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, write_pos, write_len, GFP_KERNEL); if (ret < 0) { - dout("encryption failed with %d\n", ret); + doutc(cl, "encryption failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1910,7 +1945,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, break; } - dout("sync_write write op %lld~%llu\n", write_pos, write_len); + doutc(cl, "write op %lld~%llu\n", write_pos, write_len); osd_req_op_extent_osd_data_pages(req, rmw ? 
1 : 0, pages, write_len, offset_in_page(write_pos), false, true); @@ -1941,7 +1976,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, req->r_end_latency, len, ret); ceph_osdc_put_request(req); if (ret != 0) { - dout("sync_write osd write returned %d\n", ret); + doutc(cl, "osd write returned %d\n", ret); /* Version changed! Must re-do the rmw cycle */ if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) || (!assert_ver && ret == -EEXIST)) { @@ -1971,13 +2006,13 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, pos >> PAGE_SHIFT, (pos + len - 1) >> PAGE_SHIFT); if (ret < 0) { - dout("invalidate_inode_pages2_range returned %d\n", - ret); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret); ret = 0; } pos += len; written += len; - dout("sync_write written %d\n", written); + doutc(cl, "written %d\n", written); if (pos > i_size_read(inode)) { check_caps = ceph_inode_set_size(inode, pos); if (check_caps) @@ -1991,7 +2026,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, ret = written; iocb->ki_pos = pos; } - dout("sync_write returning %d\n", ret); + doutc(cl, "returning %d\n", ret); return ret; } @@ -2010,13 +2045,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); bool direct_lock = iocb->ki_flags & IOCB_DIRECT; + struct ceph_client *cl = ceph_inode_to_client(inode); ssize_t ret; int want = 0, got = 0; int retry_op = 0, read = 0; again: - dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); + doutc(cl, "%llu~%u trying to get caps on %p %llx.%llx\n", + iocb->ki_pos, (unsigned)len, inode, ceph_vinop(inode)); if (ceph_inode_is_shutdown(inode)) return -ESTALE; @@ -2044,9 +2080,9 @@ again: (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { - dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "sync %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); if (!ceph_has_inline_data(ci)) { if (!retry_op && @@ -2064,16 +2100,16 @@ again: } } else { CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); - dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "async %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); ceph_del_rw_context(fi, &rw_ctx); } - dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", - inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + doutc(cl, "%p %llx.%llx dropping cap refs on %s = %d\n", + inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); if (direct_lock) @@ -2133,8 +2169,8 @@ again: /* hit EOF or hole? 
*/ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { - dout("sync_read hit hole, ppos %lld < size %lld" - ", reading more\n", iocb->ki_pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld, reading more\n", + iocb->ki_pos, i_size); read += ret; len -= ret; @@ -2228,7 +2264,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; @@ -2296,8 +2333,9 @@ retry_snap: if (err) goto out; - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, count, i_size_read(inode)); + doutc(cl, "%p %llx.%llx %llu~%zd getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, count, + i_size_read(inode)); if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_BUFFER; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2313,8 +2351,8 @@ retry_snap: inode_inc_iversion_raw(inode); - dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", - inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%zd got cap refs on %s\n", + inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) || @@ -2374,14 +2412,14 @@ retry_snap: ceph_check_caps(ci, CHECK_CAPS_FLUSH); } - dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)count, - ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, (unsigned)count, + ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (written == -EOLDSNAPC) { - dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", - inode, ceph_vinop(inode), pos, (unsigned)count); + doutc(cl, "%p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", + inode, ceph_vinop(inode), pos, (unsigned)count); goto retry_snap; } @@ -2462,7 +2500,7 @@ static int ceph_zero_partial_object(struct inode *inode, loff_t offset, loff_t *length) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_request *req; int ret = 0; loff_t zero = 0; @@ -2553,14 +2591,15 @@ static long ceph_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *prealloc_cf; + struct ceph_client *cl = ceph_inode_to_client(inode); int want, got = 0; int dirty; int ret = 0; loff_t endoff = 0; loff_t size; - dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__, - inode, ceph_vinop(inode), mode, offset, length); + doutc(cl, "%p %llx.%llx mode %x, offset %llu length %llu\n", + inode, ceph_vinop(inode), mode, offset, length); if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -2689,6 +2728,7 @@ static void put_rd_wr_caps(struct ceph_inode_info *src_ci, int src_got, static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, loff_t src_off, loff_t dst_off, size_t len) { + struct ceph_client 
*cl = ceph_inode_to_client(src_inode); loff_t size, endoff; size = i_size_read(src_inode); @@ -2699,8 +2739,8 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, * inode. */ if (src_off + len > size) { - dout("Copy beyond EOF (%llu + %zu > %llu)\n", - src_off, len, size); + doutc(cl, "Copy beyond EOF (%llu + %zu > %llu)\n", src_off, + len, size); return -EOPNOTSUPP; } size = i_size_read(dst_inode); @@ -2776,6 +2816,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen, dst_objlen; u32 object_size = src_ci->i_layout.object_size; + struct ceph_client *cl = fsc->client; int ret; src_oloc.pool = src_ci->i_layout.pool_id; @@ -2817,9 +2858,10 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off if (ret) { if (ret == -EOPNOTSUPP) { fsc->have_copy_from2 = false; - pr_notice("OSDs don't support copy-from2; disabling copy offload\n"); + pr_notice_client(cl, + "OSDs don't support copy-from2; disabling copy offload\n"); } - dout("ceph_osdc_copy_from returned %d\n", ret); + doutc(cl, "returned %d\n", ret); if (!bytes) bytes = ret; goto out; @@ -2845,7 +2887,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_inode_info *src_ci = ceph_inode(src_inode); struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); struct ceph_cap_flush *prealloc_cf; - struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode); + struct ceph_fs_client *src_fsc = ceph_inode_to_fs_client(src_inode); + struct ceph_client *cl = src_fsc->client; loff_t size; ssize_t ret = -EIO, bytes; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; @@ -2853,7 +2896,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, int src_got = 0, dst_got = 0, err, dirty; if (src_inode->i_sb != dst_inode->i_sb) { - struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); + struct ceph_fs_client *dst_fsc = ceph_inode_to_fs_client(dst_inode); if (ceph_fsid_compare(&src_fsc->client->fsid, &dst_fsc->client->fsid)) { @@ -2888,7 +2931,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, (src_ci->i_layout.stripe_count != 1) || (dst_ci->i_layout.stripe_count != 1) || (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) { - dout("Invalid src/dst files layout\n"); + doutc(cl, "Invalid src/dst files layout\n"); return -EOPNOTSUPP; } @@ -2906,12 +2949,12 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, /* Start by sync'ing the source and destination files */ ret = file_write_and_wait_range(src_file, src_off, (src_off + len)); if (ret < 0) { - dout("failed to write src file (%zd)\n", ret); + doutc(cl, "failed to write src file (%zd)\n", ret); goto out; } ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len)); if (ret < 0) { - dout("failed to write dst file (%zd)\n", ret); + doutc(cl, "failed to write dst file (%zd)\n", ret); goto out; } @@ -2923,7 +2966,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, err = get_rd_wr_caps(src_file, &src_got, dst_file, (dst_off + len), &dst_got); if (err < 0) { - dout("get_rd_wr_caps returned %d\n", err); + doutc(cl, "get_rd_wr_caps returned %d\n", err); ret = -EOPNOTSUPP; goto out; } @@ -2938,7 +2981,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, dst_off >> PAGE_SHIFT, (dst_off + len) >> PAGE_SHIFT); if (ret < 0) { - dout("Failed to invalidate inode 
pages (%zd)\n", ret); + doutc(cl, "Failed to invalidate inode pages (%zd)\n", + ret); ret = 0; /* XXX */ } ceph_calc_file_object_mapping(&src_ci->i_layout, src_off, @@ -2959,7 +3003,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * starting at the src_off */ if (src_objoff) { - dout("Initial partial copy of %u bytes\n", src_objlen); + doutc(cl, "Initial partial copy of %u bytes\n", src_objlen); /* * we need to temporarily drop all caps as we'll be calling @@ -2970,7 +3014,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, &dst_off, src_objlen, flags); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { - dout("Failed partial copy (%zd)\n", ret); + doutc(cl, "Failed partial copy (%zd)\n", ret); goto out; } len -= ret; @@ -2992,7 +3036,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = bytes; goto out_caps; } - dout("Copied %zu bytes out of %zu\n", bytes, len); + doutc(cl, "Copied %zu bytes out of %zu\n", bytes, len); len -= bytes; ret += bytes; @@ -3020,13 +3064,13 @@ out_caps: * there were errors in remote object copies (len >= object_size). */ if (len && (len < src_ci->i_layout.object_size)) { - dout("Final partial copy of %zu bytes\n", len); + doutc(cl, "Final partial copy of %zu bytes\n", len); bytes = do_splice_direct(src_file, &src_off, dst_file, &dst_off, len, flags); if (bytes > 0) ret += bytes; else - dout("Failed partial copy (%zd)\n", bytes); + doutc(cl, "Failed partial copy (%zd)\n", bytes); } out: diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2e2a303b9e64..0679240f06db 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -129,6 +129,8 @@ void ceph_as_ctx_to_req(struct ceph_mds_request *req, struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, struct inode *newino) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; if (ceph_vino_is_reserved(vino)) @@ -145,12 +147,13 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, } if (!inode) { - dout("No inode found for %llx.%llx\n", vino.ino, vino.snap); + doutc(cl, "no inode found for %llx.%llx\n", vino.ino, vino.snap); return ERR_PTR(-ENOMEM); } - dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode), - ceph_vinop(inode), inode, !!(inode->i_state & I_NEW)); + doutc(cl, "on %llx=%llx.%llx got %p new %d\n", + ceph_present_inode(inode), ceph_vinop(inode), inode, + !!(inode->i_state & I_NEW)); return inode; } @@ -159,6 +162,7 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, */ struct inode *ceph_get_snapdir(struct inode *parent) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct ceph_vino vino = { .ino = ceph_ino(parent), .snap = CEPH_SNAPDIR, @@ -171,14 +175,14 @@ struct inode *ceph_get_snapdir(struct inode *parent) return inode; if (!S_ISDIR(parent->i_mode)) { - pr_warn_once("bad snapdir parent type (mode=0%o)\n", - parent->i_mode); + pr_warn_once_client(cl, "bad snapdir parent type (mode=0%o)\n", + parent->i_mode); goto err; } if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { - pr_warn_once("bad snapdir inode type (mode=0%o)\n", - inode->i_mode); + pr_warn_once_client(cl, "bad snapdir inode type (mode=0%o)\n", + inode->i_mode); goto err; } @@ -203,7 +207,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_flags |= S_ENCRYPTED; ci->fscrypt_auth_len = pci->fscrypt_auth_len; } else { - dout("Failed to 
alloc snapdir fscrypt_auth\n"); + doutc(cl, "Failed to alloc snapdir fscrypt_auth\n"); ret = -ENOMEM; goto err; } @@ -249,6 +253,8 @@ const struct inode_operations ceph_file_iops = { static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, u32 f) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_frag *frag; @@ -279,8 +285,7 @@ static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, rb_link_node(&frag->node, parent, p); rb_insert_color(&frag->node, &ci->i_fragtree); - dout("get_or_create_frag added %llx.%llx frag %x\n", - ceph_vinop(&ci->netfs.inode), f); + doutc(cl, "added %p %llx.%llx frag %x\n", inode, ceph_vinop(inode), f); return frag; } @@ -313,6 +318,7 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, struct ceph_inode_frag *pfrag, int *found) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; unsigned nway, i; @@ -336,8 +342,8 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, /* choose child */ nway = 1 << frag->split_by; - dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t, - frag->split_by, nway); + doutc(cl, "frag(%x) %x splits by %d (%d ways)\n", v, t, + frag->split_by, nway); for (i = 0; i < nway; i++) { n = ceph_frag_make_child(t, frag->split_by, i); if (ceph_frag_contains_value(n, v)) { @@ -347,7 +353,7 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } BUG_ON(i == nway); } - dout("choose_frag(%x) = %x\n", v, t); + doutc(cl, "frag(%x) = %x\n", v, t); return t; } @@ -371,6 +377,7 @@ static int ceph_fill_dirfrag(struct inode *inode, struct ceph_mds_reply_dirfrag *dirinfo) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); @@ -395,14 +402,14 @@ static int ceph_fill_dirfrag(struct inode *inode, goto out; if (frag->split_by == 0) { /* tree leaf, remove */ - dout("fill_dirfrag removed %llx.%llx frag %x" - " (no ref)\n", ceph_vinop(inode), id); + doutc(cl, "removed %p %llx.%llx frag %x (no ref)\n", + inode, ceph_vinop(inode), id); rb_erase(&frag->node, &ci->i_fragtree); kfree(frag); } else { /* tree branch, keep and clear */ - dout("fill_dirfrag cleared %llx.%llx frag %x" - " referral\n", ceph_vinop(inode), id); + doutc(cl, "cleared %p %llx.%llx frag %x referral\n", + inode, ceph_vinop(inode), id); frag->mds = -1; frag->ndist = 0; } @@ -415,8 +422,9 @@ static int ceph_fill_dirfrag(struct inode *inode, if (IS_ERR(frag)) { /* this is not the end of the world; we can continue with bad/inaccurate delegation info */ - pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n", - ceph_vinop(inode), le32_to_cpu(dirinfo->frag)); + pr_err_client(cl, "ENOMEM on mds ref %p %llx.%llx fg %x\n", + inode, ceph_vinop(inode), + le32_to_cpu(dirinfo->frag)); err = -ENOMEM; goto out; } @@ -425,8 +433,8 @@ static int ceph_fill_dirfrag(struct inode *inode, frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP); for (i = 0; i < frag->ndist; i++) frag->dist[i] = le32_to_cpu(dirinfo->dist[i]); - dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n", - ceph_vinop(inode), frag->frag, frag->ndist); + doutc(cl, "%p %llx.%llx frag %x ndist=%d\n", inode, + ceph_vinop(inode), frag->frag, 
frag->ndist); out: mutex_unlock(&ci->i_fragtree_mutex); @@ -454,6 +462,7 @@ static int ceph_fill_fragtree(struct inode *inode, struct ceph_frag_tree_head *fragtree, struct ceph_mds_reply_dirfrag *dirinfo) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_frag *frag, *prev_frag = NULL; struct rb_node *rb_node; @@ -489,15 +498,15 @@ static int ceph_fill_fragtree(struct inode *inode, frag_tree_split_cmp, NULL); } - dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); rb_node = rb_first(&ci->i_fragtree); for (i = 0; i < nsplits; i++) { id = le32_to_cpu(fragtree->splits[i].frag); split_by = le32_to_cpu(fragtree->splits[i].by); if (split_by == 0 || ceph_frag_bits(id) + split_by > 24) { - pr_err("fill_fragtree %llx.%llx invalid split %d/%u, " - "frag %x split by %d\n", ceph_vinop(inode), - i, nsplits, id, split_by); + pr_err_client(cl, "%p %llx.%llx invalid split %d/%u, " + "frag %x split by %d\n", inode, + ceph_vinop(inode), i, nsplits, id, split_by); continue; } frag = NULL; @@ -529,7 +538,7 @@ static int ceph_fill_fragtree(struct inode *inode, if (frag->split_by == 0) ci->i_fragtree_nsplits++; frag->split_by = split_by; - dout(" frag %x split by %d\n", frag->frag, frag->split_by); + doutc(cl, " frag %x split by %d\n", frag->frag, frag->split_by); prev_frag = frag; } while (rb_node) { @@ -554,6 +563,7 @@ out_unlock: */ struct inode *ceph_alloc_inode(struct super_block *sb) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_inode_info *ci; int i; @@ -561,7 +571,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) if (!ci) return NULL; - dout("alloc_inode %p\n", &ci->netfs.inode); + doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops); @@ -675,10 +685,11 @@ void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; struct rb_node *n; - dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + doutc(cl, "%p ino %llx.%llx\n", inode, ceph_vinop(inode)); percpu_counter_dec(&mdsc->metric.total_inodes); @@ -701,8 +712,8 @@ void ceph_evict_inode(struct inode *inode) */ if (ci->i_snap_realm) { if (ceph_snap(inode) == CEPH_NOSNAP) { - dout(" dropping residual ref to snap realm %p\n", - ci->i_snap_realm); + doutc(cl, " dropping residual ref to snap realm %p\n", + ci->i_snap_realm); ceph_change_snap_realm(inode, NULL); } else { ceph_put_snapid_map(mdsc, ci->i_snapid_map); @@ -743,15 +754,16 @@ static inline blkcnt_t calc_inode_blocks(u64 size) int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int queue_trunc = 0; loff_t isize = i_size_read(inode); if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 || (truncate_seq == ci->i_truncate_seq && size > isize)) { - dout("size %lld -> %llu\n", isize, size); + doutc(cl, "size %lld -> %llu\n", isize, size); if (size > 0 && S_ISDIR(inode->i_mode)) { - pr_err("fill_file_size non-zero size for directory\n"); + pr_err_client(cl, "non-zero size for directory\n"); size = 0; } i_size_write(inode, size); @@ -764,8 +776,8 @@ int ceph_fill_file_size(struct inode *inode, int issued, 
ceph_fscache_update(inode); ci->i_reported_size = size; if (truncate_seq != ci->i_truncate_seq) { - dout("%s truncate_seq %u -> %u\n", __func__, - ci->i_truncate_seq, truncate_seq); + doutc(cl, "truncate_seq %u -> %u\n", + ci->i_truncate_seq, truncate_seq); ci->i_truncate_seq = truncate_seq; /* the MDS should have revoked these caps */ @@ -794,14 +806,15 @@ int ceph_fill_file_size(struct inode *inode, int issued, * anyway. */ if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) { - dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__, - ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode)); + doutc(cl, "truncate_size %lld -> %llu, encrypted %d\n", + ci->i_truncate_size, truncate_size, + !!IS_ENCRYPTED(inode)); ci->i_truncate_size = truncate_size; if (IS_ENCRYPTED(inode)) { - dout("%s truncate_pagecache_size %lld -> %llu\n", - __func__, ci->i_truncate_pagecache_size, size); + doutc(cl, "truncate_pagecache_size %lld -> %llu\n", + ci->i_truncate_pagecache_size, size); ci->i_truncate_pagecache_size = size; } else { ci->i_truncate_pagecache_size = truncate_size; @@ -814,6 +827,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec64 *ctime, struct timespec64 *mtime, struct timespec64 *atime) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct timespec64 ictime = inode_get_ctime(inode); int warn = 0; @@ -825,7 +839,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || timespec64_compare(ctime, &ictime) > 0) { - dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", + doutc(cl, "ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", ictime.tv_sec, ictime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode_set_ctime_to_ts(inode, *ctime); @@ -833,8 +847,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ - dout("mtime %lld.%09ld -> %lld.%09ld " - "tw %d -> %d\n", + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld tw %d -> %d\n", inode_get_mtime_sec(inode), inode_get_mtime_nsec(inode), mtime->tv_sec, mtime->tv_nsec, @@ -849,14 +862,14 @@ void ceph_fill_file_time(struct inode *inode, int issued, /* nobody did utimes(); take the max */ ts = inode_get_mtime(inode); if (timespec64_compare(mtime, &ts) > 0) { - dout("mtime %lld.%09ld -> %lld.%09ld inc\n", + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld inc\n", ts.tv_sec, ts.tv_nsec, mtime->tv_sec, mtime->tv_nsec); inode_set_mtime_to_ts(inode, *mtime); } ts = inode_get_atime(inode); if (timespec64_compare(atime, &ts) > 0) { - dout("atime %lld.%09ld -> %lld.%09ld inc\n", + doutc(cl, "atime %lld.%09ld -> %lld.%09ld inc\n", ts.tv_sec, ts.tv_nsec, atime->tv_sec, atime->tv_nsec); inode_set_atime_to_ts(inode, *atime); @@ -878,13 +891,16 @@ void ceph_fill_file_time(struct inode *inode, int issued, } } if (warn) /* time_warp_seq shouldn't go backwards */ - dout("%p mds time_warp_seq %llu < %u\n", - inode, time_warp_seq, ci->i_time_warp_seq); + doutc(cl, "%p mds time_warp_seq %llu < %u\n", inode, + time_warp_seq, ci->i_time_warp_seq); } #if IS_ENABLED(CONFIG_FS_ENCRYPTION) -static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int enclen, u8 **decsym) { + struct ceph_client *cl = mdsc->fsc->client; int declen; u8 *sym; @@ -894,8 +910,9 @@ static int 
decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) declen = ceph_base64_decode(encsym, enclen, sym); if (declen < 0) { - pr_err("%s: can't decode symlink (%d). Content: %.*s\n", - __func__, declen, enclen, encsym); + pr_err_client(cl, + "can't decode symlink (%d). Content: %.*s\n", + declen, enclen, encsym); kfree(sym); return -EIO; } @@ -904,7 +921,9 @@ static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) return declen; } #else -static int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int symlen, u8 **decsym) { return -EOPNOTSUPP; } @@ -921,6 +940,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, struct ceph_cap_reservation *caps_reservation) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; @@ -939,25 +959,26 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, lockdep_assert_held(&mdsc->snap_rwsem); - dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, - inode, ceph_vinop(inode), le64_to_cpu(info->version), - ci->i_version); + doutc(cl, "%p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), + le64_to_cpu(info->version), ci->i_version); /* Once I_NEW is cleared, we can't change type or dev numbers */ if (inode->i_state & I_NEW) { inode->i_mode = mode; } else { if (inode_wrong_type(inode, mode)) { - pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", - ceph_vinop(inode), inode->i_mode, mode); + pr_warn_once_client(cl, + "inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", + ceph_vinop(inode), inode->i_mode, mode); return -ESTALE; } if ((S_ISCHR(mode) || S_ISBLK(mode)) && inode->i_rdev != rdev) { - pr_warn_once("dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n", - ceph_vinop(inode), MAJOR(inode->i_rdev), - MINOR(inode->i_rdev), MAJOR(rdev), - MINOR(rdev)); + pr_warn_once_client(cl, + "dev inode rdev changed! 
(ino %llx.%llx is %u:%u, mds says %u:%u)\n", + ceph_vinop(inode), MAJOR(inode->i_rdev), + MINOR(inode->i_rdev), MAJOR(rdev), + MINOR(rdev)); return -ESTALE; } } @@ -979,8 +1000,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (iinfo->xattr_len > 4) { xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); if (!xattr_blob) - pr_err("%s ENOMEM xattr blob %d bytes\n", __func__, - iinfo->xattr_len); + pr_err_client(cl, "ENOMEM xattr blob %d bytes\n", + iinfo->xattr_len); } if (iinfo->pool_ns_len > 0) @@ -1034,9 +1055,10 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); } @@ -1092,7 +1114,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) { size = fsize; } else { - pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", + pr_warn_client(cl, + "fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", info->size, size); } } @@ -1104,8 +1127,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, /* only update max_size on auth cap */ if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); + doutc(cl, "max_size %lld -> %llu\n", + ci->i_max_size, le64_to_cpu(info->max_size)); ci->i_max_size = le64_to_cpu(info->max_size); } } @@ -1168,15 +1191,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (IS_ENCRYPTED(inode)) { if (symlen != i_size_read(inode)) - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); - err = decode_encrypted_symlink(iinfo->symlink, + err = decode_encrypted_symlink(mdsc, iinfo->symlink, symlen, (u8 **)&sym); if (err < 0) { - pr_err("%s decoding encrypted symlink failed: %d\n", - __func__, err); + pr_err_client(cl, + "decoding encrypted symlink failed: %d\n", + err); goto out; } symlen = err; @@ -1184,8 +1209,9 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_blocks = calc_inode_blocks(symlen); } else { if (symlen != i_size_read(inode)) { - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); i_size_write(inode, symlen); inode->i_blocks = calc_inode_blocks(symlen); @@ -1220,8 +1246,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_fop = &ceph_dir_fops; break; default: - pr_err("%s %llx.%llx BAD mode 0%o\n", __func__, - ceph_vinop(inode), inode->i_mode); + pr_err_client(cl, "%p %llx.%llx BAD mode 0%o\n", inode, + ceph_vinop(inode), inode->i_mode); } /* were we issued a capability? 
*/ @@ -1242,7 +1268,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, (info_caps & CEPH_CAP_FILE_SHARED) && (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { - dout(" marking %p complete (empty)\n", inode); + doutc(cl, " marking %p complete (empty)\n", + inode); i_size_write(inode, 0); __ceph_dir_set_complete(ci, atomic64_read(&ci->i_release_count), @@ -1251,8 +1278,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, wake = true; } else { - dout(" %p got snap_caps %s\n", inode, - ceph_cap_string(info_caps)); + doutc(cl, " %p got snap_caps %s\n", inode, + ceph_cap_string(info_caps)); ci->i_snap_caps |= info_caps; } } @@ -1268,8 +1295,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (cap_fmode >= 0) { if (!info_caps) - pr_warn("mds issued no caps on %llx.%llx\n", - ceph_vinop(inode)); + pr_warn_client(cl, "mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); __ceph_touch_fmode(ci, mdsc, cap_fmode); } @@ -1315,14 +1342,14 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, unsigned long from_time, struct ceph_mds_session **old_lease_session) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_dentry_info *di = ceph_dentry(dentry); unsigned mask = le16_to_cpu(lease->mask); long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; - dout("update_dentry_lease %p duration %lu ms ttl %lu\n", - dentry, duration, ttl); + doutc(cl, "%p duration %lu ms ttl %lu\n", dentry, duration, ttl); /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) @@ -1423,6 +1450,7 @@ out_unlock: */ static int splice_dentry(struct dentry **pdn, struct inode *in) { + struct ceph_client *cl = ceph_inode_to_client(in); struct dentry *dn = *pdn; struct dentry *realdn; @@ -1454,23 +1482,21 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) d_drop(dn); realdn = d_splice_alias(in, dn); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", - PTR_ERR(realdn), dn, in, ceph_vinop(in)); + pr_err_client(cl, "error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); return PTR_ERR(realdn); } if (realdn) { - dout("dn %p (%d) spliced with %p (%d) " - "inode %p ino %llx.%llx\n", - dn, d_count(dn), - realdn, d_count(realdn), - d_inode(realdn), ceph_vinop(d_inode(realdn))); + doutc(cl, "dn %p (%d) spliced with %p (%d) inode %p ino %llx.%llx\n", + dn, d_count(dn), realdn, d_count(realdn), + d_inode(realdn), ceph_vinop(d_inode(realdn))); dput(dn); *pdn = realdn; } else { BUG_ON(!ceph_dentry(dn)); - dout("dn %p attached to %p ino %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn))); + doutc(cl, "dn %p attached to %p ino %llx.%llx\n", dn, + d_inode(dn), ceph_vinop(d_inode(dn))); } return 0; } @@ -1492,14 +1518,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; struct ceph_vino tvino, dvino; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; int err = 0; - dout("fill_trace %p is_dentry %d is_target %d\n", req, - rinfo->head->is_dentry, rinfo->head->is_target); + doutc(cl, "%p is_dentry %d is_target %d\n", req, + rinfo->head->is_dentry, rinfo->head->is_target); if 
(!rinfo->head->is_target && !rinfo->head->is_dentry) { - dout("fill_trace reply is empty!\n"); + doutc(cl, "reply is empty!\n"); if (rinfo->head->result == 0 && req->r_parent) ceph_invalidate_dir_request(req); return 0; @@ -1556,13 +1583,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { dput(parent); ceph_fname_free_buffer(dir, &oname); @@ -1578,8 +1605,8 @@ retry_lookup: } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); ceph_dir_clear_ordered(dir); d_delete(dn); dput(dn); @@ -1604,8 +1631,8 @@ retry_lookup: rinfo->head->result == 0) ? req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { - pr_err("ceph_fill_inode badness %p %llx.%llx\n", - in, ceph_vinop(in)); + pr_err_client(cl, "badness %p %llx.%llx\n", in, + ceph_vinop(in)); req->r_target_inode = NULL; if (in->i_state & I_NEW) discard_new_inode(in); @@ -1655,36 +1682,32 @@ retry_lookup: have_lease = have_dir_cap || le32_to_cpu(rinfo->dlease->duration_ms); if (!have_lease) - dout("fill_trace no dentry lease or dir cap\n"); + doutc(cl, "no dentry lease or dir cap\n"); /* rename? */ if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { struct inode *olddir = req->r_old_dentry_dir; BUG_ON(!olddir); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); - dout("fill_trace doing d_move %p -> %p\n", - req->r_old_dentry, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); + doutc(cl, "doing d_move %p -> %p\n", req->r_old_dentry, dn); /* d_move screws up sibling dentries' offsets */ ceph_dir_clear_ordered(dir); ceph_dir_clear_ordered(olddir); d_move(req->r_old_dentry, dn); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, - ceph_dentry(req->r_old_dentry)->offset); + doutc(cl, "dn %p gets new offset %lld\n", + req->r_old_dentry, + ceph_dentry(req->r_old_dentry)->offset); /* swap r_dentry and r_old_dentry in case that * splice_dentry() gets called later. This is safe @@ -1696,9 +1719,9 @@ retry_lookup: /* null dentry? 
*/ if (!rinfo->head->is_target) { - dout("fill_trace null dentry\n"); + doutc(cl, "null dentry\n"); if (d_really_is_positive(dn)) { - dout("d_delete %p\n", dn); + doutc(cl, "d_delete %p\n", dn); ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { @@ -1722,9 +1745,9 @@ retry_lookup: goto done; dn = req->r_dentry; /* may have spliced */ } else if (d_really_is_positive(dn) && d_inode(dn) != in) { - dout(" %p links to %p %llx.%llx, not %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn)), - ceph_vinop(in)); + doutc(cl, " %p links to %p %llx.%llx, not %llx.%llx\n", + dn, d_inode(dn), ceph_vinop(d_inode(dn)), + ceph_vinop(in)); d_invalidate(dn); have_lease = false; } @@ -1734,7 +1757,7 @@ retry_lookup: rinfo->dlease, session, req->r_request_started); } - dout(" final dn %p\n", dn); + doutc(cl, " final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && @@ -1745,7 +1768,8 @@ retry_lookup: BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); BUG_ON(!req->r_dentry); - dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); + doutc(cl, " linking snapped dir %p to dn %p\n", in, + req->r_dentry); ceph_dir_clear_ordered(dir); ihold(in); err = splice_dentry(&req->r_dentry, in); @@ -1767,7 +1791,7 @@ retry_lookup: &dvino, ptvino); } done: - dout("fill_trace done err=%d\n", err); + doutc(cl, "done err=%d\n", err); return err; } @@ -1778,6 +1802,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, struct ceph_mds_session *session) { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; int i, err = 0; for (i = 0; i < rinfo->dir_nr; i++) { @@ -1792,14 +1817,14 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL); if (IS_ERR(in)) { err = PTR_ERR(in); - dout("new_inode badness got %d\n", err); + doutc(cl, "badness got %d\n", err); continue; } rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (rc < 0) { - pr_err("ceph_fill_inode badness on %p got %d\n", - in, rc); + pr_err_client(cl, "inode badness on %p got %d\n", in, + rc); err = rc; if (in->i_state & I_NEW) { ihold(in); @@ -1828,6 +1853,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, struct ceph_readdir_cache_control *ctl, struct ceph_mds_request *req) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); unsigned nsize = PAGE_SIZE / sizeof(struct dentry*); unsigned idx = ctl->index % nsize; @@ -1853,11 +1879,11 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { - dout("readdir cache dn %p idx %d\n", dn, ctl->index); + doutc(cl, "dn %p idx %d\n", dn, ctl->index); ctl->dentries[idx] = dn; ctl->index++; } else { - dout("disable readdir cache\n"); + doutc(cl, "disable readdir cache\n"); ctl->index = -1; } return 0; @@ -1870,6 +1896,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *inode = d_inode(parent); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct qstr dname; struct dentry *dn; struct inode *in; @@ -1897,19 +1924,18 @@ int 
ceph_readdir_prepopulate(struct ceph_mds_request *req, if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { - dout("readdir_prepopulate got new frag %x -> %x\n", - frag, le32_to_cpu(rinfo->dir_dir->frag)); + doutc(cl, "got new frag %x -> %x\n", frag, + le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) req->r_readdir_offset = 2; } if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { - dout("readdir_prepopulate %d items under SNAPDIR dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under SNAPDIR dn %p\n", + rinfo->dir_nr, parent); } else { - dout("readdir_prepopulate %d items under dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under dn %p\n", rinfo->dir_nr, parent); if (rinfo->dir_dir) ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); @@ -1953,15 +1979,15 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { - dout("d_alloc badness\n"); + doutc(cl, "d_alloc badness\n"); err = -ENOMEM; goto out; } @@ -1974,8 +2000,8 @@ retry_lookup: (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { struct ceph_dentry_info *di = ceph_dentry(dn); - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); spin_lock(&dn->d_lock); if (di->offset > 0 && @@ -1997,7 +2023,7 @@ retry_lookup: } else { in = ceph_get_inode(parent->d_sb, tvino, NULL); if (IS_ERR(in)) { - dout("new_inode badness\n"); + doutc(cl, "new_inode badness\n"); d_drop(dn); dput(dn); err = PTR_ERR(in); @@ -2008,7 +2034,8 @@ retry_lookup: ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (ret < 0) { - pr_err("ceph_fill_inode badness on %p\n", in); + pr_err_client(cl, "badness on %p %llx.%llx\n", in, + ceph_vinop(in)); if (d_really_is_negative(dn)) { if (in->i_state & I_NEW) { ihold(in); @@ -2025,8 +2052,8 @@ retry_lookup: if (d_really_is_negative(dn)) { if (ceph_security_xattr_deadlock(in)) { - dout(" skip splicing dn %p to inode %p" - " (security xattr deadlock)\n", dn, in); + doutc(cl, " skip splicing dn %p to inode %p" + " (security xattr deadlock)\n", dn, in); iput(in); skipped++; goto next_item; @@ -2058,17 +2085,18 @@ out: req->r_readdir_cache_idx = cache_ctl.index; } ceph_readdir_cache_release(&cache_ctl); - dout("readdir_prepopulate done\n"); + doutc(cl, "done\n"); return err; } bool ceph_inode_set_size(struct inode *inode, loff_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool ret; spin_lock(&ci->i_ceph_lock); - dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); + doutc(cl, "set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); i_size_write(inode, size); ceph_fscache_update(inode); inode->i_blocks = calc_inode_blocks(size); @@ -2082,22 +2110,25 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) void ceph_queue_inode_work(struct inode *inode, int work_bit) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = 
ceph_inode_to_fs_client(inode);
+	struct ceph_client *cl = fsc->client;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	set_bit(work_bit, &ci->i_work_mask);
 	ihold(inode);
 	if (queue_work(fsc->inode_wq, &ci->i_work)) {
-		dout("queue_inode_work %p, mask=%lx\n", inode, ci->i_work_mask);
+		doutc(cl, "%p %llx.%llx mask=%lx\n", inode,
+		      ceph_vinop(inode), ci->i_work_mask);
 	} else {
-		dout("queue_inode_work %p already queued, mask=%lx\n",
-		     inode, ci->i_work_mask);
+		doutc(cl, "%p %llx.%llx already queued, mask=%lx\n",
+		      inode, ceph_vinop(inode), ci->i_work_mask);
 		iput(inode);
 	}
 }
 
 static void ceph_do_invalidate_pages(struct inode *inode)
 {
+	struct ceph_client *cl = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u32 orig_gen;
 	int check = 0;
@@ -2107,8 +2138,9 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	mutex_lock(&ci->i_truncate_mutex);
 
 	if (ceph_inode_is_shutdown(inode)) {
-		pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n",
-				    __func__, ceph_vinop(inode));
+		pr_warn_ratelimited_client(cl,
+			"%p %llx.%llx is shut down\n", inode,
+			ceph_vinop(inode));
 		mapping_set_error(inode->i_mapping, -EIO);
 		truncate_pagecache(inode, 0);
 		mutex_unlock(&ci->i_truncate_mutex);
@@ -2116,8 +2148,8 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	spin_lock(&ci->i_ceph_lock);
-	dout("invalidate_pages %p gen %d revoking %d\n", inode,
-	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
+	doutc(cl, "%p %llx.%llx gen %d revoking %d\n", inode,
+	      ceph_vinop(inode), ci->i_rdcache_gen, ci->i_rdcache_revoking);
 	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
 		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
 			check = 1;
@@ -2129,21 +2161,21 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (invalidate_inode_pages2(inode->i_mapping) < 0) {
-		pr_err("invalidate_inode_pages2 %llx.%llx failed\n",
-		       ceph_vinop(inode));
+		pr_err_client(cl, "invalidate_inode_pages2 %llx.%llx failed\n",
+			      ceph_vinop(inode));
 	}
 
 	spin_lock(&ci->i_ceph_lock);
 	if (orig_gen == ci->i_rdcache_gen &&
 	    orig_gen == ci->i_rdcache_revoking) {
-		dout("invalidate_pages %p gen %d successful\n", inode,
-		     ci->i_rdcache_gen);
+		doutc(cl, "%p %llx.%llx gen %d successful\n", inode,
+		      ceph_vinop(inode), ci->i_rdcache_gen);
 		ci->i_rdcache_revoking--;
 		check = 1;
 	} else {
-		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
-		     inode, orig_gen, ci->i_rdcache_gen,
-		     ci->i_rdcache_revoking);
+		doutc(cl, "%p %llx.%llx gen %d raced, now %d revoking %d\n",
+		      inode, ceph_vinop(inode), orig_gen, ci->i_rdcache_gen,
+		      ci->i_rdcache_revoking);
 		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
 			check = 1;
 	}
@@ -2160,6 +2192,7 @@ out:
  */
 void __ceph_do_pending_vmtruncate(struct inode *inode)
 {
+	struct ceph_client *cl = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 to;
 	int wrbuffer_refs, finish = 0;
@@ -2168,7 +2201,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_truncate_pending == 0) {
-		dout("%s %p none pending\n", __func__, inode);
+		doutc(cl, "%p %llx.%llx none pending\n", inode,
+		      ceph_vinop(inode));
 		spin_unlock(&ci->i_ceph_lock);
 		mutex_unlock(&ci->i_truncate_mutex);
 		return;
@@ -2180,7 +2214,8 @@ retry:
 	 */
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
 		spin_unlock(&ci->i_ceph_lock);
-		dout("%s %p flushing snaps first\n", __func__, inode);
+		doutc(cl, "%p %llx.%llx flushing snaps first\n", inode,
+		      ceph_vinop(inode));
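Every hunk in this series follows the same conversion: dout() calls that identified an inode only by a raw pointer gain a struct ceph_client * argument (doutc()), and bare pr_err()/pr_warn() calls become their *_client() counterparts, so that on a host with several CephFS mounts each log line says which client emitted it. As a rough sketch of the shape such a client-aware macro takes — the prefix format and field accesses below are assumptions for illustration, not the series' actual super.h definition:

	/*
	 * Illustrative sketch only, not the kernel's doutc(): prefix each
	 * debug line with the client's fsid and global_id so messages from
	 * concurrent mounts can be told apart. Assumes the conventional
	 * struct ceph_client layout (fsid, monc.auth->global_id).
	 */
	#define doutc_sketch(client, fmt, ...)				\
		pr_debug("[%pU %llu] " fmt, &(client)->fsid,		\
			 (client)->monc.auth->global_id, ##__VA_ARGS__)

	/* A call site then mirrors the converted lines in these hunks: */
	doutc_sketch(cl, "%p %llx.%llx none pending\n", inode,
		     ceph_vinop(inode));

The doubled inode identification ("%p %llx.%llx", the in-core pointer plus the vino) is also what lets the converted strings drop the "sync_write"/"setattr"-style prefixes the old dout() messages carried: dout() is built on pr_debug(), whose dynamic-debug metadata can already report the originating function.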
filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; @@ -2191,8 +2226,8 @@ retry: to = ci->i_truncate_pagecache_size; wrbuffer_refs = ci->i_wrbuffer_ref; - dout("%s %p (%d) to %lld\n", __func__, inode, - ci->i_truncate_pending, to); + doutc(cl, "%p %llx.%llx (%d) to %lld\n", inode, ceph_vinop(inode), + ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); ceph_fscache_resize(inode, to); @@ -2220,9 +2255,10 @@ static void ceph_inode_work(struct work_struct *work) struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, i_work); struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { - dout("writeback %p\n", inode); + doutc(cl, "writeback %p %llx.%llx\n", inode, ceph_vinop(inode)); filemap_fdatawrite(&inode->i_data); } if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) @@ -2294,6 +2330,7 @@ static int fill_fscrypt_truncate(struct inode *inode, struct ceph_mds_request *req, struct iattr *attr) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE; loff_t pos, orig_pos = round_down(attr->ia_size, @@ -2316,9 +2353,9 @@ static int fill_fscrypt_truncate(struct inode *inode, issued = __ceph_caps_issued(ci, NULL); - dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__, - i_size, attr->ia_size, ceph_cap_string(got), - ceph_cap_string(issued)); + doutc(cl, "size %lld -> %lld got cap refs on %s, issued %s\n", + i_size, attr->ia_size, ceph_cap_string(got), + ceph_cap_string(issued)); /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { @@ -2373,8 +2410,7 @@ static int fill_fscrypt_truncate(struct inode *inode, * If the Rados object doesn't exist, it will be set to 0. 
*/ if (!objver) { - dout("%s hit hole, ppos %lld < size %lld\n", __func__, - pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld\n", pos, i_size); header.data_len = cpu_to_le32(8 + 8 + 4); header.file_offset = 0; @@ -2383,8 +2419,8 @@ static int fill_fscrypt_truncate(struct inode *inode, header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); header.file_offset = cpu_to_le64(orig_pos); - dout("%s encrypt block boff/bsize %d/%lu\n", __func__, - boff, CEPH_FSCRYPT_BLOCK_SIZE); + doutc(cl, "encrypt block boff/bsize %d/%lu\n", boff, + CEPH_FSCRYPT_BLOCK_SIZE); /* truncate and zero out the extra contents for the last block */ memset(iov.iov_base + boff, 0, PAGE_SIZE - boff); @@ -2412,8 +2448,8 @@ static int fill_fscrypt_truncate(struct inode *inode, } req->r_pagelist = pagelist; out: - dout("%s %p size dropping cap refs on %s\n", __func__, - inode, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx size dropping cap refs on %s\n", inode, + ceph_vinop(inode), ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (iov.iov_base) kunmap_local(iov.iov_base); @@ -2424,13 +2460,14 @@ out: return ret; } -int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia) +int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia) { struct ceph_inode_info *ci = ceph_inode(inode); unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; loff_t isize = i_size_read(inode); int issued; @@ -2469,7 +2506,8 @@ retry: } } - dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx issued %s\n", inode, ceph_vinop(inode), + ceph_cap_string(issued)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (cia && cia->fscrypt_auth) { u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth); @@ -2480,8 +2518,8 @@ retry: goto out; } - dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n", - ceph_vinop(inode), ci->fscrypt_auth_len, len); + doutc(cl, "%p %llx.%llx fscrypt_auth len %u to %u)\n", inode, + ceph_vinop(inode), ci->fscrypt_auth_len, len); /* It should never be re-set once set */ WARN_ON_ONCE(ci->fscrypt_auth); @@ -2509,38 +2547,44 @@ retry: #endif /* CONFIG_FS_ENCRYPTION */ if (ia_valid & ATTR_UID) { - dout("setattr %p uid %d -> %d\n", inode, - from_kuid(&init_user_ns, inode->i_uid), - from_kuid(&init_user_ns, attr->ia_uid)); + kuid_t fsuid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); + + doutc(cl, "%p %llx.%llx uid %d -> %d\n", inode, + ceph_vinop(inode), + from_kuid(&init_user_ns, inode->i_uid), + from_kuid(&init_user_ns, attr->ia_uid)); if (issued & CEPH_CAP_AUTH_EXCL) { - inode->i_uid = attr->ia_uid; + inode->i_uid = fsuid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || - !uid_eq(attr->ia_uid, inode->i_uid)) { + !uid_eq(fsuid, inode->i_uid)) { req->r_args.setattr.uid = cpu_to_le32( - from_kuid(&init_user_ns, attr->ia_uid)); + from_kuid(&init_user_ns, fsuid)); mask |= CEPH_SETATTR_UID; release |= CEPH_CAP_AUTH_SHARED; } } if (ia_valid & ATTR_GID) { - dout("setattr %p gid %d -> %d\n", inode, - from_kgid(&init_user_ns, inode->i_gid), - from_kgid(&init_user_ns, attr->ia_gid)); + kgid_t fsgid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); + + doutc(cl, "%p %llx.%llx gid %d -> %d\n", inode, + ceph_vinop(inode), + 
from_kgid(&init_user_ns, inode->i_gid),
+		      from_kgid(&init_user_ns, attr->ia_gid));
 		if (issued & CEPH_CAP_AUTH_EXCL) {
-			inode->i_gid = attr->ia_gid;
+			inode->i_gid = fsgid;
 			dirtied |= CEPH_CAP_AUTH_EXCL;
 		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
-			   !gid_eq(attr->ia_gid, inode->i_gid)) {
+			   !gid_eq(fsgid, inode->i_gid)) {
 			req->r_args.setattr.gid = cpu_to_le32(
-				from_kgid(&init_user_ns, attr->ia_gid));
+				from_kgid(&init_user_ns, fsgid));
 			mask |= CEPH_SETATTR_GID;
 			release |= CEPH_CAP_AUTH_SHARED;
 		}
 	}
 	if (ia_valid & ATTR_MODE) {
-		dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode,
-		     attr->ia_mode);
+		doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode,
+		      ceph_vinop(inode), inode->i_mode, attr->ia_mode);
 		if (issued & CEPH_CAP_AUTH_EXCL) {
 			inode->i_mode = attr->ia_mode;
 			dirtied |= CEPH_CAP_AUTH_EXCL;
@@ -2556,9 +2600,10 @@ retry:
 	if (ia_valid & ATTR_ATIME) {
 		struct timespec64 atime = inode_get_atime(inode);
 
-		dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
-		     atime.tv_sec, atime.tv_nsec,
-		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+		doutc(cl, "%p %llx.%llx atime %lld.%09ld -> %lld.%09ld\n",
+		      inode, ceph_vinop(inode),
+		      atime.tv_sec, atime.tv_nsec,
+		      attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
 		if (issued & CEPH_CAP_FILE_EXCL) {
 			ci->i_time_warp_seq++;
 			inode_set_atime_to_ts(inode, attr->ia_atime);
@@ -2578,7 +2623,8 @@ retry:
 		}
 	}
 	if (ia_valid & ATTR_SIZE) {
-		dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
+		doutc(cl, "%p %llx.%llx size %lld -> %lld\n", inode,
+		      ceph_vinop(inode), isize, attr->ia_size);
 		/*
 		 * Only when the new size is smaller and not aligned to
 		 * CEPH_FSCRYPT_BLOCK_SIZE will the RMW be needed.
@@ -2631,9 +2677,10 @@ retry:
 	if (ia_valid & ATTR_MTIME) {
 		struct timespec64 mtime = inode_get_mtime(inode);
 
-		dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
-		     mtime.tv_sec, mtime.tv_nsec,
-		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+		doutc(cl, "%p %llx.%llx mtime %lld.%09ld -> %lld.%09ld\n",
+		      inode, ceph_vinop(inode),
+		      mtime.tv_sec, mtime.tv_nsec,
+		      attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
 		if (issued & CEPH_CAP_FILE_EXCL) {
 			ci->i_time_warp_seq++;
 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
@@ -2656,11 +2703,12 @@ retry:
 	if (ia_valid & ATTR_CTIME) {
 		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
 					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
-		dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
-		     inode_get_ctime_sec(inode),
-		     inode_get_ctime_nsec(inode),
-		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
-		     only ? "ctime only" : "ignored");
+		doutc(cl, "%p %llx.%llx ctime %lld.%09ld -> %lld.%09ld (%s)\n",
+		      inode, ceph_vinop(inode),
+		      inode_get_ctime_sec(inode),
+		      inode_get_ctime_nsec(inode),
+		      attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+		      only ? "ctime only" : "ignored");
 		if (only) {
 			/*
 			 * if kernel wants to dirty ctime but nothing else,
@@ -2678,7 +2726,8 @@ retry:
 		}
 	}
 	if (ia_valid & ATTR_FILE)
-		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
+		doutc(cl, "%p %llx.%llx ATTR_FILE ...
hrm!\n", inode, + ceph_vinop(inode)); if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, @@ -2719,16 +2768,17 @@ retry: */ err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == -EAGAIN && truncate_retry--) { - dout("setattr %p result=%d (%s locally, %d remote), retry it!\n", - inode, err, ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote), retry it!\n", + inode, ceph_vinop(inode), err, + ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); goto retry; } } out: - dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, - ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote)\n", inode, + ceph_vinop(inode), err, ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); @@ -2746,7 +2796,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2759,7 +2809,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = setattr_prepare(&nop_mnt_idmap, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err != 0) return err; @@ -2771,10 +2821,10 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; - err = __ceph_setattr(inode, attr, NULL); + err = __ceph_setattr(idmap, inode, attr, NULL); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); + err = posix_acl_chmod(idmap, dentry, attr->ia_mode); return err; } @@ -2816,19 +2866,21 @@ int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode; int err; if (ceph_snap(inode) == CEPH_SNAPDIR) { - dout("do_getattr inode %p SNAPDIR\n", inode); + doutc(cl, "inode %p %llx.%llx SNAPDIR\n", inode, + ceph_vinop(inode)); return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", - inode, ceph_cap_string(mask), inode->i_mode); + doutc(cl, "inode %p %llx.%llx mask %s mode 0%o\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) return 0; @@ -2855,14 +2907,15 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, } } ceph_mdsc_put_request(req); - dout("do_getattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode = USE_AUTH_MDS; @@ -2892,7 +2945,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, xattr_value = req->r_reply_info.xattr_info.xattr_value; xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; - 
dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); + doutc(cl, "xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); err = (int)xattr_value_len; if (size == 0) @@ -2907,7 +2960,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, put: ceph_mdsc_put_request(req); out: - dout("do_getvxattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } @@ -2927,7 +2980,7 @@ int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false); if (!err) - err = generic_permission(&nop_mnt_idmap, inode, mask); + err = generic_permission(idmap, inode, mask); return err; } @@ -2984,7 +3037,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, return err; } - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->ino = ceph_present_inode(inode); /* @@ -3007,7 +3060,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; if (S_ISDIR(inode->i_mode)) { - if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) { + if (ceph_test_mount_opt(ceph_sb_to_fs_client(sb), RBYTES)) { stat->size = ci->i_rbytes; } else if (ceph_snap(inode) == CEPH_SNAPDIR) { struct ceph_inode_info *pci; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 91a84917d203..e861de3c79b9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -65,7 +65,7 @@ static long __validate_layout(struct ceph_mds_client *mdsc, static long ceph_ioctl_set_layout(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; struct ceph_inode_info *ci = ceph_inode(file_inode(file)); @@ -140,7 +140,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) struct ceph_mds_request *req; struct ceph_ioctl_layout l; int err; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; /* copy and validate */ if (copy_from_user(&l, arg, sizeof(l))) @@ -183,7 +183,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + &ceph_sb_to_fs_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); u32 xlen; @@ -244,7 +244,8 @@ static long ceph_ioctl_lazyio(struct file *file) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { spin_lock(&ci->i_ceph_lock); @@ -252,11 +253,13 @@ static long ceph_ioctl_lazyio(struct file *file) ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; __ceph_touch_fmode(ci, mdsc, fi->fmode); spin_unlock(&ci->i_ceph_lock); - dout("ioctl_layzio: file %p marked lazy\n", file); + doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); } else { - dout("ioctl_layzio: file %p 
already lazy\n", file); + doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode, + ceph_vinop(inode)); } return 0; } @@ -355,10 +358,12 @@ static const char *ceph_ioctl_cmd_name(const unsigned int cmd) long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(file); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret; - dout("ioctl file %p cmd %s arg %lu\n", file, - ceph_ioctl_cmd_name(cmd), arg); + doutc(fsc->client, "file %p %p %llx.%llx cmd %s arg %lu\n", file, + inode, ceph_vinop(inode), ceph_ioctl_cmd_name(cmd), arg); switch (cmd) { case CEPH_IOC_GET_LAYOUT: return ceph_ioctl_get_layout(file, (void __user *)arg); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index cb51c7e9c8e2..e07ad29ff8b9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -77,6 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; u64 length = 0; @@ -111,10 +112,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); - dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, - (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, - wait, fl->fl_type); + doutc(cl, "rule: %d, op: %d, owner: %llx, pid: %llu, " + "start: %llu, length: %llu, wait: %d, type: %d\n", + (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type); req->r_args.filelock_change.rule = lock_type; req->r_args.filelock_change.type = cmd; @@ -147,16 +148,17 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); - dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, - (int)operation, (u64)fl->fl_pid, fl->fl_start, - length, wait, fl->fl_type, err); + doutc(cl, "rule: %d, op: %d, pid: %llu, start: %llu, " + "length: %llu, wait: %d, type: %d, err code %d\n", + (int)lock_type, (int)operation, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type, err); return err; } static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *intr_req; struct inode *inode = req->r_inode; int err, lock_type; @@ -174,8 +176,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, if (!err) return 0; - dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", - req->r_tid); + doutc(cl, "request %llu was interrupted\n", req->r_tid); mutex_lock(&mdsc->mutex); if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { @@ -246,6 +247,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; u8 wait = 0; @@ -257,7 +259,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_lock, fl_owner: %p\n", fl->fl_owner); + doutc(cl, "fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if 
(IS_GETLK(cmd)) @@ -292,7 +294,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) { - dout("mds locked, locking locally\n"); + doutc(cl, "locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -300,8 +302,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock\n", - err); + doutc(cl, "got %d on posix_lock_file, undid lock\n", + err); } } } @@ -312,6 +314,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u8 wait = 0; u8 lock_cmd; @@ -322,7 +325,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_flock, fl_file: %p\n", fl->fl_file); + doutc(cl, "fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -359,7 +362,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock\n", err); + doutc(cl, "got %d on locks_lock_file_wait, undid lock\n", + err); } } return err; @@ -371,6 +375,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) */ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct file_lock *lock; struct file_lock_context *ctx; @@ -386,17 +391,20 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks\n", - *flock_count, *fcntl_count); + doutc(cl, "counted %d flock locks and %d fcntl locks\n", + *flock_count, *fcntl_count); } /* * Given a pointer to a lock, convert it to a ceph filelock */ -static int lock_to_ceph_filelock(struct file_lock *lock, +static int lock_to_ceph_filelock(struct inode *inode, + struct file_lock *lock, struct ceph_filelock *cephlock) { + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); @@ -414,7 +422,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d\n", lock->fl_type); + doutc(cl, "Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -432,13 +440,14 @@ int ceph_encode_locks_to_buffer(struct inode *inode, { struct file_lock *lock; struct file_lock_context *ctx = locks_inode_context(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; int seen_fcntl = 0; int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, - num_fcntl_locks); + doutc(cl, "encoding %d flock and %d fcntl locks\n", num_flock_locks, + num_fcntl_locks); if (!ctx) return 0; @@ -450,7 +459,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, 
&flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; @@ -461,7 +470,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index de798444bb97..d95eb525519a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -12,6 +12,7 @@ #include <linux/bits.h> #include <linux/ktime.h> #include <linux/bitmap.h> +#include <linux/mnt_idmapping.h> #include "super.h" #include "mds_client.h" @@ -411,6 +412,7 @@ static int parse_reply_info_readdir(void **p, void *end, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct ceph_client *cl = req->r_mdsc->fsc->client; u32 num, i = 0; int err; @@ -433,7 +435,7 @@ static int parse_reply_info_readdir(void **p, void *end, BUG_ON(!info->dir_entries); if ((unsigned long)(info->dir_entries + num) > (unsigned long)info->dir_entries + info->dir_buf_size) { - pr_err("dir contents are larger than expected\n"); + pr_err_client(cl, "dir contents are larger than expected\n"); WARN_ON(1); goto bad; } @@ -454,7 +456,7 @@ static int parse_reply_info_readdir(void **p, void *end, ceph_decode_need(p, end, _name_len, bad); _name = *p; *p += _name_len; - dout("parsed dir dname '%.*s'\n", _name_len, _name); + doutc(cl, "parsed dir dname '%.*s'\n", _name_len, _name); if (info->hash_order) rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, @@ -514,8 +516,8 @@ static int parse_reply_info_readdir(void **p, void *end, rde->is_nokey = false; err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey); if (err) { - pr_err("%s unable to decode %.*s, got %d\n", __func__, - _name_len, _name, err); + pr_err_client(cl, "unable to decode %.*s, got %d\n", + _name_len, _name, err); goto out_bad; } rde->name = oname.name; @@ -539,7 +541,7 @@ done: bad: err = -EIO; out_bad: - pr_err("problem parsing dir contents %d\n", err); + pr_err_client(cl, "problem parsing dir contents %d\n", err); return err; } @@ -570,10 +572,11 @@ bad: static int ceph_parse_deleg_inos(void **p, void *end, struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; u32 sets; ceph_decode_32_safe(p, end, sets, bad); - dout("got %u sets of delegated inodes\n", sets); + doutc(cl, "got %u sets of delegated inodes\n", sets); while (sets--) { u64 start, len; @@ -582,8 +585,9 @@ static int ceph_parse_deleg_inos(void **p, void *end, /* Don't accept a delegation of system inodes */ if (start < CEPH_INO_SYSTEM_BASE) { - pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", - start, len); + pr_warn_ratelimited_client(cl, + "ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", + start, len); continue; } while (len--) { @@ -591,10 +595,10 @@ static int ceph_parse_deleg_inos(void **p, void *end, DELEGATED_INO_AVAILABLE, GFP_KERNEL); if (!err) { - dout("added delegated inode 0x%llx\n", - start - 1); + doutc(cl, "added delegated inode 0x%llx\n", start - 1); } else if (err == -EBUSY) { - pr_warn("MDS delegated inode 0x%llx more than once.\n", + pr_warn_client(cl, + "MDS delegated inode 0x%llx more than once.\n", start - 1); } else { return err; @@ -744,6 +748,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, struct ceph_mds_request *req, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct 
ceph_client *cl = s->s_mdsc->fsc->client; void *p, *end; u32 len; int err; @@ -783,7 +788,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, bad: err = -EIO; out_bad: - pr_err("mds parse_reply err %d\n", err); + pr_err_client(cl, "mds parse_reply err %d\n", err); ceph_msg_dump(msg); return err; } @@ -830,7 +835,8 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) */ int ceph_wait_on_conflict_unlink(struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct dentry *pdentry = dentry->d_parent; struct dentry *udentry, *found = NULL; struct ceph_dentry_info *di; @@ -855,8 +861,8 @@ int ceph_wait_on_conflict_unlink(struct dentry *dentry) goto next; if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); if (!d_same_name(udentry, pdentry, &dname)) goto next; @@ -872,8 +878,8 @@ next: if (likely(!found)) return 0; - dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__, - dentry, dentry, found, found); + doutc(cl, "dentry %p:%pd conflict with old %p:%pd\n", dentry, dentry, + found, found); err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT, TASK_KILLABLE); @@ -957,6 +963,7 @@ static int __verify_registered_session(struct ceph_mds_client *mdsc, static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *s; if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) @@ -973,7 +980,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int newmax = 1 << get_count_order(mds + 1); struct ceph_mds_session **sa; - dout("%s: realloc to %d\n", __func__, newmax); + doutc(cl, "realloc to %d\n", newmax); sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); if (!sa) goto fail_realloc; @@ -986,7 +993,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->max_sessions = newmax; } - dout("%s: mds%d\n", __func__, mds); + doutc(cl, "mds%d\n", mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -1029,7 +1036,7 @@ fail_realloc: static void __unregister_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { - dout("__unregister_session mds%d %p\n", s->s_mds, s); + doutc(mdsc->fsc->client, "mds%d %p\n", s->s_mds, s); BUG_ON(mdsc->sessions[s->s_mds] != s); mdsc->sessions[s->s_mds] = NULL; ceph_con_close(&s->s_con); @@ -1116,6 +1123,8 @@ void ceph_mdsc_release_request(struct kref *kref) kfree(req->r_path1); kfree(req->r_path2); put_cred(req->r_cred); + if (req->r_mnt_idmap) + mnt_idmap_put(req->r_mnt_idmap); if (req->r_pagelist) ceph_pagelist_release(req->r_pagelist); kfree(req->r_fscrypt_auth); @@ -1155,6 +1164,7 @@ static void __register_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, struct inode *dir) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 0; req->r_tid = ++mdsc->last_tid; @@ -1162,18 +1172,20 @@ static void __register_request(struct ceph_mds_client *mdsc, ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, req->r_num_caps); if (ret < 0) { - pr_err("__register_request %p " - "failed to reserve caps: %d\n", req, ret); + pr_err_client(cl, "%p failed to reserve caps: %d\n", + req, ret); /* set 
req->r_err to fail early from __do_request */ req->r_err = ret; return; } } - dout("__register_request %p tid %lld\n", req, req->r_tid); + doutc(cl, "%p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); req->r_cred = get_current_cred(); + if (!req->r_mnt_idmap) + req->r_mnt_idmap = &nop_mnt_idmap; if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) mdsc->oldest_tid = req->r_tid; @@ -1192,7 +1204,7 @@ static void __register_request(struct ceph_mds_client *mdsc, static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { - dout("__unregister_request %p tid %lld\n", req, req->r_tid); + doutc(mdsc->fsc->client, "%p tid %lld\n", req, req->r_tid); /* Never leave an unregistered request on an unsafe list! */ list_del_init(&req->r_unsafe_item); @@ -1278,6 +1290,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, int mds = -1; u32 hash = req->r_direct_hash; bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + struct ceph_client *cl = mdsc->fsc->client; if (random) *random = false; @@ -1289,8 +1302,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_resend_mds >= 0 && (__have_session(mdsc, req->r_resend_mds) || ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { - dout("%s using resend_mds mds%d\n", __func__, - req->r_resend_mds); + doutc(cl, "using resend_mds mds%d\n", req->r_resend_mds); return req->r_resend_mds; } @@ -1307,7 +1319,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_lock(); inode = get_nonsnap_parent(req->r_dentry); rcu_read_unlock(); - dout("%s using snapdir's parent %p\n", __func__, inode); + doutc(cl, "using snapdir's parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ @@ -1327,7 +1340,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, /* direct snapped/virtual snapdir requests * based on parent dir inode */ inode = get_nonsnap_parent(parent); - dout("%s using nonsnap parent %p\n", __func__, inode); + doutc(cl, "using nonsnap parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } else { /* dentry target */ inode = d_inode(req->r_dentry); @@ -1343,10 +1357,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_unlock(); } - dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, - hash, mode); if (!inode) goto random; + + doutc(cl, "%p %llx.%llx is_hash=%d (0x%x) mode %d\n", inode, + ceph_vinop(inode), (int)is_hash, hash, mode); ci = ceph_inode(inode); if (is_hash && S_ISDIR(inode->i_mode)) { @@ -1362,9 +1377,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, get_random_bytes(&r, 1); r %= frag.ndist; mds = frag.dist[r]; - dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds, (int)r, frag.ndist); + doutc(cl, "%p %llx.%llx frag %u mds%d (%d/%d)\n", + inode, ceph_vinop(inode), frag.frag, + mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE && !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) @@ -1377,9 +1392,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (frag.mds >= 0) { /* choose auth mds */ mds = frag.mds; - dout("%s %p %llx.%llx frag %u mds%d (auth)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds); + doutc(cl, "%p %llx.%llx frag %u mds%d (auth)\n", + inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) 
{ if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, @@ -1403,9 +1417,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, goto random; } mds = cap->session->s_mds; - dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, - inode, ceph_vinop(inode), mds, - cap == ci->i_auth_cap ? "auth " : "", cap); + doutc(cl, "%p %llx.%llx mds%d (%scap %p)\n", inode, + ceph_vinop(inode), mds, + cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); out: iput(inode); @@ -1416,7 +1430,7 @@ random: *random = true; mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); - dout("%s chose random mds%d\n", __func__, mds); + doutc(cl, "chose random mds%d\n", mds); return mds; } @@ -1529,6 +1543,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; + struct ceph_client *cl = mdsc->fsc->client; size_t size, count; void *p, *end; int ret; @@ -1567,7 +1582,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { - pr_err("ENOMEM creating session open msg\n"); + pr_err_client(cl, "ENOMEM creating session open msg\n"); return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; @@ -1607,14 +1622,14 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 ret = encode_supported_features(&p, end); if (ret) { - pr_err("encode_supported_features failed!\n"); + pr_err_client(cl, "encode_supported_features failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } ret = encode_metric_spec(&p, end); if (ret) { - pr_err("encode_metric_spec failed!\n"); + pr_err_client(cl, "encode_metric_spec failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } @@ -1642,8 +1657,8 @@ static int __open_session(struct ceph_mds_client *mdsc, /* wait for mds to go active? 
*/ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); - dout("open_session to mds%d (%s)\n", mds, - ceph_mds_state_name(mstate)); + doutc(mdsc->fsc->client, "open_session to mds%d (%s)\n", mds, + ceph_mds_state_name(mstate)); session->s_state = CEPH_MDS_SESSION_OPENING; session->s_renew_requested = jiffies; @@ -1686,8 +1701,9 @@ struct ceph_mds_session * ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target) { struct ceph_mds_session *session; + struct ceph_client *cl = mdsc->fsc->client; - dout("open_export_target_session to mds%d\n", target); + doutc(cl, "to mds%d\n", target); mutex_lock(&mdsc->mutex); session = __open_export_target_session(mdsc, target); @@ -1702,13 +1718,14 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_info *mi; struct ceph_mds_session *ts; int i, mds = session->s_mds; + struct ceph_client *cl = mdsc->fsc->client; if (mds >= mdsc->mdsmap->possible_max_rank) return; mi = &mdsc->mdsmap->m_info[mds]; - dout("open_export_target_sessions for mds%d (%d targets)\n", - session->s_mds, mi->num_export_targets); + doutc(cl, "for mds%d (%d targets)\n", session->s_mds, + mi->num_export_targets); for (i = 0; i < mi->num_export_targets; i++) { ts = __open_export_target_session(mdsc, mi->export_targets[i]); @@ -1731,11 +1748,13 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, static void detach_cap_releases(struct ceph_mds_session *session, struct list_head *target) { + struct ceph_client *cl = session->s_mdsc->fsc->client; + lockdep_assert_held(&session->s_cap_lock); list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - dout("dispose_cap_releases mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); } static void dispose_cap_releases(struct ceph_mds_client *mdsc, @@ -1753,16 +1772,17 @@ static void dispose_cap_releases(struct ceph_mds_client *mdsc, static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p; - dout("cleanup_session_requests mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - pr_warn_ratelimited(" dropping unsafe request %llu\n", - req->r_tid); + pr_warn_ratelimited_client(cl, " dropping unsafe request %llu\n", + req->r_tid); if (req->r_target_inode) mapping_set_error(req->r_target_inode->i_mapping, -EIO); if (req->r_unsafe_dir) @@ -1791,13 +1811,14 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct list_head *p; struct ceph_cap *cap; struct inode *inode, *last_inode = NULL; struct ceph_cap *old_cap = NULL; int ret; - dout("iterate_session_caps %p mds%d\n", session, session->s_mds); + doutc(cl, "%p mds%d\n", session, session->s_mds); spin_lock(&session->s_cap_lock); p = session->s_caps.next; while (p != &session->s_caps) { @@ -1828,8 +1849,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, spin_lock(&session->s_cap_lock); p = p->next; if (!cap->ci) { - dout("iterate_session_caps finishing cap %p removal\n", - cap); + doutc(cl, "finishing cap %p removal\n", cap); BUG_ON(cap->session != session); cap->session = NULL; list_del_init(&cap->session_caps); @@ -1858,6 +1878,7 @@ out: 
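The pattern in the hunks above is mechanical: each function derives a struct ceph_client *cl from whatever context it already holds (mdsc, session, or inode) and switches its dout()/pr_err()/pr_warn() calls to the client-aware doutc()/pr_*_client() variants, dropping the hand-rolled function-name prefixes at the same time (dynamic debug can decorate messages with the caller on its own). As a rough sketch of what the client-aware macros amount to, with the exact format string and field accesses being assumptions here (the real definitions live in include/linux/ceph/ceph_debug.h):

/*
 * Illustrative approximation: tag every message with the cluster fsid
 * and the client's global id, so logs from several mounts, possibly of
 * different clusters, can be told apart.
 */
#define doutc(client, fmt, ...)					\
	dout("[%pU %llu]: " fmt, &(client)->fsid,		\
	     (client)->monc.auth->global_id, ##__VA_ARGS__)

#define pr_err_client(client, fmt, ...)				\
	pr_err("[%pU %llu]: " fmt, &(client)->fsid,		\
	       (client)->monc.auth->global_id, ##__VA_ARGS__)

For the same reason many inode messages now print the vino via ceph_vinop(inode) ("%llx.%llx", ino plus snap id) next to the bare pointer: the pointer alone is ambiguous once more than one client is logging.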
static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); bool invalidate = false; struct ceph_cap *cap; int iputs = 0; @@ -1865,8 +1886,8 @@ static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (cap) { - dout(" removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, " removing cap %p, ci is %p, inode is %p\n", + cap, ci, &ci->netfs.inode); iputs = ceph_purge_inode_cap(inode, cap, &invalidate); } @@ -1890,7 +1911,7 @@ static void remove_session_caps(struct ceph_mds_session *session) struct super_block *sb = fsc->sb; LIST_HEAD(dispose); - dout("remove_session_caps on %p\n", session); + doutc(fsc->client, "on %p\n", session); ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); wake_up_all(&fsc->mdsc->cap_flushing_wq); @@ -1971,7 +1992,9 @@ static int wake_up_session_cb(struct inode *inode, int mds, void *arg) static void wake_up_session_caps(struct ceph_mds_session *session, int ev) { - dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); + struct ceph_client *cl = session->s_mdsc->fsc->client; + + doutc(cl, "session %p mds%d\n", session, session->s_mds); ceph_iterate_session_caps(session, wake_up_session_cb, (void *)(unsigned long)ev); } @@ -1985,25 +2008,26 @@ static void wake_up_session_caps(struct ceph_mds_session *session, int ev) static int send_renew_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; int state; if (time_after_eq(jiffies, session->s_cap_ttl) && time_after_eq(session->s_cap_ttl, session->s_renew_requested)) - pr_info("mds%d caps stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps stale\n", session->s_mds); session->s_renew_requested = jiffies; /* do not try to renew caps until a recovering mds has reconnected * with its clients. 
*/ state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds); if (state < CEPH_MDS_STATE_RECONNECT) { - dout("send_renew_caps ignoring mds%d (%s)\n", - session->s_mds, ceph_mds_state_name(state)); + doutc(cl, "ignoring mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); return 0; } - dout("send_renew_caps to mds%d (%s)\n", session->s_mds, - ceph_mds_state_name(state)); + doutc(cl, "to mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); if (!msg) @@ -2015,10 +2039,11 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, static int send_flushmsg_ack(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, u64 seq) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; - dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), seq); + doutc(cl, "to mds%d (%s) seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), seq); msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); if (!msg) return -ENOMEM; @@ -2035,6 +2060,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc, static void renewed_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int is_renew) { + struct ceph_client *cl = mdsc->fsc->client; int was_stale; int wake = 0; @@ -2046,15 +2072,17 @@ static void renewed_caps(struct ceph_mds_client *mdsc, if (was_stale) { if (time_before(jiffies, session->s_cap_ttl)) { - pr_info("mds%d caps renewed\n", session->s_mds); + pr_info_client(cl, "mds%d caps renewed\n", + session->s_mds); wake = 1; } else { - pr_info("mds%d caps still stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps still stale\n", + session->s_mds); } } - dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n", - session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh", - time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); + doutc(cl, "mds%d ttl now %lu, was %s, now %s\n", session->s_mds, + session->s_cap_ttl, was_stale ? "stale" : "fresh", + time_before(jiffies, session->s_cap_ttl) ?
"stale" : "fresh"); spin_unlock(&session->s_cap_lock); if (wake) @@ -2066,11 +2094,11 @@ static void renewed_caps(struct ceph_mds_client *mdsc, */ static int request_close_session(struct ceph_mds_session *session) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; - dout("request_close_session mds%d state %s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), - session->s_seq); + doutc(cl, "mds%d state %s seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), session->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); if (!msg) @@ -2126,6 +2154,8 @@ out: */ static int trim_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; @@ -2145,9 +2175,10 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) wanted = __ceph_caps_file_wanted(ci); oissued = __ceph_caps_issued_other(ci, cap); - dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n", - inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), - ceph_cap_string(used), ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p mine %s oissued %s used %s wanted %s\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(mine), + ceph_cap_string(oissued), ceph_cap_string(used), + ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { if (ci->i_dirty_caps || ci->i_flushing_caps || !list_empty(&ci->i_cap_snaps)) @@ -2173,7 +2204,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) if (oissued) { /* we aren't the only cap.. just remove us */ - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); (*remaining)--; } else { struct dentry *dentry; @@ -2187,8 +2218,8 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) count = atomic_read(&inode->i_count); if (count == 1) (*remaining)--; - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, count); + doutc(cl, "%p %llx.%llx cap %p pruned, count now %d\n", + inode, ceph_vinop(inode), cap, count); } else { dput(dentry); } @@ -2207,17 +2238,18 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int max_caps) { + struct ceph_client *cl = mdsc->fsc->client; int trim_caps = session->s_nr_caps - max_caps; - dout("trim_caps mds%d start: %d / %d, trim %d\n", - session->s_mds, session->s_nr_caps, max_caps, trim_caps); + doutc(cl, "mds%d start: %d / %d, trim %d\n", session->s_mds, + session->s_nr_caps, max_caps, trim_caps); if (trim_caps > 0) { int remaining = trim_caps; ceph_iterate_session_caps(session, trim_caps_cb, &remaining); - dout("trim_caps mds%d done: %d / %d, trimmed %d\n", - session->s_mds, session->s_nr_caps, max_caps, - trim_caps - remaining); + doutc(cl, "mds%d done: %d / %d, trimmed %d\n", + session->s_mds, session->s_nr_caps, max_caps, + trim_caps - remaining); } ceph_flush_cap_releases(mdsc, session); @@ -2227,6 +2259,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, static int check_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 1; spin_lock(&mdsc->cap_dirty_lock); @@ -2235,8 +2268,8 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, list_first_entry(&mdsc->cap_flush_list, struct ceph_cap_flush, g_list); if (cf->tid <= want_flush_tid) { - dout("check_caps_flush still flushing tid " - 
"%llu <= %llu\n", cf->tid, want_flush_tid); + doutc(cl, "still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); ret = 0; } } @@ -2252,12 +2285,14 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { - dout("check_caps_flush want %llu\n", want_flush_tid); + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "want %llu\n", want_flush_tid); wait_event(mdsc->cap_flushing_wq, check_caps_flush(mdsc, want_flush_tid)); - dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); + doutc(cl, "ok, flushed thru %llu\n", want_flush_tid); } /* @@ -2266,6 +2301,7 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg = NULL; struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; @@ -2324,7 +2360,7 @@ again: msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); msg = NULL; } @@ -2344,13 +2380,13 @@ again: msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); } return; out_err: - pr_err("send_cap_releases mds%d, failed to allocate message\n", - session->s_mds); + pr_err_client(cl, "mds%d, failed to allocate message\n", + session->s_mds); spin_lock(&session->s_cap_lock); list_splice(&tmp_list, &session->s_cap_releases); session->s_num_cap_releases += num_cap_releases; @@ -2373,16 +2409,17 @@ static void ceph_cap_release_work(struct work_struct *work) void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; ceph_get_mds_session(session); if (queue_work(mdsc->fsc->cap_wq, &session->s_cap_release_work)) { - dout("cap release work queued\n"); + doutc(cl, "cap release work queued\n"); } else { ceph_put_mds_session(session); - dout("failed to queue cap release work\n"); + doutc(cl, "failed to queue cap release work\n"); } } @@ -2410,13 +2447,14 @@ static void ceph_cap_reclaim_work(struct work_struct *work) void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) { - dout("caps reclaim work queued\n"); + doutc(cl, "caps reclaim work queued\n"); } else { - dout("failed to queue caps release work\n"); + doutc(cl, "failed to queue caps release work\n"); } } @@ -2588,6 +2626,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) /** * ceph_mdsc_build_path - build a path string to a given dentry + * @mdsc: mds client * @dentry: dentry to which path should be built * @plen: returned length of string * @pbase: returned base inode number @@ -2607,9 +2646,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * Encode hidden .snap dirs as a double /, i.e. 
* foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, - int for_wire) +char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + int *plen, u64 *pbase, int for_wire) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *cur; struct inode *inode; char *path; @@ -2635,8 +2675,7 @@ retry: spin_lock(&cur->d_lock); inode = d_inode(cur); if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { - dout("build_path path+%d: %p SNAPDIR\n", - pos, cur); + doutc(cl, "path+%d: %p SNAPDIR\n", pos, cur); spin_unlock(&cur->d_lock); parent = dget_parent(cur); } else if (for_wire && inode && dentry != cur && @@ -2714,21 +2753,21 @@ retry: * A rename didn't occur, but somehow we didn't end up where * we thought we would. Throw a warning and try again. */ - pr_warn("build_path did not end path lookup where expected (pos = %d)\n", - pos); + pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n", + pos); goto retry; } *pbase = base; *plen = PATH_MAX - 1 - pos; - dout("build_path on %p %d built %llx '%.*s'\n", - dentry, d_count(dentry), base, *plen, path + pos); + doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry), + base, *plen, path + pos); return path + pos; } -static int build_dentry_path(struct dentry *dentry, struct inode *dir, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath, bool parent_locked) +static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + struct inode *dir, const char **ppath, int *ppathlen, + u64 *pino, bool *pfreepath, bool parent_locked) { char *path; @@ -2744,7 +2783,7 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; @@ -2756,6 +2795,7 @@ static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, bool *pfreepath) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; @@ -2765,7 +2805,7 @@ static int build_inode_path(struct inode *inode, return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); @@ -2778,27 +2818,28 @@ static int build_inode_path(struct inode *inode, * request arguments may be specified via an inode *, a dentry *, or * an explicit ino+path. 
*/ -static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - struct inode *rdiri, const char *rpath, - u64 rino, const char **ppath, int *pathlen, - u64 *ino, bool *freepath, bool parent_locked) +static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, + struct dentry *rdentry, struct inode *rdiri, + const char *rpath, u64 rino, const char **ppath, + int *pathlen, u64 *ino, bool *freepath, + bool parent_locked) { + struct ceph_client *cl = mdsc->fsc->client; int r = 0; if (rinode) { r = build_inode_path(rinode, ppath, pathlen, ino, freepath); - dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), - ceph_snap(rinode)); + doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), + ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, freepath, parent_locked); - dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, - *ppath); + doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = rpath ? strlen(rpath) : 0; - dout(" path %.*s\n", *pathlen, rpath); + doutc(cl, " path %.*s\n", *pathlen, rpath); } return r; @@ -2840,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p, } } +static inline u16 mds_supported_head_version(struct ceph_mds_session *session) +{ + if (!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features)) + return 1; + + if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) + return 2; + + return CEPH_MDS_REQUEST_HEAD_VERSION; +} + static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { @@ -2861,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_request_head_legacy *lhead; const char *path1 = NULL; @@ -2874,10 +2927,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); - bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, - &session->s_features); + u16 request_head_version = mds_supported_head_version(session); + kuid_t caller_fsuid = req->r_cred->fsuid; + kgid_t caller_fsgid = req->r_cred->fsgid; - ret = set_request_path_attr(req->r_inode, req->r_dentry, + ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1, test_bit(CEPH_MDS_R_PARENT_LOCKED, @@ -2891,7 +2945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; - ret = set_request_path_attr(NULL, old_dentry, + ret = set_request_path_attr(mdsc, NULL, old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2, true); @@ -2916,8 +2970,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, */ if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); - else if (old_version) + else if (request_head_version == 1) len = sizeof(struct ceph_mds_request_head_old); + else if (request_head_version == 2) + len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else 
len = sizeof(struct ceph_mds_request_head); @@ -2967,6 +3023,30 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead = find_legacy_request_head(msg->front.iov_base, session->s_con.peer_features); + if ((req->r_mnt_idmap != &nop_mnt_idmap) && + !test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) { + WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op)); + + if (enable_unsafe_idmap) { + pr_warn_once_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. UID/GID-based restrictions may" + " not work properly.\n"); + + caller_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + caller_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + } else { + pr_err_ratelimited_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. Fail request with -EIO.\n"); + + ret = -EIO; + goto out_err; + } + } + /* * The ceph_mds_request_head_legacy didn't contain a version field, and * one was added when we moved the message version from 3->4. @@ -2974,17 +3054,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) { msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); - } else if (old_version) { + } else if (request_head_version == 1) { struct ceph_mds_request_head_old *ohead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); ohead->version = cpu_to_le16(1); p = msg->front.iov_base + sizeof(*ohead); + } else if (request_head_version == 2) { + struct ceph_mds_request_head *nhead = msg->front.iov_base; + + msg->hdr.version = cpu_to_le16(6); + nhead->version = cpu_to_le16(2); + + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, ext_num_fwd); } else { struct ceph_mds_request_head *nhead = msg->front.iov_base; + kuid_t owner_fsuid; + kgid_t owner_fsgid; msg->hdr.version = cpu_to_le16(6); nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + nhead->struct_len = cpu_to_le32(sizeof(struct ceph_mds_request_head)); + + if (IS_CEPH_MDS_OP_NEWINODE(req->r_op)) { + owner_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + owner_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + nhead->owner_uid = cpu_to_le32(from_kuid(&init_user_ns, owner_fsuid)); + nhead->owner_gid = cpu_to_le32(from_kgid(&init_user_ns, owner_fsgid)); + } else { + nhead->owner_uid = cpu_to_le32(-1); + nhead->owner_gid = cpu_to_le32(-1); + } + p = msg->front.iov_base + sizeof(*nhead); } @@ -2993,9 +3096,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); lhead->op = cpu_to_le32(req->r_op); lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, - req->r_cred->fsuid)); + caller_fsuid)); lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, - req->r_cred->fsgid)); + caller_fsgid)); lhead->ino = cpu_to_le64(req->r_deleg_ino); lhead->args = req->r_args; @@ -3099,6 +3202,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request_head_legacy *lhead; struct ceph_mds_request_head *nhead; struct ceph_msg *msg; @@ -3117,8 +3221,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, old_max_retry = 1 << 
(old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || ((uint32_t)req->r_attempts >= U32_MAX)) { - pr_warn_ratelimited("%s request tid %llu seq overflow\n", - __func__, req->r_tid); + pr_warn_ratelimited_client(cl, "request tid %llu seq overflow\n", + req->r_tid); return -EMULTIHOP; } } @@ -3133,8 +3237,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, else req->r_sent_on_mseq = -1; } - dout("%s %p tid %lld %s (attempt %d)\n", __func__, req, - req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); + doutc(cl, "%p tid %lld %s (attempt %d)\n", req, req->r_tid, + ceph_mds_op_name(req->r_op), req->r_attempts); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; @@ -3202,7 +3306,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); } - dout(" r_parent = %p\n", req->r_parent); + doutc(cl, " r_parent = %p\n", req->r_parent); return 0; } @@ -3230,6 +3334,7 @@ static int __send_request(struct ceph_mds_session *session, static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session = NULL; int mds = -1; int err = 0; @@ -3242,29 +3347,29 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) { - dout("do_request metadata corrupted\n"); + doutc(cl, "metadata corrupted\n"); err = -EIO; goto finish; } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { - dout("do_request timed out\n"); + doutc(cl, "timed out\n"); err = -ETIMEDOUT; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { - dout("do_request forced umount\n"); + doutc(cl, "forced umount\n"); err = -EIO; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { if (mdsc->mdsmap_err) { err = mdsc->mdsmap_err; - dout("do_request mdsmap err %d\n", err); + doutc(cl, "mdsmap err %d\n", err); goto finish; } if (mdsc->mdsmap->m_epoch == 0) { - dout("do_request no mdsmap, waiting for map\n"); + doutc(cl, "no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3285,7 +3390,7 @@ static void __do_request(struct ceph_mds_client *mdsc, err = -EJUKEBOX; goto finish; } - dout("do_request no mds or not active, waiting for map\n"); + doutc(cl, "no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3301,8 +3406,8 @@ static void __do_request(struct ceph_mds_client *mdsc, } req->r_session = ceph_get_mds_session(session); - dout("do_request mds%d session %p state %s\n", mds, session, - ceph_session_state_name(session->s_state)); + doutc(cl, "mds%d session %p state %s\n", mds, session, + ceph_session_state_name(session->s_state)); /* * The old ceph will crash the MDSs when see unknown OPs @@ -3393,8 +3498,8 @@ static void __do_request(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) { - dout("do_request session changed for auth cap %d -> %d\n", - cap->session->s_mds, session->s_mds); + doutc(cl, "session changed for auth cap %d -> %d\n", + cap->session->s_mds, session->s_mds); /* Remove the auth cap from old session */ spin_lock(&cap->session->s_cap_lock); @@ -3421,7 +3526,7 @@ out_session: ceph_put_mds_session(session); finish: if (err) { - dout("__do_request early error %d\n", err); + 
doutc(cl, "early error %d\n", err); req->r_err = err; complete_request(mdsc, req); __unregister_request(mdsc, req); @@ -3435,6 +3540,7 @@ finish: static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; LIST_HEAD(tmp_list); @@ -3444,7 +3550,8 @@ static void __wake_requests(struct ceph_mds_client *mdsc, req = list_entry(tmp_list.next, struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); - dout(" wake request %p tid %llu\n", req, req->r_tid); + doutc(cl, " wake request %p tid %llu\n", req, + req->r_tid); __do_request(mdsc, req); } } @@ -3455,10 +3562,11 @@ static void __wake_requests(struct ceph_mds_client *mdsc, */ static void kick_requests(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p = rb_first(&mdsc->request_tree); - dout("kick_requests mds%d\n", mds); + doutc(cl, "kick_requests mds%d\n", mds); while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); @@ -3468,7 +3576,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) continue; /* only new requests */ if (req->r_session && req->r_session->s_mds == mds) { - dout(" kicking tid %llu\n", req->r_tid); + doutc(cl, " kicking tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __do_request(mdsc, req); } @@ -3478,6 +3586,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err = 0; /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ @@ -3499,8 +3608,7 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (req->r_inode) { err = ceph_wait_on_async_create(req->r_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } @@ -3508,13 +3616,12 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (!err && req->r_old_inode) { err = ceph_wait_on_async_create(req->r_old_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } - dout("submit_request on %p for inode %p\n", req, dir); + doutc(cl, "submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); __do_request(mdsc, req); @@ -3527,10 +3634,11 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, ceph_mds_request_wait_callback_t wait_func) { + struct ceph_client *cl = mdsc->fsc->client; int err; /* wait */ - dout("do_request waiting\n"); + doutc(cl, "do_request waiting\n"); if (wait_func) { err = wait_func(mdsc, req); } else { @@ -3544,14 +3652,14 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, else err = timeleft; /* killed */ } - dout("do_request waited, got %d\n", err); + doutc(cl, "do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); /* only abort if we didn't race with a real reply */ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { - dout("aborted request %lld with %d\n", req->r_tid, err); + doutc(cl, "aborted request %lld with %d\n", req->r_tid, err); /* * ensure we aren't running concurrently with @@ -3582,15 +3690,16 @@ 
int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err; - dout("do_request on %p\n", req); + doutc(cl, "do_request on %p\n", req); /* issue */ err = ceph_mdsc_submit_request(mdsc, dir, req); if (!err) err = ceph_mdsc_wait_request(mdsc, req, NULL); - dout("do_request %p done, result %d\n", req, err); + doutc(cl, "do_request %p done, result %d\n", req, err); return err; } @@ -3602,8 +3711,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { struct inode *dir = req->r_parent; struct inode *old_dir = req->r_old_dentry_dir; + struct ceph_client *cl = req->r_mdsc->fsc->client; - dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); + doutc(cl, "invalidate_dir_request %p %p (complete, lease(s))\n", + dir, old_dir); ceph_dir_clear_complete(dir); if (old_dir) @@ -3624,6 +3735,7 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_mds_reply_head *head = msg->front.iov_base; struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ @@ -3634,7 +3746,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) bool close_sessions = false; if (msg->front.iov_len < sizeof(*head)) { - pr_err("mdsc_handle_reply got corrupt (short) reply\n"); + pr_err_client(cl, "got corrupt (short) reply\n"); ceph_msg_dump(msg); return; } @@ -3644,17 +3756,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&mdsc->mutex); req = lookup_get_request(mdsc, tid); if (!req) { - dout("handle_reply on unknown tid %llu\n", tid); + doutc(cl, "on unknown tid %llu\n", tid); mutex_unlock(&mdsc->mutex); return; } - dout("handle_reply %p\n", req); + doutc(cl, "handle_reply %p\n", req); /* correct session? */ if (req->r_session != session) { - pr_err("mdsc_handle_reply got %llu on session mds%d" - " not mds%d\n", tid, session->s_mds, - req->r_session ? req->r_session->s_mds : -1); + pr_err_client(cl, "got %llu on session mds%d not mds%d\n", + tid, session->s_mds, + req->r_session ? req->r_session->s_mds : -1); mutex_unlock(&mdsc->mutex); goto out; } @@ -3662,14 +3774,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* dup? */ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { - pr_warn("got a dup %s reply on %llu from mds%d\n", - head->safe ? "safe" : "unsafe", tid, mds); + pr_warn_client(cl, "got a dup %s reply on %llu from mds%d\n", + head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { - pr_warn("got unsafe after safe on %llu from mds%d\n", - tid, mds); + pr_warn_client(cl, "got unsafe after safe on %llu from mds%d\n", + tid, mds); mutex_unlock(&mdsc->mutex); goto out; } @@ -3692,7 +3804,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * response. And even if it did, there is nothing * useful we could do with a revised return value. 
*/ - dout("got safe reply %llu, mds%d\n", tid, mds); + doutc(cl, "got safe reply %llu, mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); goto out; @@ -3702,7 +3814,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); } - dout("handle_reply tid %lld result %d\n", tid, result); + doutc(cl, "tid %lld result %d\n", tid, result); if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features)) err = parse_reply_info(session, msg, req, (u64)-1); else @@ -3742,7 +3854,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&session->s_mutex); if (err < 0) { - pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); + pr_err_client(cl, "got corrupt reply mds%d(tid:%lld)\n", + mds, tid); ceph_msg_dump(msg); goto out_err; } @@ -3806,7 +3919,7 @@ out_err: set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); } } else { - dout("reply arrived after request %lld was aborted\n", tid); + doutc(cl, "reply arrived after request %lld was aborted\n", tid); } mutex_unlock(&mdsc->mutex); @@ -3835,6 +3948,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; u64 tid = le64_to_cpu(msg->hdr.tid); u32 next_mds; @@ -3852,12 +3966,12 @@ static void handle_forward(struct ceph_mds_client *mdsc, req = lookup_get_request(mdsc, tid); if (!req) { mutex_unlock(&mdsc->mutex); - dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d - req dne\n", tid, next_mds); return; /* dup reply? */ } if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { - dout("forward tid %llu aborted, unregistering\n", tid); + doutc(cl, "forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) { /* @@ -3873,10 +3987,11 @@ static void handle_forward(struct ceph_mds_client *mdsc, set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); aborted = true; - pr_warn_ratelimited("forward tid %llu seq overflow\n", tid); + pr_warn_ratelimited_client(cl, "forward tid %llu seq overflow\n", + tid); } else { /* resend. 
forward race not possible; mds would drop */ - dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d (we resend)\n", tid, next_mds); BUG_ON(req->r_err); BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); req->r_attempts = 0; @@ -3894,7 +4009,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, return; bad: - pr_err("mdsc_handle_forward decode error err=%d\n", err); + pr_err_client(cl, "decode error err=%d\n", err); ceph_msg_dump(msg); } @@ -3933,6 +4048,7 @@ static void handle_session(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int mds = session->s_mds; int msg_version = le16_to_cpu(msg->hdr.version); void *p = msg->front.iov_base; @@ -3980,7 +4096,8 @@ static void handle_session(struct ceph_mds_session *session, /* version >= 5, flags */ ceph_decode_32_safe(&p, end, flags, bad); if (flags & CEPH_SESSION_BLOCKLISTED) { - pr_warn("mds%d session blocklisted\n", session->s_mds); + pr_warn_client(cl, "mds%d session blocklisted\n", + session->s_mds); blocklisted = true; } } @@ -3996,22 +4113,24 @@ static void handle_session(struct ceph_mds_session *session, mutex_lock(&session->s_mutex); - dout("handle_session mds%d %s %p state %s seq %llu\n", - mds, ceph_session_op_name(op), session, - ceph_session_state_name(session->s_state), seq); + doutc(cl, "mds%d %s %p state %s seq %llu\n", mds, + ceph_session_op_name(op), session, + ceph_session_state_name(session->s_state), seq); if (session->s_state == CEPH_MDS_SESSION_HUNG) { session->s_state = CEPH_MDS_SESSION_OPEN; - pr_info("mds%d came back\n", session->s_mds); + pr_info_client(cl, "mds%d came back\n", session->s_mds); } switch (op) { case CEPH_SESSION_OPEN: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect success\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect success\n", + session->s_mds); if (session->s_state == CEPH_MDS_SESSION_OPEN) { - pr_notice("mds%d is already opened\n", session->s_mds); + pr_notice_client(cl, "mds%d is already opened\n", + session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; session->s_features = features; @@ -4041,7 +4160,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_CLOSE: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect denied\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect denied\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_CLOSED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4050,8 +4170,8 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_STALE: - pr_info("mds%d caps went stale, renewing\n", - session->s_mds); + pr_info_client(cl, "mds%d caps went stale, renewing\n", + session->s_mds); atomic_inc(&session->s_cap_gen); session->s_cap_ttl = jiffies - 1; send_renew_caps(mdsc, session); @@ -4072,7 +4192,7 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FORCE_RO: - dout("force_session_readonly %p\n", session); + doutc(cl, "force_session_readonly %p\n", session); spin_lock(&session->s_cap_lock); session->s_readonly = true; spin_unlock(&session->s_cap_lock); @@ -4081,7 +4201,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_REJECT: WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING); - pr_info("mds%d rejected session\n", session->s_mds); + 
pr_info_client(cl, "mds%d rejected session\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_REJECTED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4091,7 +4212,7 @@ static void handle_session(struct ceph_mds_session *session, break; default: - pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); + pr_err_client(cl, "bad op %d mds%d\n", op, mds); WARN_ON(1); } @@ -4108,30 +4229,32 @@ static void handle_session(struct ceph_mds_session *session, return; bad: - pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds, - (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", mds, + (int)msg->front.iov_len); ceph_msg_dump(msg); return; } void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); } } void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), dcaps); } @@ -4146,7 +4269,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, *nreq; struct rb_node *p; - dout("replay_unsafe_requests mds%d\n", session->s_mds); + doutc(mdsc->fsc->client, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) @@ -4290,6 +4413,8 @@ out_unlock: */ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; @@ -4307,7 +4432,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, recon_state->msg_version >= 2); dput(dentry); if (IS_ERR(path)) { @@ -4326,9 +4451,9 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) err = 0; goto out_err; } - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", - inode, ceph_vinop(inode), cap, cap->cap_id, - ceph_cap_string(cap->issued)); + doutc(cl, " adding %p ino %llx.%llx cap %p %lld %s\n", inode, + ceph_vinop(inode), cap, cap->cap_id, + ceph_cap_string(cap->issued)); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -4482,6 +4607,7 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, { struct rb_node *p; struct ceph_pagelist *pagelist = recon_state->pagelist; + struct ceph_client *cl = mdsc->fsc->client; int err = 0; if (recon_state->msg_version >= 4) { @@ -4520,8 +4646,8 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); } - dout(" adding snap realm %llx seq %lld parent %llx\n", - realm->ino, realm->seq, realm->parent_ino); + doutc(cl, " adding snap realm %llx seq %lld parent %llx\n", + realm->ino, realm->seq, 
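The helpers touched here show the other half of the refactor: functions such as ceph_mdsc_build_path() and encode_snap_realms() (and, later in the patch, build_snap_context(), rebuild_snap_realms() and queue_realm_cap_snaps() in snap.c) gain a struct ceph_mds_client *mdsc parameter purely so they can reach a client for the new logging. A hypothetical helper showing the threading shape:

	/* hypothetical example of how the patch threads the client through */
	static void example_helper(struct ceph_mds_client *mdsc,
				   struct ceph_snap_realm *realm)
	{
		struct ceph_client *cl = mdsc->fsc->client;

		doutc(cl, "realm %llx\n", realm->ino);
	}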
realm->parent_ino); sr_rec.ino = cpu_to_le64(realm->ino); sr_rec.seq = cpu_to_le64(realm->seq); sr_rec.parent = cpu_to_le64(realm->parent_ino); @@ -4550,6 +4676,7 @@ fail: static void send_mds_reconnect(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *reply; int mds = session->s_mds; int err = -ENOMEM; @@ -4558,7 +4685,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, }; LIST_HEAD(dispose); - pr_info("mds%d reconnect start\n", mds); + pr_info_client(cl, "mds%d reconnect start\n", mds); recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!recon_state.pagelist) @@ -4574,8 +4701,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; - dout("session %p state %s\n", session, - ceph_session_state_name(session->s_state)); + doutc(cl, "session %p state %s\n", session, + ceph_session_state_name(session->s_state)); atomic_inc(&session->s_cap_gen); @@ -4709,7 +4836,8 @@ fail: fail_nomsg: ceph_pagelist_release(recon_state.pagelist); fail_nopagelist: - pr_err("error %d preparing reconnect for mds%d\n", err, mds); + pr_err_client(cl, "error %d preparing reconnect for mds%d\n", + err, mds); return; } @@ -4728,9 +4856,9 @@ static void check_new_map(struct ceph_mds_client *mdsc, int oldstate, newstate; struct ceph_mds_session *s; unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0}; + struct ceph_client *cl = mdsc->fsc->client; - dout("check_new_map new %u old %u\n", - newmap->m_epoch, oldmap->m_epoch); + doutc(cl, "new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); if (newmap->m_info) { for (i = 0; i < newmap->possible_max_rank; i++) { @@ -4746,12 +4874,12 @@ static void check_new_map(struct ceph_mds_client *mdsc, oldstate = ceph_mdsmap_get_state(oldmap, i); newstate = ceph_mdsmap_get_state(newmap, i); - dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", - i, ceph_mds_state_name(oldstate), - ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", - ceph_mds_state_name(newstate), - ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", - ceph_session_state_name(s->s_state)); + doutc(cl, "mds%d state %s%s -> %s%s (session %s)\n", + i, ceph_mds_state_name(oldstate), + ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", + ceph_mds_state_name(newstate), + ceph_mdsmap_is_laggy(newmap, i) ? 
" (laggy)" : "", + ceph_session_state_name(s->s_state)); if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ @@ -4804,7 +4932,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, newstate >= CEPH_MDS_STATE_ACTIVE) { if (oldstate != CEPH_MDS_STATE_CREATING && oldstate != CEPH_MDS_STATE_STARTING) - pr_info("mds%d recovery completed\n", s->s_mds); + pr_info_client(cl, "mds%d recovery completed\n", + s->s_mds); kick_requests(mdsc, i); mutex_unlock(&mdsc->mutex); mutex_lock(&s->s_mutex); @@ -4848,12 +4977,13 @@ static void check_new_map(struct ceph_mds_client *mdsc, s = __open_export_target_session(mdsc, i); if (IS_ERR(s)) { err = PTR_ERR(s); - pr_err("failed to open export target session, err %d\n", - err); + pr_err_client(cl, + "failed to open export target session, err %d\n", + err); continue; } } - dout("send reconnect to export target mds.%d\n", i); + doutc(cl, "send reconnect to export target mds.%d\n", i); mutex_unlock(&mdsc->mutex); send_mds_reconnect(mdsc, s); ceph_put_mds_session(s); @@ -4869,8 +4999,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG || s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout(" connecting to export targets of laggy mds%d\n", - i); + doutc(cl, " connecting to export targets of laggy mds%d\n", i); __open_export_target_sessions(mdsc, s); } } @@ -4897,6 +5026,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct dentry *parent, *dentry; @@ -4908,7 +5038,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct qstr dname; int release = 0; - dout("handle_lease from mds%d\n", mds); + doutc(cl, "from mds%d\n", mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4926,20 +5056,19 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease %s, ino %llx %p %.*s\n", - ceph_lease_op_name(h->action), vino.ino, inode, - dname.len, dname.name); + doutc(cl, "%s, ino %llx %p %.*s\n", ceph_lease_op_name(h->action), + vino.ino, inode, dname.len, dname.name); mutex_lock(&session->s_mutex); if (!inode) { - dout("handle_lease no inode %llx\n", vino.ino); + doutc(cl, "no inode %llx\n", vino.ino); goto release; } /* dentry */ parent = d_find_alias(inode); if (!parent) { - dout("no parent dentry on inode %p\n", inode); + doutc(cl, "no parent dentry on inode %p\n", inode); WARN_ON(1); goto release; /* hrm... 
*/ } @@ -4999,7 +5128,7 @@ out: bad: ceph_dec_mds_stopping_blocker(mdsc); - pr_err("corrupt lease message\n"); + pr_err_client(cl, "corrupt lease message\n"); ceph_msg_dump(msg); } @@ -5007,13 +5136,14 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, struct dentry *dentry, char action, u32 seq) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_lease *lease; struct inode *dir; int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; - dout("lease_send_msg identry %p %s to mds%d\n", - dentry, ceph_lease_op_name(action), session->s_mds); + doutc(cl, "identry %p %s to mds%d\n", dentry, ceph_lease_op_name(action), + session->s_mds); msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); if (!msg) @@ -5046,6 +5176,7 @@ static void lock_unlock_session(struct ceph_mds_session *s) static void maybe_recover_session(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_fs_client *fsc = mdsc->fsc; if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) @@ -5057,17 +5188,19 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (!READ_ONCE(fsc->blocklisted)) return; - pr_info("auto reconnect after blocklisted\n"); + pr_info_client(cl, "auto reconnect after blocklisted\n"); ceph_force_reconnect(fsc->sb); } bool check_session_state(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + switch (s->s_state) { case CEPH_MDS_SESSION_OPEN: if (s->s_ttl && time_after(jiffies, s->s_ttl)) { s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); + pr_info_client(cl, "mds%d hung\n", s->s_mds); } break; case CEPH_MDS_SESSION_CLOSING: @@ -5087,6 +5220,8 @@ bool check_session_state(struct ceph_mds_session *s) */ void inc_session_sequence(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + lockdep_assert_held(&s->s_mutex); s->s_seq++; @@ -5094,11 +5229,11 @@ void inc_session_sequence(struct ceph_mds_session *s) if (s->s_state == CEPH_MDS_SESSION_CLOSING) { int ret; - dout("resending session close request for mds%d\n", s->s_mds); + doutc(cl, "resending session close request for mds%d\n", s->s_mds); ret = request_close_session(s); if (ret < 0) - pr_err("unable to close session to mds%d: %d\n", - s->s_mds, ret); + pr_err_client(cl, "unable to close session to mds%d: %d\n", + s->s_mds, ret); } } @@ -5127,7 +5262,7 @@ static void delayed_work(struct work_struct *work) int renew_caps; int i; - dout("mdsc delayed_work\n"); + doutc(mdsc->fsc->client, "mdsc delayed_work\n"); if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; @@ -5256,6 +5391,7 @@ err_mdsc: */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; @@ -5263,25 +5399,25 @@ static void wait_requests(struct ceph_mds_client *mdsc) if (__get_oldest_req(mdsc)) { mutex_unlock(&mdsc->mutex); - dout("wait_requests waiting for requests\n"); + doutc(cl, "waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); while ((req = __get_oldest_req(mdsc))) { - dout("wait_requests timed out on tid %llu\n", - req->r_tid); + doutc(cl, "timed out on tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __unregister_request(mdsc, req); } } mutex_unlock(&mdsc->mutex); - dout("wait_requests done\n"); + doutc(cl, "done\n"); } void 
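inc_session_sequence() above now documents its locking contract with lockdep_assert_held(&s->s_mutex): the sequence bump, and the close-request resend it may trigger, must be serialized with other session-state changes. Callers are therefore expected to follow a pattern like this sketch (lock and helper names are the real ones from this file):

	mutex_lock(&session->s_mutex);
	inc_session_sequence(session);	/* may resend a session close request */
	mutex_unlock(&session->s_mutex);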
send_flush_mdlog(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; struct ceph_msg *msg; /* @@ -5291,13 +5427,13 @@ void send_flush_mdlog(struct ceph_mds_session *s) return; mutex_lock(&s->s_mutex); - dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds, - ceph_session_state_name(s->s_state), s->s_seq); + doutc(cl, "request mdlog flush to mds%d (%s)s seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, s->s_seq); if (!msg) { - pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", - s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); + pr_err_client(cl, "failed to request mdlog flush to mds%d (%s) seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); } else { ceph_con_send(&s->s_con, msg); } @@ -5310,7 +5446,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) */ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { - dout("pre_umount\n"); + doutc(mdsc->fsc->client, "begin\n"); mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); @@ -5325,6 +5461,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) ceph_msgr_flush(); ceph_cleanup_quotarealms_inodes(mdsc); + doutc(mdsc->fsc->client, "done\n"); } /* @@ -5333,12 +5470,13 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req = NULL, *nextreq; struct ceph_mds_session *last_session = NULL; struct rb_node *n; mutex_lock(&mdsc->mutex); - dout("%s want %lld\n", __func__, want_tid); + doutc(cl, "want %lld\n", want_tid); restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { @@ -5372,8 +5510,8 @@ restart: } else { ceph_put_mds_session(s); } - dout("%s wait on %llu (want %llu)\n", __func__, - req->r_tid, want_tid); + doutc(cl, "wait on %llu (want %llu)\n", + req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); mutex_lock(&mdsc->mutex); @@ -5391,17 +5529,18 @@ restart: } mutex_unlock(&mdsc->mutex); ceph_put_mds_session(last_session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; u64 want_tid, want_flush; if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) return; - dout("sync\n"); + doutc(cl, "sync\n"); mutex_lock(&mdsc->mutex); want_tid = mdsc->last_tid; mutex_unlock(&mdsc->mutex); @@ -5417,8 +5556,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) } spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", - want_tid, want_flush); + doutc(cl, "sync want tid %lld flush_seq %lld\n", want_tid, want_flush); flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); wait_caps_flush(mdsc, want_flush); @@ -5440,11 +5578,12 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_options *opts = mdsc->fsc->client->options; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session; int i; int skipped = 0; - dout("close_sessions\n"); + doutc(cl, "begin\n"); /* close sessions */ mutex_lock(&mdsc->mutex); @@ -5462,7 +5601,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) } mutex_unlock(&mdsc->mutex); - dout("waiting for sessions to close\n"); + doutc(cl, 
"waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc, skipped), ceph_timeout_jiffies(opts->mount_timeout)); @@ -5490,7 +5629,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ - dout("stopped\n"); + doutc(cl, "done\n"); } void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) @@ -5498,7 +5637,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) struct ceph_mds_session *session; int mds; - dout("force umount\n"); + doutc(mdsc->fsc->client, "force umount\n"); mutex_lock(&mdsc->mutex); for (mds = 0; mds < mdsc->max_sessions; mds++) { @@ -5529,7 +5668,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { - dout("stop\n"); + doutc(mdsc->fsc->client, "stop\n"); /* * Make sure the delayed work stopped before releasing * the resources. @@ -5550,7 +5689,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) void ceph_mdsc_destroy(struct ceph_fs_client *fsc) { struct ceph_mds_client *mdsc = fsc->mdsc; - dout("mdsc_destroy %p\n", mdsc); + doutc(fsc->client, "%p\n", mdsc); if (!mdsc) return; @@ -5564,12 +5703,13 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) fsc->mdsc = NULL; kfree(mdsc); - dout("mdsc_destroy %p done\n", mdsc); + doutc(fsc->client, "%p done\n", mdsc); } void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { struct ceph_fs_client *fsc = mdsc->fsc; + struct ceph_client *cl = fsc->client; const char *mds_namespace = fsc->mount_options->mds_namespace; void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; @@ -5581,7 +5721,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(u32), bad); epoch = ceph_decode_32(&p); - dout("handle_fsmap epoch %u\n", epoch); + doutc(cl, "epoch %u\n", epoch); /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */ ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad); @@ -5626,7 +5766,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; bad: - pr_err("error decoding fsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding fsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); err_out: @@ -5641,6 +5782,7 @@ err_out: */ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; u32 epoch; u32 maplen; void *p = msg->front.iov_base; @@ -5655,18 +5797,17 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); - dout("handle_map epoch %u len %d\n", epoch, (int)maplen); + doutc(cl, "epoch %u len %d\n", epoch, (int)maplen); /* do we need it? 
*/ mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { - dout("handle_map epoch %u <= our %u\n", - epoch, mdsc->mdsmap->m_epoch); + doutc(cl, "epoch %u <= our %u\n", epoch, mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); return; } - newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); + newmap = ceph_mdsmap_decode(mdsc, &p, end, ceph_msgr2(mdsc->fsc->client)); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; @@ -5695,7 +5836,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) bad_unlock: mutex_unlock(&mdsc->mutex); bad: - pr_err("error decoding mdsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding mdsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); return; @@ -5726,7 +5868,8 @@ static void mds_peer_reset(struct ceph_connection *con) struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - pr_warn("mds%d closed our session\n", s->s_mds); + pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", + s->s_mds); if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) send_mds_reconnect(mdsc, s); } @@ -5735,6 +5878,7 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int type = le16_to_cpu(msg->hdr.type); mutex_lock(&mdsc->mutex); @@ -5774,8 +5918,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) break; default: - pr_err("received unknown message type %d %s\n", type, - ceph_msg_type_name(type)); + pr_err_client(cl, "received unknown message type %d %s\n", + type, ceph_msg_type_name(type)); } out: ceph_msg_put(msg); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5a3714bdd64a..2e6ddaa13d72 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -14,9 +14,9 @@ #include <linux/ceph/types.h> #include <linux/ceph/messenger.h> -#include <linux/ceph/mdsmap.h> #include <linux/ceph/auth.h> +#include "mdsmap.h" #include "metric.h" #include "super.h" @@ -33,8 +33,10 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_OP_GETVXATTR, CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_NEW_SNAPREALM_INFO, + CEPHFS_FEATURE_HAS_OWNER_UIDGID, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -49,6 +51,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ + CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ } /* @@ -300,6 +303,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ int r_request_release_offset; const struct cred *r_cred; + struct mnt_idmap *r_mnt_idmap; struct timespec64 r_stamp; /* for choosing which mds to send this request to */ @@ -581,7 +585,8 @@ static inline void ceph_mdsc_free_path(char *path, int len) __putname(path - (PATH_MAX - 1 - len)); } -extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, +extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, int *plen, u64 *base, int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); @@ -614,4 +619,6 @@ static inline int ceph_wait_on_async_create(struct inode *inode) extern int ceph_wait_on_conflict_unlink(struct dentry 
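Two of the mds_client.h additions in this hunk belong together: the CEPHFS_FEATURE_HAS_OWNER_UIDGID session feature bit and the struct mnt_idmap *r_mnt_idmap request field, with the enable_unsafe_idmap module knob (declared just below and defined in super.c) as the escape hatch. A sketch of the negotiation they imply; the wrapper function is hypothetical, the identifiers it uses are from this header:

	/*
	 * Hypothetical check: idmapped mounts want an MDS that accepts
	 * separate owner uid/gid fields; without the feature bit, only
	 * the enable_unsafe_idmap parameter lets the mount proceed.
	 */
	static bool example_idmap_allowed(struct ceph_mds_session *s)
	{
		return test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &s->s_features) ||
		       enable_unsafe_idmap;
	}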
*dentry); extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino); + +extern bool enable_unsafe_idmap; #endif diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 7dac21ee6ce7..fae97c25ce58 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -7,10 +7,11 @@ #include <linux/slab.h> #include <linux/types.h> -#include <linux/ceph/mdsmap.h> #include <linux/ceph/messenger.h> #include <linux/ceph/decode.h> +#include "mdsmap.h" +#include "mds_client.h" #include "super.h" #define CEPH_MDS_IS_READY(i, ignore_laggy) \ @@ -114,8 +115,10 @@ bad: * Ignore any fields we don't care about (there are quite a few of * them). */ -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mdsmap *m; const void *start = *p; int i, j, n; @@ -233,20 +236,18 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) *p = info_end; } - dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n", - i+1, n, global_id, mds, inc, - ceph_pr_addr(&addr), - ceph_mds_state_name(state), - laggy ? "(laggy)" : ""); + doutc(cl, "%d/%d %lld mds%d.%d %s %s%s\n", i+1, n, global_id, + mds, inc, ceph_pr_addr(&addr), + ceph_mds_state_name(state), laggy ? "(laggy)" : ""); if (mds < 0 || mds >= m->possible_max_rank) { - pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); + pr_warn_client(cl, "got incorrect mds(%d)\n", mds); continue; } if (state <= 0) { - dout("mdsmap_decode got incorrect state(%s)\n", - ceph_mds_state_name(state)); + doutc(cl, "got incorrect state(%s)\n", + ceph_mds_state_name(state)); continue; } @@ -385,16 +386,16 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) m->m_max_xattr_size = 0; } bad_ext: - dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", - !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); + doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", + !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); *p = end; - dout("mdsmap_decode success epoch %u\n", m->m_epoch); + doutc(cl, "success epoch %u\n", m->m_epoch); return m; nomem: err = -ENOMEM; goto out_err; corrupt: - pr_err("corrupt mdsmap\n"); + pr_err_client(cl, "corrupt mdsmap\n"); print_hex_dump(KERN_DEBUG, "mdsmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); diff --git a/include/linux/ceph/mdsmap.h b/fs/ceph/mdsmap.h index 4c3e0648dc27..89f1931f1ba6 100644 --- a/include/linux/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h @@ -5,6 +5,8 @@ #include <linux/bug.h> #include <linux/ceph/types.h> +struct ceph_mds_client; + /* * mds map - describe servers in the mds cluster. 
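Moving mdsmap.h out of include/linux/ceph/ into fs/ceph/ makes the map decoder private to the filesystem: ceph_mdsmap_decode() now takes a struct ceph_mds_client * for the client-aware logging, a type the shared libceph headers should not know about. As the hunk below shows, a forward declaration keeps the header self-contained without pulling in mds_client.h:

	struct ceph_mds_client;		/* only the type name is needed */

	struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc,
					       void **p, void *end, bool msgr2);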
* @@ -65,7 +67,8 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) } extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 6d3584f16f9a..871c1090e520 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_client_metric *m = &mdsc->metric; u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; s64 sum; s32 items = 0; @@ -51,8 +52,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); if (!msg) { - pr_err("send metrics to mds%d, failed to allocate message\n", - s->s_mds); + pr_err_client(cl, "to mds%d, failed to allocate message\n", + s->s_mds); return false; } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index f7fcf7f08ec6..9d36c3532de1 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -43,6 +43,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, { struct super_block *sb = mdsc->fsc->sb; struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino; struct inode *inode; struct ceph_inode_info *ci; @@ -51,8 +52,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, return; if (msg->front.iov_len < sizeof(*h)) { - pr_err("%s corrupt message mds%d len %d\n", __func__, - session->s_mds, (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", + session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); goto out; } @@ -62,7 +63,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); if (!inode) { - pr_warn("Failed to find inode %llu\n", vino.ino); + pr_warn_client(cl, "failed to find inode %llx\n", vino.ino); goto out; } ci = ceph_inode(inode); @@ -85,6 +86,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) { struct ceph_quotarealm_inode *qri = NULL; struct rb_node **node, *parent = NULL; + struct ceph_client *cl = mdsc->fsc->client; mutex_lock(&mdsc->quotarealms_inodes_mutex); node = &(mdsc->quotarealms_inodes.rb_node); @@ -110,7 +112,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) rb_link_node(&qri->node, parent, node); rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); } else - pr_warn("Failed to alloc quotarealms_inode\n"); + pr_warn_client(cl, "Failed to alloc quotarealms_inode\n"); } mutex_unlock(&mdsc->quotarealms_inodes_mutex); @@ -129,6 +131,7 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, struct super_block *sb, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_quotarealm_inode *qri; struct inode *in; @@ -161,8 +164,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - dout("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino, + PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; @@ 
-213,6 +216,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, enum quota_get_realm which_quota, bool retry) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -226,8 +230,9 @@ restart: if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -317,6 +322,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, loff_t delta) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -332,8 +338,9 @@ restart: if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -383,7 +390,7 @@ restart: break; default: /* Shouldn't happen */ - pr_warn("Invalid quota check op (%d)\n", op); + pr_warn_client(cl, "Invalid quota check op (%d)\n", op); exceeded = true; /* Just break the loop */ } iput(in); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 6732e1ea97d9..c65f2b202b2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -138,7 +138,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( __insert_snap_realm(&mdsc->snap_realms, realm); mdsc->num_snap_realms++; - dout("%s %llx %p\n", __func__, realm->ino, realm); + doutc(mdsc->fsc->client, "%llx %p\n", realm->ino, realm); return realm; } @@ -150,6 +150,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { + struct ceph_client *cl = mdsc->fsc->client; struct rb_node *n = mdsc->snap_realms.rb_node; struct ceph_snap_realm *r; @@ -162,7 +163,7 @@ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, else if (ino > r->ino) n = n->rb_right; else { - dout("%s %llx %p\n", __func__, r->ino, r); + doutc(cl, "%llx %p\n", r->ino, r); return r; } } @@ -188,9 +189,10 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, static void __destroy_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s %p %llx\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx\n", realm, realm->ino); rb_erase(&realm->node, &mdsc->snap_realms); mdsc->num_snap_realms--; @@ -290,6 +292,7 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, u64 parentino) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent; lockdep_assert_held_write(&mdsc->snap_rwsem); @@ -303,8 +306,8 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, if (IS_ERR(parent)) return PTR_ERR(parent); } - dout("%s %llx %p: %llx %p -> %llx %p\n", __func__, realm->ino, - realm, realm->parent_ino, realm->parent, parentino, parent); + doutc(cl, "%llx %p: %llx %p -> %llx %p\n", realm->ino, realm, + realm->parent_ino, realm->parent, parentino, parent); if (realm->parent) { list_del_init(&realm->child_item); ceph_put_snap_realm(mdsc, realm->parent); @@ 
-329,10 +332,12 @@ static int cmpu64_rev(const void *a, const void *b) /* * build the snap context for a given realm. */ -static int build_snap_context(struct ceph_snap_realm *realm, +static int build_snap_context(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; @@ -360,10 +365,10 @@ static int build_snap_context(struct ceph_snap_realm *realm, realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("%s %llx %p: %p seq %lld (%u snaps) (unchanged)\n", - __func__, realm->ino, realm, realm->cached_context, - realm->cached_context->seq, - (unsigned int)realm->cached_context->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps) (unchanged)\n", + realm->ino, realm, realm->cached_context, + realm->cached_context->seq, + (unsigned int)realm->cached_context->num_snaps); return 0; } @@ -400,8 +405,8 @@ static int build_snap_context(struct ceph_snap_realm *realm, sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("%s %llx %p: %p seq %lld (%u snaps)\n", __func__, realm->ino, - realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps)\n", realm->ino, realm, + snapc, snapc->seq, (unsigned int) snapc->num_snaps); ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; @@ -418,16 +423,18 @@ fail: ceph_put_snap_context(realm->cached_context); realm->cached_context = NULL; } - pr_err("%s %llx %p fail %d\n", __func__, realm->ino, realm, err); + pr_err_client(cl, "%llx %p fail %d\n", realm->ino, realm, err); return err; } /* * rebuild snap context for the given realm and all of its children. */ -static void rebuild_snap_realms(struct ceph_snap_realm *realm, +static void rebuild_snap_realms(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; LIST_HEAD(realm_queue); int last = 0; bool skip = false; @@ -451,9 +458,10 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm, continue; } - last = build_snap_context(_realm, &realm_queue, dirty_realms); - dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, - last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); + last = build_snap_context(mdsc, _realm, &realm_queue, + dirty_realms); + doutc(cl, "%llx %p, %s\n", realm->ino, realm, + last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); /* is any child in the list ? */ list_for_each_entry(child, &_realm->children, child_item) { @@ -523,6 +531,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap **pcapsnap) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *old_snapc, *new_snapc; struct ceph_cap_snap *capsnap = *pcapsnap; struct ceph_buffer *old_blob = NULL; @@ -548,14 +557,14 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. 
*/ - dout("%s %p %llx.%llx already pending\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx already pending\n", inode, + ceph_vinop(inode)); goto update_snapc; } if (ci->i_wrbuffer_ref_head == 0 && !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { - dout("%s %p %llx.%llx nothing dirty|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx nothing dirty|writing\n", inode, + ceph_vinop(inode)); goto update_snapc; } @@ -575,15 +584,15 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, } else { if (!(used & CEPH_CAP_FILE_WR) && ci->i_wrbuffer_ref_head == 0) { - dout("%s %p %llx.%llx no new_snap|dirty_page|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx no new_snap|dirty_page|writing\n", + inode, ceph_vinop(inode)); goto update_snapc; } } - dout("%s %p %llx.%llx cap_snap %p queuing under %p %s %s\n", - __func__, inode, ceph_vinop(inode), capsnap, old_snapc, - ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); + doutc(cl, "%p %llx.%llx cap_snap %p queuing under %p %s %s\n", + inode, ceph_vinop(inode), capsnap, old_snapc, + ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); capsnap->follows = old_snapc->seq; @@ -615,9 +624,9 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { - dout("%s %p %llx.%llx cap_snap %p snapc %p seq %llu used WR," - " now pending\n", __func__, inode, ceph_vinop(inode), - capsnap, old_snapc, old_snapc->seq); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p seq %llu used WR," + " now pending\n", inode, ceph_vinop(inode), capsnap, + old_snapc, old_snapc->seq); capsnap->writing = 1; } else { /* note mtime, size NOW. */ @@ -634,7 +643,7 @@ update_snapc: ci->i_head_snapc = NULL; } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); - dout(" new snapc is %p\n", new_snapc); + doutc(cl, " new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); @@ -655,6 +664,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; BUG_ON(capsnap->writing); capsnap->size = i_size_read(inode); @@ -667,11 +677,12 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; if (capsnap->dirty_pages) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "still has %d dirty pages\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size, capsnap->dirty_pages); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu still has %d dirty pages\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, + ceph_cap_string(capsnap->dirty), + capsnap->size, capsnap->dirty_pages); return 0; } @@ -680,20 +691,20 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * And trigger to flush the buffer immediately. 
*/ if (ci->i_wrbuffer_ref) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "used WRBUFFER, delaying\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu used WRBUFFER, delaying\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); ceph_queue_writeback(inode); return 0; } ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", - __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", + inode, ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); spin_lock(&mdsc->snap_flush_lock); if (list_empty(&ci->i_snap_flush_item)) { @@ -708,13 +719,15 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. */ -static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) +static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *lastinode = NULL; struct ceph_cap_snap *capsnap = NULL; - dout("%s %p %llx inode\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx inode\n", realm, realm->ino); spin_lock(&realm->inodes_with_caps_lock); list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) { @@ -733,8 +746,9 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (!capsnap) { capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); if (!capsnap) { - pr_err("ENOMEM allocating ceph_cap_snap on %p\n", - inode); + pr_err_client(cl, + "ENOMEM allocating ceph_cap_snap on %p\n", + inode); return; } } @@ -752,7 +766,7 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (capsnap) kmem_cache_free(ceph_cap_snap_cachep, capsnap); - dout("%s %p %llx done\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx done\n", realm, realm->ino); } /* @@ -766,6 +780,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, void *p, void *e, bool deletion, struct ceph_snap_realm **realm_ret) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ @@ -780,7 +795,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s deletion=%d\n", __func__, deletion); + doutc(cl, "deletion=%d\n", deletion); more: realm = NULL; rebuild_snapcs = 0; @@ -810,8 +825,8 @@ more: rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { - dout("%s updating %llx %p %lld -> %lld\n", __func__, - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); + doutc(cl, "updating %llx %p %lld -> %lld\n", realm->ino, + realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -834,16 +849,16 @@ more: rebuild_snapcs = 1; } else if (!realm->cached_context) { - dout("%s %llx %p seq %lld new\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq 
%lld new\n", realm->ino, realm, + realm->seq); rebuild_snapcs = 1; } else { - dout("%s %llx %p seq %lld unchanged\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq %lld unchanged\n", realm->ino, realm, + realm->seq); } - dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, - realm, rebuild_snapcs, p, e); + doutc(cl, "done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, + realm, rebuild_snapcs, p, e); /* * this will always track the uppest parent realm from which @@ -855,7 +870,7 @@ more: /* rebuild_snapcs when we reach the _end_ (root) of the trace */ if (realm_to_rebuild && p >= e) - rebuild_snap_realms(realm_to_rebuild, &dirty_realms); + rebuild_snap_realms(mdsc, realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; @@ -873,7 +888,7 @@ more: realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, dirty_item); list_del_init(&realm->dirty_item); - queue_realm_cap_snaps(realm); + queue_realm_cap_snaps(mdsc, realm); } if (realm_ret) @@ -891,7 +906,7 @@ fail: ceph_put_snap_realm(mdsc, realm); if (first_realm) ceph_put_snap_realm(mdsc, first_realm); - pr_err("%s error %d\n", __func__, err); + pr_err_client(cl, "error %d\n", err); /* * When receiving a corrupted snap trace we don't know what @@ -905,11 +920,12 @@ fail: WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); if (ret) - pr_err("%s failed to blocklist %s: %d\n", __func__, - ceph_pr_addr(&client->msgr.inst.addr), ret); + pr_err_client(cl, "failed to blocklist %s: %d\n", + ceph_pr_addr(&client->msgr.inst.addr), ret); - WARN(1, "%s: %s%sdo remount to continue%s", - __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), + WARN(1, "[client.%lld] %s %s%sdo remount to continue%s", + client->monc.auth->global_id, __func__, + ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), ret ? "" : " was blocklisted, ", err == -EIO ? 
" after corrupted snaptrace is fixed" : ""); @@ -925,11 +941,12 @@ fail: */ static void flush_snaps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; struct ceph_mds_session *session = NULL; - dout("%s\n", __func__); + doutc(cl, "begin\n"); spin_lock(&mdsc->snap_flush_lock); while (!list_empty(&mdsc->snap_flush_list)) { ci = list_first_entry(&mdsc->snap_flush_list, @@ -944,7 +961,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_flush_lock); ceph_put_mds_session(session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } /** @@ -960,7 +977,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_snap_realm *oldrealm = ci->i_snap_realm; lockdep_assert_held(&ci->i_ceph_lock); @@ -1000,6 +1017,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; int mds = session->s_mds; u64 split; @@ -1030,8 +1048,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, trace_len = le32_to_cpu(h->trace_len); p += sizeof(*h); - dout("%s from mds%d op %s split %llx tracelen %d\n", __func__, - mds, ceph_snap_op_name(op), split, trace_len); + doutc(cl, "from mds%d op %s split %llx tracelen %d\n", mds, + ceph_snap_op_name(op), split, trace_len); down_write(&mdsc->snap_rwsem); locked_rwsem = 1; @@ -1062,7 +1080,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, goto out; } - dout("splitting snap_realm %llx %p\n", realm->ino, realm); + doutc(cl, "splitting snap_realm %llx %p\n", realm->ino, realm); for (i = 0; i < num_split_inos; i++) { struct ceph_vino vino = { .ino = le64_to_cpu(split_inos[i]), @@ -1087,13 +1105,13 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, */ if (ci->i_snap_realm->created > le64_to_cpu(ri->created)) { - dout(" leaving %p %llx.%llx in newer realm %llx %p\n", - inode, ceph_vinop(inode), ci->i_snap_realm->ino, - ci->i_snap_realm); + doutc(cl, " leaving %p %llx.%llx in newer realm %llx %p\n", + inode, ceph_vinop(inode), ci->i_snap_realm->ino, + ci->i_snap_realm); goto skip_inode; } - dout(" will move %p %llx.%llx to split realm %llx %p\n", - inode, ceph_vinop(inode), realm->ino, realm); + doutc(cl, " will move %p %llx.%llx to split realm %llx %p\n", + inode, ceph_vinop(inode), realm->ino, realm); ceph_get_snap_realm(mdsc, realm); ceph_change_snap_realm(inode, realm); @@ -1154,7 +1172,7 @@ skip_inode: return; bad: - pr_err("%s corrupt snap message from mds%d\n", __func__, mds); + pr_err_client(cl, "corrupt snap message from mds%d\n", mds); ceph_msg_dump(msg); out: if (locked_rwsem) @@ -1170,6 +1188,7 @@ out: struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm, *exist; struct rb_node **p, *parent; int ret; @@ -1192,8 +1211,8 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, } spin_unlock(&mdsc->snapid_map_lock); if (exist) { - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } @@ -1237,13 +1256,12 @@ 
struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, if (exist) { free_anon_bdev(sm->dev); kfree(sm); - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } - dout("%s create snapid map %llx -> %x\n", __func__, - sm->snap, sm->dev); + doutc(cl, "create snapid map %llx -> %x\n", sm->snap, sm->dev); return sm; } @@ -1268,6 +1286,7 @@ void ceph_put_snapid_map(struct ceph_mds_client* mdsc, void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; unsigned long now; LIST_HEAD(to_free); @@ -1289,7 +1308,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) while (!list_empty(&to_free)) { sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); list_del(&sm->lru); - dout("trim snapid map %llx -> %x\n", sm->snap, sm->dev); + doutc(cl, "trim snapid map %llx -> %x\n", sm->snap, sm->dev); free_anon_bdev(sm->dev); kfree(sm); } @@ -1297,6 +1316,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; struct rb_node *p; LIST_HEAD(to_free); @@ -1315,8 +1335,8 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) list_del(&sm->lru); free_anon_bdev(sm->dev); if (WARN_ON_ONCE(atomic_read(&sm->ref))) { - pr_err("snapid map %llx -> %x still in use\n", - sm->snap, sm->dev); + pr_err_client(cl, "snapid map %llx -> %x still in use\n", + sm->snap, sm->dev); } kfree(sm); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2d7f5a8d4a92..5ec102f6b1ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -44,28 +44,29 @@ static LIST_HEAD(ceph_fsc_list); */ static void ceph_put_super(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); - dout("put_super\n"); + doutc(fsc->client, "begin\n"); ceph_fscrypt_free_dummy_policy(fsc); ceph_mdsc_close_sessions(fsc->mdsc); + doutc(fsc->client, "done\n"); } static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; int i, err; u64 data_pool; + doutc(fsc->client, "begin\n"); if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; } else { data_pool = CEPH_NOPOOL; } - dout("statfs\n"); err = ceph_monc_do_statfs(monc, data_pool, &st); if (err < 0) return err; @@ -113,24 +114,26 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* fold the fs_cluster_id into the upper bits */ buf->f_fsid.val[1] = monc->fs_cluster_id; + doutc(fsc->client, "done\n"); return 0; } static int ceph_sync_fs(struct super_block *sb, int wait) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; if (!wait) { - dout("sync_fs (non-blocking)\n"); + doutc(cl, "(non-blocking)\n"); ceph_flush_dirty_caps(fsc->mdsc); - dout("sync_fs (non-blocking) done\n"); + doutc(cl, "(non-blocking) done\n"); return 0; } - dout("sync_fs (blocking)\n"); + doutc(cl, "(blocking)\n"); ceph_osdc_sync(&fsc->client->osdc); ceph_mdsc_sync(fsc->mdsc); - dout("sync_fs (blocking) done\n"); + doutc(cl, "(blocking) 
done\n"); return 0; } @@ -341,7 +344,7 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) char *dev_name = param->string, *dev_name_end; int ret; - dout("%s '%s'\n", __func__, dev_name); + dout("'%s'\n", dev_name); if (!dev_name || !*dev_name) return invalfc(fc, "Empty source"); @@ -413,7 +416,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, return ret; token = fs_parse(fc, ceph_mount_parameters, param, &result); - dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); + dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token); if (token < 0) return token; @@ -684,7 +687,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, */ static int ceph_show_options(struct seq_file *m, struct dentry *root) { - struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); struct ceph_mount_options *fsopt = fsc->mount_options; size_t pos; int ret; @@ -881,7 +884,7 @@ static void flush_fs_workqueues(struct ceph_fs_client *fsc) static void destroy_fs_client(struct ceph_fs_client *fsc) { - dout("destroy_fs_client %p\n", fsc); + doutc(fsc->client, "%p\n", fsc); spin_lock(&ceph_fsc_lock); list_del(&fsc->metric_wakeup); @@ -896,7 +899,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) ceph_destroy_client(fsc->client); kfree(fsc); - dout("destroy_fs_client %p done\n", fsc); + dout("%s: %p done\n", __func__, fsc); } /* @@ -1015,9 +1018,9 @@ static void __ceph_umount_begin(struct ceph_fs_client *fsc) */ void ceph_umount_begin(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); - dout("ceph_umount_begin - starting forced umount\n"); + doutc(fsc->client, "starting forced umount\n"); if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; @@ -1045,13 +1048,14 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, const char *path, unsigned long started) { + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root; /* open dir */ - dout("open_root_inode opening '%s'\n", path); + doutc(cl, "opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req); @@ -1071,13 +1075,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, if (err == 0) { struct inode *inode = req->r_target_inode; req->r_target_inode = NULL; - dout("open_root_inode success\n"); + doutc(cl, "success\n"); root = d_make_root(inode); if (!root) { root = ERR_PTR(-ENOMEM); goto out; } - dout("open_root_inode success, root dentry is %p\n", root); + doutc(cl, "success, root dentry is %p\n", root); } else { root = ERR_PTR(err); } @@ -1136,11 +1140,12 @@ static int ceph_apply_test_dummy_encryption(struct super_block *sb, static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, struct fs_context *fc) { + struct ceph_client *cl = fsc->client; int err; unsigned long started = jiffies; /* note the start time */ struct dentry *root; - dout("mount start %p\n", fsc); + doutc(cl, "mount start %p\n", fsc); mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { @@ -1163,7 +1168,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, if (err) goto out; - dout("mount opening path '%s'\n", path); + doutc(cl, "mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); @@ -1178,7 +1183,7 @@ static struct 
dentry *ceph_real_mount(struct ceph_fs_client *fsc, } fsc->mount_state = CEPH_MOUNT_MOUNTED; - dout("mount success\n"); + doutc(cl, "mount success\n"); mutex_unlock(&fsc->client->mount_mutex); return root; @@ -1191,9 +1196,10 @@ out: static int ceph_set_super(struct super_block *s, struct fs_context *fc) { struct ceph_fs_client *fsc = s->s_fs_info; + struct ceph_client *cl = fsc->client; int ret; - dout("set_super %p\n", s); + doutc(cl, "%p\n", s); s->s_maxbytes = MAX_LFS_FILESIZE; @@ -1226,31 +1232,32 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; - dout("ceph_compare_super %p\n", sb); + doutc(cl, "%p\n", sb); if (compare_mount_options(fsopt, opt, fsc)) { - dout("monitor(s)/mount options don't match\n"); + doutc(cl, "monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { - dout("fsid doesn't match\n"); + doutc(cl, "fsid doesn't match\n"); return 0; } if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { - dout("flags differ\n"); + doutc(cl, "flags differ\n"); return 0; } if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { - dout("client is blocklisted (and CLEANRECOVER is not set)\n"); + doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n"); return 0; } if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { - dout("client has been forcibly unmounted\n"); + doutc(cl, "client has been forcibly unmounted\n"); return 0; } @@ -1322,9 +1329,9 @@ static int ceph_get_tree(struct fs_context *fc) goto out; } - if (ceph_sb_to_client(sb) != fsc) { + if (ceph_sb_to_fs_client(sb) != fsc) { destroy_fs_client(fsc); - fsc = ceph_sb_to_client(sb); + fsc = ceph_sb_to_fs_client(sb); dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); @@ -1338,8 +1345,9 @@ static int ceph_get_tree(struct fs_context *fc) err = PTR_ERR(res); goto out_splat; } - dout("root %p inode %p ino %llx.%llx\n", res, - d_inode(res), ceph_vinop(d_inode(res))); + + doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res, + d_inode(res), ceph_vinop(d_inode(res))); fc->root = fsc->sb->s_root; return 0; @@ -1377,7 +1385,7 @@ static int ceph_reconfigure_fc(struct fs_context *fc) struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; struct super_block *sb = fc->root->d_sb; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); if (err) @@ -1397,7 +1405,8 @@ static int ceph_reconfigure_fc(struct fs_context *fc) kfree(fsc->mount_options->mon_addr); fsc->mount_options->mon_addr = fsopt->mon_addr; fsopt->mon_addr = NULL; - pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); + pr_notice_client(fsc->client, + "monitor addresses recorded, but not used for reconnection"); } sync_filesystem(sb); @@ -1516,11 +1525,12 @@ void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) static void ceph_kill_sb(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); + struct ceph_client *cl = fsc->client; struct ceph_mds_client 
*mdsc = fsc->mdsc; bool wait; - dout("kill_sb %p\n", s); + doutc(cl, "%p\n", s); ceph_mdsc_pre_umount(mdsc); flush_fs_workqueues(fsc); @@ -1551,9 +1561,9 @@ static void ceph_kill_sb(struct super_block *s) &mdsc->stopping_waiter, fsc->client->options->mount_timeout); if (!timeleft) /* timed out */ - pr_warn("umount timed out, %ld\n", timeleft); + pr_warn_client(cl, "umount timed out, %ld\n", timeleft); else if (timeleft < 0) /* killed */ - pr_warn("umount was killed, %ld\n", timeleft); + pr_warn_client(cl, "umount was killed, %ld\n", timeleft); } mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; @@ -1572,13 +1582,13 @@ static struct file_system_type ceph_fs_type = { .name = "ceph", .init_fs_context = ceph_init_fs_context, .kill_sb = ceph_kill_sb, - .fs_flags = FS_RENAME_DOES_D_MOVE, + .fs_flags = FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ceph"); int ceph_force_reconnect(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); int err = 0; fsc->mount_state = CEPH_MOUNT_RECOVER; @@ -1671,6 +1681,11 @@ static const struct kernel_param_ops param_ops_mount_syntax = { module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444); module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444); +bool enable_unsafe_idmap = false; +module_param(enable_unsafe_idmap, bool, 0644); +MODULE_PARM_DESC(enable_unsafe_idmap, + "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID"); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 98844fc8a2f7..fe0f64a0acb2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -488,13 +488,13 @@ ceph_inode(const struct inode *inode) } static inline struct ceph_fs_client * -ceph_inode_to_client(const struct inode *inode) +ceph_inode_to_fs_client(const struct inode *inode) { return (struct ceph_fs_client *)inode->i_sb->s_fs_info; } static inline struct ceph_fs_client * -ceph_sb_to_client(const struct super_block *sb) +ceph_sb_to_fs_client(const struct super_block *sb) { return (struct ceph_fs_client *)sb->s_fs_info; } @@ -502,7 +502,13 @@ ceph_sb_to_client(const struct super_block *sb) static inline struct ceph_mds_client * ceph_sb_to_mdsc(const struct super_block *sb) { - return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc; + return (struct ceph_mds_client *)ceph_sb_to_fs_client(sb)->mdsc; +} + +static inline struct ceph_client * +ceph_inode_to_client(const struct inode *inode) +{ + return (struct ceph_client *)ceph_inode_to_fs_client(inode)->client; } static inline struct ceph_vino @@ -558,7 +564,7 @@ static inline u64 ceph_snap(struct inode *inode) */ static inline u64 ceph_present_ino(struct super_block *sb, u64 ino) { - if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))) + if (unlikely(ceph_test_mount_opt(ceph_sb_to_fs_client(sb), INO32))) return ceph_ino_to_ino32(ino); return ino; } @@ -1094,8 +1100,8 @@ struct ceph_iattr { struct ceph_fscrypt_auth *fscrypt_auth; }; -extern int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia); +extern int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia); extern int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct mnt_idmap *idmap, @@ -1106,7 +1112,7 @@ void ceph_inode_shutdown(struct inode *inode); static inline bool ceph_inode_is_shutdown(struct inode *inode) {
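/*
 * Example (a sketch, not part of the patch): with FS_ALLOW_IDMAP set
 * above, an already-mounted cephfs subtree can be attached as an
 * idmapped mount from userspace via open_tree()/mount_setattr()/
 * move_mount(). userns_fd is assumed to be a user namespace fd carrying
 * the desired ID mapping; the SYS_* numbers need a reasonably recent
 * glibc, and error handling is abbreviated.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_idmapped(const char *src, const char *dst, int userns_fd)
{
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_IDMAP,
		.userns_fd = userns_fd,
	};
	int fd = syscall(SYS_open_tree, AT_FDCWD, src,
			 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);

	if (fd < 0)
		return -1;
	if (syscall(SYS_mount_setattr, fd, "", AT_EMPTY_PATH,
		    &attr, sizeof(attr)) < 0 ||
	    syscall(SYS_move_mount, fd, "", AT_FDCWD, dst,
		    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}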
unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int state = READ_ONCE(fsc->mount_state); return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN; @@ -1223,7 +1229,8 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release); +extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release); extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097ce7f74073..e066a556eccb 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -57,7 +57,8 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_string *pool_ns; s64 pool = ci->i_layout.pool_id; @@ -69,7 +70,7 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); - dout("ceph_vxattrcb_layout %p\n", &ci->netfs.inode); + doutc(cl, "%p\n", &ci->netfs.inode); down_read(&osdc->lock); pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); if (pool_name) { @@ -161,7 +162,7 @@ static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, char *val, size_t size) { ssize_t ret; - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; s64 pool = ci->i_layout.pool_id; const char *pool_name; @@ -313,7 +314,7 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid); } @@ -321,7 +322,7 @@ static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "client%lld", ceph_client_gid(fsc->client)); @@ -570,6 +571,8 @@ static int __set_xattr(struct ceph_inode_info *ci, int flags, int update_xattr, struct ceph_inode_xattr **newxattr) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -626,7 +629,7 @@ static int __set_xattr(struct ceph_inode_info *ci, xattr->should_free_name = update_xattr; ci->i_xattrs.count++; - dout("%s count=%d\n", __func__, ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); } else { 
kfree(*newxattr); *newxattr = NULL; @@ -654,13 +657,13 @@ static int __set_xattr(struct ceph_inode_info *ci, if (new) { rb_link_node(&xattr->node, parent, p); rb_insert_color(&xattr->node, &ci->i_xattrs.index); - dout("%s p=%p\n", __func__, p); + doutc(cl, "p=%p\n", p); } - dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__, - ceph_vinop(&ci->netfs.inode), xattr, name_len, name, - min(val_len, MAX_XATTR_VAL_PRINT_LEN), val, - val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); + doutc(cl, "added %p %llx.%llx xattr %p %.*s=%.*s%s\n", inode, + ceph_vinop(inode), xattr, name_len, name, min(val_len, + MAX_XATTR_VAL_PRINT_LEN), val, + val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); return 0; } @@ -668,6 +671,7 @@ static int __set_xattr(struct ceph_inode_info *ci, static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, const char *name) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -688,13 +692,13 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, else { int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN); - dout("%s %s: found %.*s%s\n", __func__, name, len, - xattr->val, xattr->val_len > len ? "..." : ""); + doutc(cl, "%s found %.*s%s\n", name, len, xattr->val, + xattr->val_len > len ? "..." : ""); return xattr; } } - dout("%s %s: not found\n", __func__, name); + doutc(cl, "%s not found\n", name); return NULL; } @@ -735,19 +739,20 @@ static int __remove_xattr(struct ceph_inode_info *ci, static char *__copy_xattr_names(struct ceph_inode_info *ci, char *dest) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); memcpy(dest, xattr->name, xattr->name_len); dest[xattr->name_len] = '\0'; - dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, - xattr->name_len, ci->i_xattrs.names_size); + doutc(cl, "dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, + xattr->name_len, ci->i_xattrs.names_size); dest += xattr->name_len + 1; p = rb_next(p); @@ -758,19 +763,19 @@ static char *__copy_xattr_names(struct ceph_inode_info *ci, void __ceph_destroy_xattrs(struct ceph_inode_info *ci) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p, *tmp; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__ceph_destroy_xattrs p=%p\n", p); + doutc(cl, "p=%p\n", p); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); tmp = p; p = rb_next(tmp); - dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, - xattr->name_len, xattr->name); + doutc(cl, "next p=%p (%.*s)\n", p, xattr->name_len, xattr->name); rb_erase(tmp, &ci->i_xattrs.index); __free_xattr(xattr); @@ -787,6 +792,7 @@ static int __build_xattrs(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); u32 namelen; u32 numattr = 0; void *p, *end; @@ -798,8 +804,8 @@ static int __build_xattrs(struct inode *inode) int err = 0; int i; - dout("__build_xattrs() len=%d\n", - ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); + doutc(cl, "len=%d\n", + ci->i_xattrs.blob ? 
(int)ci->i_xattrs.blob->vec.iov_len : 0); if (ci->i_xattrs.index_version >= ci->i_xattrs.version) return 0; /* already built */ @@ -874,6 +880,8 @@ bad: static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int val_size) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); + /* * 4 bytes for the length, and additional 4 bytes per each xattr name, * 4 bytes per each value @@ -881,9 +889,8 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int size = 4 + ci->i_xattrs.count*(4 + 4) + ci->i_xattrs.names_size + ci->i_xattrs.vals_size; - dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", - ci->i_xattrs.count, ci->i_xattrs.names_size, - ci->i_xattrs.vals_size); + doutc(cl, "c=%d names.size=%d vals.size=%d\n", ci->i_xattrs.count, + ci->i_xattrs.names_size, ci->i_xattrs.vals_size); if (name_size) size += 4 + 4 + name_size + val_size; @@ -899,12 +906,14 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, */ struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; struct ceph_buffer *old_blob = NULL; void *dest; - dout("__build_xattrs_blob %p\n", &ci->netfs.inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (ci->i_xattrs.dirty) { int need = __get_required_blob_size(ci, 0, 0); @@ -962,6 +971,7 @@ static inline int __get_request_mask(struct inode *in) { ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, size_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_xattr *xattr; struct ceph_vxattr *vxattr; @@ -1000,8 +1010,9 @@ handle_non_vxattrs: req_mask = __get_request_mask(inode); spin_lock(&ci->i_ceph_lock); - dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx name '%s' ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), name, ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !((req_mask & CEPH_CAP_XATTR_SHARED) || @@ -1010,8 +1021,9 @@ handle_non_vxattrs: /* security module gets xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync getxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); return -EBUSY; } @@ -1053,14 +1065,16 @@ out: ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) { struct inode *inode = d_inode(dentry); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool len_only = (size == 0); u32 namelen; int err; spin_lock(&ci->i_ceph_lock); - dout("listxattr %p ver=%lld index_ver=%lld\n", inode, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) { @@ -1094,7 +1108,8 @@ out: static int ceph_sync_setxattr(struct inode *inode, const char *name, const char *value, size_t size, int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + 
struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1119,7 +1134,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, flags |= CEPH_XATTR_REMOVE; } - dout("setxattr value size: %zu\n", size); + doutc(cl, "name %s value size %zu\n", name, size); /* do request */ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1148,10 +1163,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, req->r_num_caps = 1; req->r_inode_drop = CEPH_CAP_XATTR_SHARED; - dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (before): %lld\n", ci->i_xattrs.version); err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); - dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (after): %lld\n", ci->i_xattrs.version); out: if (pagelist) @@ -1162,9 +1177,10 @@ out: int __ceph_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; struct ceph_buffer *old_blob = NULL; int issued; @@ -1220,9 +1236,9 @@ retry: required_blob_size = __get_required_blob_size(ci, name_len, val_len); if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) { - dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", - __func__, ci->i_xattrs.version, required_blob_size, - mdsc->mdsmap->m_max_xattr_size); + doutc(cl, "sync version: %llu size: %d max: %llu\n", + ci->i_xattrs.version, required_blob_size, + mdsc->mdsmap->m_max_xattr_size); goto do_sync; } @@ -1236,8 +1252,8 @@ retry: } } - dout("setxattr %p name '%s' issued %s\n", inode, name, - ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx name '%s' issued %s\n", inode, + ceph_vinop(inode), name, ceph_cap_string(issued)); __build_xattrs(inode); if (!ci->i_xattrs.prealloc_blob || @@ -1246,7 +1262,8 @@ retry: spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); /* Shouldn't be required */ - dout(" pre-allocating new blob size=%d\n", required_blob_size); + doutc(cl, " pre-allocating new blob size=%d\n", + required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; @@ -1285,8 +1302,9 @@ do_sync_unlocked: /* security module set xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync setxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); err = -EBUSY; } else { err = ceph_sync_setxattr(inode, name, value, size, flags); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index b784c393a504..77240953a92e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -46,6 +46,7 @@ static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf) EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS; u64 storage_space, remaining_space, max_variable_size; + u64 id = huge_encode_dev(dentry->d_sb->s_dev); efi_status_t status; /* Some UEFI firmware does not implement QueryVariableInfo() */ @@ -69,6 +70,7 @@ static int efivarfs_statfs(struct 
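/*
 * Example (a sketch, not part of the patch): the efivarfs hunk that
 * continues below and the freevxfs hunk further down fill in statfs
 * f_fsid the same way, deriving a stable identifier from the device
 * number (efivarfs uses s_dev, freevxfs the backing bdev):
 */
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/statfs.h>

static void example_fill_fsid(struct super_block *sb, struct kstatfs *buf)
{
	u64 id = huge_encode_dev(sb->s_dev);

	buf->f_fsid = u64_to_fsid(id);
}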
dentry *dentry, struct kstatfs *buf) buf->f_blocks = storage_space; buf->f_bfree = remaining_space; buf->f_type = dentry->d_sb->s_magic; + buf->f_fsid = u64_to_fsid(id); /* * In f_bavail we declare the free space that the kernel will allow writing diff --git a/fs/efs/super.c b/fs/efs/super.c index b287f47c165b..f17fdac76b2e 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -123,6 +123,7 @@ static const struct super_operations efs_superblock_operations = { }; static const struct export_operations efs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = efs_fh_to_dentry, .fh_to_parent = efs_fh_to_parent, .get_parent = efs_get_parent, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 976dc39a88f7..3789d6224513 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -567,6 +567,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) } static const struct export_operations erofs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = erofs_fh_to_dentry, .fh_to_parent = erofs_fh_to_parent, .get_parent = erofs_get_parent, diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 02c4e2937879..bfdfafe00993 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -295,6 +295,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_valid & ATTR_SIZE) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + setattr_copy(&nop_mnt_idmap, inode, attr); exfat_truncate_inode_atime(inode); if (attr->ia_valid & ATTR_SIZE) { diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 875234179d1f..e7ff58b8e68c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -56,18 +56,18 @@ int __exfat_write_inode(struct inode *inode, int sync) &ep->dentry.file.create_time, &ep->dentry.file.create_date, &ep->dentry.file.create_time_cs); + ts = inode_get_mtime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.modify_tz, &ep->dentry.file.modify_time, &ep->dentry.file.modify_date, &ep->dentry.file.modify_time_cs); - inode_set_mtime_to_ts(inode, ts); + ts = inode_get_atime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.access_tz, &ep->dentry.file.access_time, &ep->dentry.file.access_date, NULL); - inode_set_atime_to_ts(inode, ts); /* File size should be zero if there is no cluster allocated */ on_disk_size = i_size_read(inode); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c20704aa21b3..3ae0154c5680 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -342,43 +342,30 @@ out: return error; } +#define FILEID_INO64_GEN_LEN 3 + /** - * export_encode_fh - default export_operations->encode_fh function + * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id * @inode: the object to encode * @fid: where to store the file handle fragment - * @max_len: maximum length to store there - * @parent: parent directory inode, if wanted + * @max_len: maximum length to store there (in 4 byte units) * - * This default encode_fh function assumes that the 32 inode number - * is suitable for locating an inode, and that the generation number - * can be used to check that it is still valid. It places them in the - * filehandle fragment where export_decode_fh expects to find them. + * This generic function is used to encode a non-decodeable file id for + * fanotify for filesystems that do not support NFS export. 
*/ -static int export_encode_fh(struct inode *inode, struct fid *fid, - int *max_len, struct inode *parent) +static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, + int *max_len) { - int len = *max_len; - int type = FILEID_INO32_GEN; - - if (parent && (len < 4)) { - *max_len = 4; - return FILEID_INVALID; - } else if (len < 2) { - *max_len = 2; + if (*max_len < FILEID_INO64_GEN_LEN) { + *max_len = FILEID_INO64_GEN_LEN; return FILEID_INVALID; } - len = 2; - fid->i32.ino = inode->i_ino; - fid->i32.gen = inode->i_generation; - if (parent) { - fid->i32.parent_ino = parent->i_ino; - fid->i32.parent_gen = parent->i_generation; - len = 4; - type = FILEID_INO32_GEN_PARENT; - } - *max_len = len; - return type; + fid->i64.ino = inode->i_ino; + fid->i64.gen = inode->i_generation; + *max_len = FILEID_INO64_GEN_LEN; + + return FILEID_INO64_GEN; } /** @@ -396,17 +383,13 @@ int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, { const struct export_operations *nop = inode->i_sb->s_export_op; - /* - * If a decodeable file handle was requested, we need to make sure that - * filesystem can decode file handles. - */ - if (nop && !(flags & EXPORT_FH_FID) && !nop->fh_to_dentry) + if (!exportfs_can_encode_fh(nop, flags)) return -EOPNOTSUPP; - if (nop && nop->encode_fh) - return nop->encode_fh(inode, fid->raw, max_len, parent); + if (!nop && (flags & EXPORT_FH_FID)) + return exportfs_encode_ino64_fid(inode, fid, max_len); - return export_encode_fh(inode, fid, max_len, parent); + return nop->encode_fh(inode, fid->raw, max_len, parent); } EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); @@ -456,7 +439,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, /* * Try to get any dentry for the given file handle from the filesystem. 
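/*
 * Example (a sketch, not part of the patch): the non-decodeable file ids
 * produced by exportfs_encode_ino64_fid() are what userspace receives
 * from name_to_handle_at() when it passes AT_HANDLE_FID (the uapi
 * counterpart of EXPORT_FH_FID), e.g. to correlate fanotify FID events.
 * AT_HANDLE_FID may need the fallback define with older headers.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef AT_HANDLE_FID
#define AT_HANDLE_FID 0x200	/* file handle is an object id only */
#endif

int main(int argc, char **argv)
{
	struct file_handle *fh;
	int mount_id;

	if (argc < 2)
		return 1;
	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return 1;
	fh->handle_bytes = MAX_HANDLE_SZ;
	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id,
			      AT_HANDLE_FID) < 0) {
		perror("name_to_handle_at");
		return 1;
	}
	printf("fid type %d, %u bytes\n", fh->handle_type, fh->handle_bytes);
	return 0;
}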
*/ - if (!nop || !nop->fh_to_dentry) + if (!exportfs_can_decode_fh(nop)) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (IS_ERR_OR_NULL(result)) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 645ee6142f69..01f9addc8b1f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -397,6 +397,7 @@ static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations ext2_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ext2_fh_to_dentry, .fh_to_parent = ext2_fh_to_parent, .get_parent = ext2_get_parent, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 54a9dde7483a..c5fcf377ab1f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1654,6 +1654,7 @@ static const struct super_operations ext4_sops = { }; static const struct export_operations ext4_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1ed68158bac5..033af907c3b1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3330,6 +3330,7 @@ static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations f2fs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = f2fs_fh_to_dentry, .fh_to_parent = f2fs_fh_to_parent, .get_parent = f2fs_get_parent, diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index 3626eb585a98..c52e63e10d35 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -279,6 +279,7 @@ static struct dentry *fat_get_parent(struct dentry *child_dir) } const struct export_operations fat_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = fat_fh_to_dentry, .fh_to_parent = fat_fh_to_parent, .get_parent = fat_get_parent, diff --git a/fs/fhandle.c b/fs/fhandle.c index 6ea8d35a9382..18b3ba8dc8ea 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -26,12 +26,8 @@ static long do_sys_name_to_handle(const struct path *path, /* * We need to make sure whether the file system support decoding of * the file handle if decodeable file handle was requested. - * Otherwise, even empty export_operations are sufficient to opt-in - * to encoding FIDs. */ - if (!path->dentry->d_sb->s_export_op || - (!(fh_flags & EXPORT_FH_FID) && - !path->dentry->d_sb->s_export_op->fh_to_dentry)) + if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags)) return -EOPNOTSUPP; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 310d73e254df..e6e2a2185e7c 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -76,6 +76,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) { struct vxfs_sb_info *infp = VXFS_SBI(dentry->d_sb); struct vxfs_sb *raw_sb = infp->vsi_raw; + u64 id = huge_encode_dev(dentry->d_sb->s_bdev->bd_dev); bufp->f_type = VXFS_SUPER_MAGIC; bufp->f_bsize = dentry->d_sb->s_blocksize; @@ -84,6 +85,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) bufp->f_bavail = 0; bufp->f_files = 0; bufp->f_ffree = fs32_to_cpu(infp, raw_sb->vs_ifree); + bufp->f_fsid = u64_to_fsid(id); bufp->f_namelen = VXFS_NAMELEN; return 0; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index caa8121ad99c..74d4f09d5827 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -999,7 +999,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, } *max_len = len; - return parent ? 
0x82 : 0x81; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, @@ -1007,7 +1007,8 @@ static struct dentry *fuse_fh_to_dentry(struct super_block *sb, { struct fuse_inode_handle handle; - if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) return NULL; handle.nodeid = (u64) fid->raw[0] << 32; @@ -1021,7 +1022,7 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb, { struct fuse_inode_handle parent; - if (fh_type != 0x82 || fh_len < 6) + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) return NULL; parent.nodeid = (u64) fid->raw[3] << 32; diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index d4deb2b19959..82f5b09c04e6 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -11,9 +11,9 @@ #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) -extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); -extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, - struct posix_acl *acl, int type); +struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); +int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, + struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6b060fc9e260..9611bfceda4b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -155,7 +155,7 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE)) goto out; if (folio_test_checked(folio) || current->journal_info) goto out_ignore; @@ -214,12 +214,12 @@ static int gfs2_write_jdata_batch(struct address_space *mapping, unsigned nrblocks; int i; int ret; - int nr_pages = 0; + size_t size = 0; int nr_folios = folio_batch_count(fbatch); for (i = 0; i < nr_folios; i++) - nr_pages += folio_nr_pages(fbatch->folios[i]); - nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits); + size += folio_size(fbatch->folios[i]); + nrblocks = size >> inode->i_blkbits; ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); if (ret < 0) @@ -403,27 +403,27 @@ static int gfs2_jdata_writepages(struct address_space *mapping, } /** - * stuffed_readpage - Fill in a Linux page with stuffed file data + * stuffed_readpage - Fill in a Linux folio with stuffed file data * @ip: the inode - * @page: the page + * @folio: the folio * * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +static int stuffed_readpage(struct gfs2_inode *ip, struct folio *folio) { struct buffer_head *dibh; - u64 dsize = i_size_read(&ip->i_inode); - void *kaddr; + size_t i_size = i_size_read(&ip->i_inode); + void *data; int error; /* * Due to the order of unstuffing files and ->fault(), we can be - * asked for a zero page in the case of a stuffed file being extended, + * asked for a zero folio in the case of a stuffed file being extended, * so we need to supply one here. It doesn't happen often. 
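/*
 * Note (an aside, not part of the patch): the fuse hunk above swaps bare
 * magic numbers for the named constants from include/linux/exportfs.h
 * without changing the encoded handle types, i.e.:
 */
#include <linux/build_bug.h>
#include <linux/exportfs.h>

static_assert(FILEID_INO64_GEN == 0x81);
static_assert(FILEID_INO64_GEN_PARENT == 0x82);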
*/ - if (unlikely(page->index)) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (unlikely(folio->index)) { + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); return 0; } @@ -431,13 +431,11 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) if (error) return error; - kaddr = kmap_local_page(page); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_SIZE - dsize); - kunmap_local(kaddr); - flush_dcache_page(page); + data = dibh->b_data + sizeof(struct gfs2_dinode); + memcpy_to_folio(folio, 0, data, i_size); + folio_zero_range(folio, i_size, folio_size(folio) - i_size); brelse(dibh); - SetPageUptodate(page); + folio_mark_uptodate(folio); return 0; } @@ -458,7 +456,7 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { error = iomap_read_folio(folio, &gfs2_iomap_ops); } else if (gfs2_is_stuffed(ip)) { - error = stuffed_readpage(ip, &folio->page); + error = stuffed_readpage(ip, folio); folio_unlock(folio); } else { error = mpage_read_folio(folio, gfs2_block_map); @@ -479,31 +477,29 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) * */ -int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, - unsigned size) +ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, + size_t size) { struct address_space *mapping = ip->i_inode.i_mapping; unsigned long index = *pos >> PAGE_SHIFT; - unsigned offset = *pos & (PAGE_SIZE - 1); - unsigned copied = 0; - unsigned amt; - struct page *page; + size_t copied = 0; do { - page = read_cache_page(mapping, index, gfs2_read_folio, NULL); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -EINTR) + size_t offset, chunk; + struct folio *folio; + + folio = read_cache_folio(mapping, index, gfs2_read_folio, NULL); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -EINTR) continue; - return PTR_ERR(page); + return PTR_ERR(folio); } - amt = size - copied; - if (offset + size > PAGE_SIZE) - amt = PAGE_SIZE - offset; - memcpy_from_page(buf + copied, page, offset, amt); - put_page(page); - copied += amt; - index++; - offset = 0; + offset = *pos + copied - folio_pos(folio); + chunk = min(size - copied, folio_size(folio) - offset); + memcpy_from_folio(buf + copied, folio, offset, chunk); + index = folio_next_index(folio); + folio_put(folio); + copied += chunk; } while(copied < size); (*pos) += size; return size; diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index f08322ef41cf..a10c4334d248 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -8,8 +8,8 @@ #include "incore.h" -extern void adjust_fs_space(struct inode *inode); -extern void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len); +void adjust_fs_space(struct inode *inode); +void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, + size_t from, size_t len); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6eb6f1bd9e34..d9ccfd27e4f1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -104,7 +104,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio) and write it out to disk */ unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) goto out_brelse; if (isdir) { @@ -315,6 +315,12 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) } } +static inline struct buffer_head * 
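/*
 * Example (a sketch, not part of the patch): the stuffed_readpage()
 * conversion above reduces to this fill-and-zero-tail folio pattern,
 * replacing the old kmap_local_page()/memset()/flush_dcache_page()
 * sequence:
 */
#include <linux/highmem.h>
#include <linux/pagemap.h>

static void fill_folio_head(struct folio *folio, const void *src, size_t len)
{
	memcpy_to_folio(folio, 0, src, len);
	folio_zero_range(folio, len, folio_size(folio) - len);
	folio_mark_uptodate(folio);
}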
+metapath_dibh(struct metapath *mp) +{ + return mp->mp_bh[0]; +} + static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, unsigned int x, unsigned int h) { @@ -413,13 +419,12 @@ static void release_metapath(struct metapath *mp) * gfs2_extent_length - Returns length of an extent of blocks * @bh: The metadata block * @ptr: Current position in @bh - * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob) { const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; @@ -658,7 +663,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *dibh = mp->mp_bh[0]; + struct buffer_head *dibh = metapath_dibh(mp); u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; size_t dblks = iomap->length >> inode->i_blkbits; @@ -700,7 +705,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, i = mp->mp_aheight; do { n = blks - alloced; - ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + ret = gfs2_alloc_blocks(ip, &bn, &n, 0); if (ret) goto out; alloced += n; @@ -911,7 +916,7 @@ unstuff: goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index e5b7d17131ed..4e8b1e8ebdf3 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -46,24 +46,24 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, extern const struct iomap_ops gfs2_iomap_ops; extern const struct iomap_writeback_ops gfs2_writeback_ops; -extern int gfs2_unstuff_dinode(struct gfs2_inode *ip); -extern int gfs2_block_map(struct inode *inode, sector_t lblock, - struct buffer_head *bh, int create); -extern int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned int *extlen); -extern int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned *extlen, bool *new); -extern int gfs2_setattr_size(struct inode *inode, u64 size); -extern int gfs2_truncatei_resume(struct gfs2_inode *ip); -extern int gfs2_file_dealloc(struct gfs2_inode *ip); -extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len); -extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); -extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); -extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); +int gfs2_unstuff_dinode(struct gfs2_inode *ip); +int gfs2_block_map(struct inode *inode, sector_t lblock, + struct buffer_head *bh, int create); +int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, + unsigned int *extlen); +int gfs2_alloc_extent(struct inode 
*inode, u64 lblock, u64 *dblock, + unsigned *extlen, bool *new); +int gfs2_setattr_size(struct inode *inode, u64 size); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); +int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, + unsigned int len); +int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); +void gfs2_free_journal_extents(struct gfs2_jdesc *jd); +int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 61ddd03ea111..560e4624c09f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -868,7 +868,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_dirent *dent; struct timespec64 tv = current_time(inode); - error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &bn, &n, 0); if (error) return NULL; bh = gfs2_meta_new(ip->i_gl, bn); diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 5b76480c17c9..25a857c78b53 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -23,32 +23,32 @@ struct gfs2_diradd { int save_loc; }; -extern struct inode *gfs2_dir_search(struct inode *dir, - const struct qstr *filename, - bool fail_on_exist); -extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, - const struct gfs2_inode *ip); -extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, struct gfs2_diradd *da); +struct inode *gfs2_dir_search(struct inode *dir, + const struct qstr *filename, + bool fail_on_exist); +int gfs2_dir_check(struct inode *dir, const struct qstr *filename, + const struct gfs2_inode *ip); +int gfs2_dir_add(struct inode *inode, const struct qstr *filename, + const struct gfs2_inode *ip, struct gfs2_diradd *da); static inline void gfs2_dir_no_add(struct gfs2_diradd *da) { brelse(da->bh); da->bh = NULL; } -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); -extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, - struct file_ra_state *f_ra); -extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - const struct gfs2_inode *nip, unsigned int new_type); +int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); +int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra); +int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, + const struct gfs2_inode *nip, unsigned int new_type); -extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); -extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename, - struct gfs2_diradd *da); -extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, - struct buffer_head **bhp); -extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); +int gfs2_diradd_alloc_required(struct inode *dir, + const struct qstr *filename, + struct gfs2_diradd *da); +int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, + struct buffer_head **bhp); +void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f2700477a300..4b66efc1a82a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -418,7 +418,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); 
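/*
 * Note (an aside, not part of the patch): the gfs2 header hunks here and
 * below only drop the redundant 'extern' keyword. 'extern' is implicit
 * on function declarations, so both of these declare the same symbol:
 */
extern int gfs2_logd(void *data);	/* old style */
int gfs2_logd(void *data);		/* new style, identical meaning */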
struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; u64 offset = page_offset(page); unsigned int data_blocks, ind_blocks, rblocks; vm_fault_t ret = VM_FAULT_LOCKED; @@ -1120,14 +1120,16 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out_unlock; - ret = file_update_time(file); - if (ret) - goto out_unlock; - if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; ssize_t buffered, ret2; + /* + * Note that under direct I/O, we don't allow and inode + * timestamp updates, so we're not calling file_update_time() + * here. + */ + ret = gfs2_file_direct_write(iocb, from, &gh); if (ret < 0 || !iov_iter_count(from)) goto out_unlock; @@ -1154,6 +1156,10 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!ret || ret2 > 0) ret += ret2; } else { + ret = file_update_time(file); + if (ret) + goto out_unlock; + ret = gfs2_file_buffered_write(iocb, from, &gh); if (likely(ret > 0)) ret = generic_write_sync(iocb, ret); @@ -1245,7 +1251,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes, max_blks; int error; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d5fa75eac0bf..d6bf1f8c25dc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1524,7 +1524,6 @@ fail: return; } list_add_tail(&gh->gh_list, insert_pt); - gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) sdp->sd_lockstruct.ls_ops->lm_cancel(gl); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c8685ca7d2a2..61197598abfd 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -156,21 +156,6 @@ out: return gh; } -static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_EXCLUSIVE; -} - -static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_DEFERRED; -} - -static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_SHARED; -} - static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) { if (gl->gl_ops->go_flags & GLOF_ASPACE) { @@ -181,40 +166,40 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) return NULL; } -extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - int create, struct gfs2_glock **glp); -extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put(struct gfs2_glock *gl); -extern void gfs2_glock_queue_put(struct gfs2_glock *gl); +int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + int create, struct gfs2_glock **glp); +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); +void gfs2_glock_put(struct gfs2_glock *gl); +void gfs2_glock_queue_put(struct gfs2_glock *gl); -extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - u16 flags, struct gfs2_holder *gh, - unsigned long ip); +void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh, + unsigned long ip); static inline void 
gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); } -extern void gfs2_holder_reinit(unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern void gfs2_holder_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq(struct gfs2_holder *gh); -extern int gfs2_glock_poll(struct gfs2_holder *gh); -extern int gfs2_instantiate(struct gfs2_holder *gh); -extern int gfs2_glock_holder_ready(struct gfs2_holder *gh); -extern int gfs2_glock_wait(struct gfs2_holder *gh); -extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq(struct gfs2_holder *gh); -extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); -extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, +void gfs2_holder_reinit(unsigned int state, u16 flags, + struct gfs2_holder *gh); +void gfs2_holder_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq(struct gfs2_holder *gh); +int gfs2_glock_poll(struct gfs2_holder *gh); +int gfs2_instantiate(struct gfs2_holder *gh); +int gfs2_glock_holder_ready(struct gfs2_holder *gh); +int gfs2_glock_wait(struct gfs2_holder *gh); +int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq(struct gfs2_holder *gh); +void gfs2_glock_dq_wait(struct gfs2_holder *gh); +void gfs2_glock_dq_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + unsigned int state, u16 flags, + struct gfs2_holder *gh); +int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ gfs2_dump_glock(NULL, gl, true); \ @@ -228,7 +213,7 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \ while (0) -extern __printf(2, 3) +__printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** @@ -256,27 +241,27 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, return error; } -extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); -extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -extern bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); -extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); -extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); -extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); -extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); -extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); -extern void gfs2_glock_free(struct gfs2_glock *gl); +void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); +void gfs2_glock_complete(struct gfs2_glock *gl, int ret); +bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); +void gfs2_cancel_delete_work(struct gfs2_glock *gl); +void gfs2_flush_delete_work(struct gfs2_sbd *sdp); +void gfs2_gl_hash_clear(struct gfs2_sbd 
*sdp); +void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); +void gfs2_glock_thaw(struct gfs2_sbd *sdp); +void gfs2_glock_add_to_lru(struct gfs2_glock *gl); +void gfs2_glock_free(struct gfs2_glock *gl); -extern int __init gfs2_glock_init(void); -extern void gfs2_glock_exit(void); +int __init gfs2_glock_init(void); +void gfs2_glock_exit(void); -extern void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_register_debugfs(void); -extern void gfs2_unregister_debugfs(void); +void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_register_debugfs(void); +void gfs2_unregister_debugfs(void); -extern void glock_set_object(struct gfs2_glock *gl, void *object); -extern void glock_clear_object(struct gfs2_glock *gl, void *object); +void glock_set_object(struct gfs2_glock *gl, void *object); +void glock_clear_object(struct gfs2_glock *gl, void *object); extern const struct lm_lockops gfs2_dlm_ops; @@ -295,7 +280,7 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); -extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); +bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e7d334c277a1..b41c78bd2cc0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -615,18 +615,6 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl) } /** - * freeze_go_demote_ok - * @gl: the glock - * - * Always returns 0 - */ - -static int freeze_go_demote_ok(const struct gfs2_glock *gl) -{ - return 0; -} - -/** * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * @remote: true if this came from a different cluster node @@ -745,7 +733,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { const struct gfs2_glock_operations gfs2_freeze_glops = { .go_xmote_bh = freeze_go_xmote_bh, - .go_demote_ok = freeze_go_demote_ok, .go_callback = freeze_go_callback, .go_type = LM_TYPE_NONDISK, .go_flags = GLOF_NONDISK, diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 695898afcaf1..9341423798df 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -22,7 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; -extern int gfs2_inode_metasync(struct gfs2_glock *gl); -extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); +int gfs2_inode_metasync(struct gfs2_glock *gl); +void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); #endif /* __GLOPS_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a8c95c5293c6..95a334d64da2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -863,7 +863,7 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) preempt_enable(); } -extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); +struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip) { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7fe77bc771e5..1b95db2c3aac 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -266,17 +266,18 @@ fail_iput: } -struct inode 
*gfs2_lookup_simple(struct inode *dip, const char *name) +/** + * gfs2_lookup_meta - Look up an inode in a metadata directory + * @dip: The directory + * @name: The name of the inode + */ +struct inode *gfs2_lookup_meta(struct inode *dip, const char *name) { struct qstr qstr; struct inode *inode; + gfs2_str2qstr(&qstr, name); inode = gfs2_lookupi(dip, &qstr, 1); - /* gfs2_lookupi has inconsistent callers: vfs - * related routines expect NULL for no entry found, - * gfs2_lookup_simple callers expect ENOENT - * and do not check for NULL. - */ if (IS_ERR_OR_NULL(inode)) return inode ? inode : ERR_PTR(-ENOENT); @@ -418,7 +419,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1); if (error) goto out_trans_end; @@ -1867,16 +1868,24 @@ out: int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { + int may_not_block = mask & MAY_NOT_BLOCK; struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_glock *gl; int error; gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) + gl = rcu_dereference_check(ip->i_gl, !may_not_block); + if (unlikely(!gl)) { + /* inode is getting torn down, must be RCU mode */ + WARN_ON_ONCE(!may_not_block); + return -ECHILD; + } + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } @@ -1921,7 +1930,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; - struct gfs2_alloc_parms ap; + struct gfs2_alloc_parms ap = {}; ouid = inode->i_uid; ogid = inode->i_gid; @@ -2154,7 +2163,7 @@ static int gfs2_update_time(struct inode *inode, int flags) int error; gh = gfs2_glock_is_locked_by_me(gl); - if (gh && !gfs2_glock_is_held_excl(gl)) { + if (gh && gl->gl_state != LM_ST_EXCLUSIVE) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh); error = gfs2_glock_nq(gh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c8c5814e7295..fd15d1c6b6fb 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -13,9 +13,9 @@ #include "util.h" bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); -extern int gfs2_internal_read(struct gfs2_inode *ip, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_aops(struct inode *inode); +ssize_t gfs2_internal_read(struct gfs2_inode *ip, + char *buf, loff_t *pos, size_t size); +void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { @@ -44,19 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { - inode->i_blocks = blocks << - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + inode->i_blocks = blocks << (inode->i_blkbits - 9); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { - return inode->i_blocks >> - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + return inode->i_blocks >> (inode->i_blkbits - 9); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { - change <<= inode->i_blkbits - GFS2_BASIC_BLOCK_SHIFT; + change <<= inode->i_blkbits - 9; 
gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } @@ -88,33 +86,33 @@ err: return -EIO; } -extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - unsigned int blktype); -extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, - u64 no_formal_ino, - unsigned int blktype); - -extern int gfs2_inode_refresh(struct gfs2_inode *ip); - -extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root); -extern int gfs2_permission(struct mnt_idmap *idmap, - struct inode *inode, int mask); -extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); -extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -extern int gfs2_open_common(struct inode *inode, struct file *file); -extern loff_t gfs2_seek_data(struct file *file, loff_t offset); -extern loff_t gfs2_seek_hole(struct file *file, loff_t offset); +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino, + unsigned int blktype); +struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 no_formal_ino, + unsigned int blktype); + +int gfs2_inode_refresh(struct gfs2_inode *ip); + +struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root); +int gfs2_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask); +struct inode *gfs2_lookup_meta(struct inode *dip, const char *name); +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +int gfs2_open_common(struct inode *inode, struct file *file); +loff_t gfs2_seek_data(struct file *file, loff_t offset); +loff_t gfs2_seek_hole(struct file *file, loff_t offset); extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; -extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int gfs2_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); -extern void gfs2_set_inode_flags(struct inode *inode); - +int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int gfs2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct fileattr *fa); +void gfs2_set_inode_flags(struct inode *inode); + #ifdef CONFIG_GFS2_FS_LOCKING_DLM extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 653cffcbf869..c27b05099c1e 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -70,29 +70,29 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) } } -extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); -extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); -extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); -extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); -extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); -extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - u64 seq, u32 tail, u32 lblock, u32 flags, - blk_opf_t op_flags); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, - u32 type); 
-extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); -extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); -extern void log_flush_wait(struct gfs2_sbd *sdp); +void gfs2_ordered_del_inode(struct gfs2_inode *ip); +unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); +void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + u64 seq, u32 tail, u32 lblock, u32 flags, + blk_opf_t op_flags); +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + u32 type); +void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); +void log_flush_wait(struct gfs2_sbd *sdp); -extern int gfs2_logd(void *data); -extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); -extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); -extern void gfs2_ail_drain(struct gfs2_sbd *sdp); +int gfs2_logd(void *data); +void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_glock_remove_revoke(struct gfs2_glock *gl); +void gfs2_flush_revokes(struct gfs2_sbd *sdp); +void gfs2_ail_drain(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 1412ffba1d44..07890c7b145d 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -11,16 +11,18 @@ #include "incore.h" extern const struct gfs2_log_operations *gfs2_log_ops[]; -extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); -extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); -extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - struct page *page, unsigned size, unsigned offset, - u64 blkno); -extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); -extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, bool keep_cache); -extern void gfs2_drain_revokes(struct gfs2_sbd *sdp); + +void gfs2_log_incr_head(struct gfs2_sbd *sdp); +u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); +void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + struct page *page, unsigned size, unsigned offset, + u64 blkno); +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); +void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); +int gfs2_find_jhead(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head, bool keep_cache); +void gfs2_drain_revokes(struct gfs2_sbd *sdp); + static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { return sdp->sd_ldptrs; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index d0a58cdd433a..831d988c2ceb 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -50,21 +50,21 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) return inode->i_sb->s_fs_info; } -extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, - 
int rahead, struct buffer_head **bhp); -extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, - int create); +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + int rahead, struct buffer_head **bhp); +int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); enum { REMOVE_JDATA = 0, REMOVE_META = 1, }; -extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta); -extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); -extern int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, - struct buffer_head **bhp); +void gfs2_remove_from_journal(struct buffer_head *bh, int meta); +void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ecf789b7168c..b108c5d26839 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -292,8 +292,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) return error; } - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(u64); @@ -648,7 +647,7 @@ static int init_statfs(struct gfs2_sbd *sdp) struct gfs2_jdesc *jd; struct gfs2_inode *ip; - sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); + sdp->sd_statfs_inode = gfs2_lookup_meta(master, "statfs"); if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in statfs inode: %d\n", error); @@ -657,7 +656,7 @@ static int init_statfs(struct gfs2_sbd *sdp) if (sdp->sd_args.ar_spectator) goto out; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -674,7 +673,7 @@ static int init_statfs(struct gfs2_sbd *sdp) goto free_local; } sprintf(buf, "statfs_change%u", jd->jd_jid); - lsi->si_sc_inode = gfs2_lookup_simple(pn, buf); + lsi->si_sc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(lsi->si_sc_inode)) { error = PTR_ERR(lsi->si_sc_inode); fs_err(sdp, "can't find local \"sc\" file#%u: %d\n", @@ -739,7 +738,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_statfs; - sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); + sdp->sd_jindex = gfs2_lookup_meta(master, "jindex"); if (IS_ERR(sdp->sd_jindex)) { fs_err(sdp, "can't lookup journal index: %d\n", error); return PTR_ERR(sdp->sd_jindex); @@ -888,7 +887,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail; /* Read in the resource index inode */ - sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); + sdp->sd_rindex = gfs2_lookup_meta(master, "rindex"); if (IS_ERR(sdp->sd_rindex)) { error = PTR_ERR(sdp->sd_rindex); fs_err(sdp, "can't get resource index inode: %d\n", error); @@ -897,7 +896,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) sdp->sd_rindex_uptodate = 0; /* Read in the quota inode */ - sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); + 
sdp->sd_quota_inode = gfs2_lookup_meta(master, "quota"); if (IS_ERR(sdp->sd_quota_inode)) { error = PTR_ERR(sdp->sd_quota_inode); fs_err(sdp, "can't get quota file inode: %d\n", error); @@ -941,7 +940,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_qc_gh; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -949,7 +948,7 @@ } sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); - sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf); + sdp->sd_qc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(sdp->sd_qc_inode)) { error = PTR_ERR(sdp->sd_qc_inode); fs_err(sdp, "can't find local \"qc\" file: %d\n", error); @@ -1187,10 +1186,9 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) /* Set up the buffer cache and fill in some fake block size values to allow us to read in the on-disk superblock. */ - sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK); + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512); sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit; @@ -1278,10 +1276,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (!sb_rdonly(sb)) { error = init_threads(sdp); - if (error) { - gfs2_withdraw_delayed(sdp); + if (error) goto fail_per_node; - } } error = gfs2_freeze_lock_shared(sdp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 5cbbc1a46a92..95dae7838b4e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -470,6 +470,17 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, (sync_gen && (qd->qd_sync_gen >= *sync_gen))) return 0; + /* + * If qd_change is 0 it means a pending quota change was negated. + * We should not sync it, but we still have a qd reference and slot + * reference taken by gfs2_quota_change -> do_qc that need to be put. + */ + if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) { + slot_put(qd); + qd_put(qd); + return 0; + } + if (!lockref_get_not_dead(&qd->qd_lockref)) return 0; @@ -912,7 +923,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -1086,8 +1097,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) u32 x; int error; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; error = gfs2_quota_hold(ip, uid, gid); @@ -1194,17 +1204,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) #define MAX_LINE 256 -static int print_message(struct gfs2_quota_data *qd, char *type) +static void print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_sbd; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) { fs_info(sdp, "quota %s for %s %u\n", type, (qd->qd_id.type == USRQUOTA) ?
"user" : "group", from_kqid(&init_user_ns, qd->qd_id)); - - return 0; + } } /** @@ -1274,7 +1283,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); - error = print_message(qd, "warning"); + print_message(qd, "warning"); + error = 0; qd->qd_last_warn = jiffies; } } @@ -1288,8 +1298,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 x; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - if ((sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) || + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF || gfs2_assert_warn(sdp, change)) return; if (ip->i_diskflags & GFS2_DIF_SYSTEM) @@ -1746,7 +1755,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 36f54b426b0c..f462d9cb3087 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -15,27 +15,27 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_qa_get(struct gfs2_inode *ip); -extern void gfs2_qa_put(struct gfs2_inode *ip); -extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unhold(struct gfs2_inode *ip); +int gfs2_qa_get(struct gfs2_inode *ip); +void gfs2_qa_put(struct gfs2_inode *ip); +int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unhold(struct gfs2_inode *ip); -extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unlock(struct gfs2_inode *ip); +int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, - struct gfs2_alloc_parms *ap); -extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, - kuid_t uid, kgid_t gid); +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); +void gfs2_quota_change(struct gfs2_inode *ip, s64 change, + kuid_t uid, kgid_t gid); -extern int gfs2_quota_sync(struct super_block *sb, int type); -extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); +int gfs2_quota_sync(struct super_block *sb, int type); +int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); -extern int gfs2_quota_init(struct gfs2_sbd *sdp); -extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); -extern int gfs2_quotad(void *data); +int gfs2_quota_init(struct gfs2_sbd *sdp); +void gfs2_quota_cleanup(struct gfs2_sbd *sdp); +int gfs2_quotad(void *data); -extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) @@ -50,8 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) @@ -63,6 +62,7 @@ extern const struct 
quotactl_ops gfs2_quotactl_ops; int __init gfs2_qd_shrinker_init(void); void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; -extern void __init gfs2_quota_hash_init(void); + +void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 7a0c9d0b7503..6a0fd42e1120 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -17,18 +17,18 @@ static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk) *blk = 0; } -extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, +int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh); -extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); +int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +void gfs2_revoke_clean(struct gfs2_jdesc *jd); -extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); -extern void gfs2_recover_func(struct work_struct *work); -extern int __get_log_header(struct gfs2_sbd *sdp, - const struct gfs2_log_header *lh, unsigned int blkno, - struct gfs2_log_header_host *head); +int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); +void gfs2_recover_func(struct work_struct *work); +int __get_log_header(struct gfs2_sbd *sdp, + const struct gfs2_log_header *lh, unsigned int blkno, + struct gfs2_log_header_host *head); #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9308190895c8..c2060203b98a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2411,13 +2411,12 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, * @bn: Used to return the starting block number * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 - * @generation: the generation number of the inode * * Returns: 0 or error */ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, - bool dinode, u64 *generation) + bool dinode) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -2477,10 +2476,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, rbm.rgd->rd_free -= *nblocks; spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { + u64 generation; + rbm.rgd->rd_dinodes++; - *generation = rbm.rgd->rd_igeneration++; - if (*generation == 0) - *generation = rbm.rgd->rd_igeneration++; + generation = rbm.rgd->rd_igeneration++; + if (generation == 0) + generation = rbm.rgd->rd_igeneration++; + ip->i_generation = generation; } gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 00b30cf893af..8d20e99385db 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -22,38 +22,38 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); -extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); +struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); +struct 
gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -extern int gfs2_rindex_update(struct gfs2_sbd *sdp); -extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); -extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +int gfs2_rindex_update(struct gfs2_sbd *sdp); +void gfs2_free_clones(struct gfs2_rgrpd *rgd); +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); +void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); -extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, - struct gfs2_alloc_parms *ap); -extern void gfs2_inplace_release(struct gfs2_inode *ip); - -extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, - bool dinode, u64 *generation); - -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); -extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen, int meta); -extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen); -extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -extern void gfs2_unlink_di(struct inode *inode); -extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, - unsigned int type); +int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); +void gfs2_inplace_release(struct gfs2_inode *ip); + +int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, + bool dinode); + +void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +void gfs2_rs_delete(struct gfs2_inode *ip); +void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen, int meta); +void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen); +void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +void gfs2_unlink_di(struct inode *inode); +int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, + unsigned int type); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -62,18 +62,19 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, - u64 block); -extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, - unsigned int state, u16 flags); -extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, - const char *fs_id_buf); -extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, - struct buffer_head *bh, - const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); -extern int gfs2_fitrim(struct file *filp, void __user *argp); +void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, + u64 block); +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags); +void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +u64 gfs2_ri_total(struct gfs2_sbd *sdp); +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, + const char *fs_id_buf); +int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, + struct buffer_head *bh, + const struct gfs2_bitmap *bi, unsigned minlen, + u64 *ptrimmed); +int gfs2_fitrim(struct file *filp, void __user *argp); 
/* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) @@ -88,9 +89,9 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) return first <= block && block < last; } -extern void check_and_update_goal(struct gfs2_inode *ip); +void check_and_update_goal(struct gfs2_inode *ip); -extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); -extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); +void rgrp_lock_local(struct gfs2_rgrpd *rgd); +void rgrp_unlock_local(struct gfs2_rgrpd *rgd); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 52a878fa7139..d21c04a22d73 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -602,13 +602,15 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - if (!sb_rdonly(sb)) { + if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); - } - if (gfs2_withdrawn(sdp)) { - gfs2_destroy_threads(sdp); + else { + if (gfs2_withdrawn(sdp)) + gfs2_destroy_threads(sdp); + gfs2_quota_cleanup(sdp); } + WARN_ON(gfs2_withdrawing(sdp)); /* At this point, we're through modifying the disk */ @@ -1006,6 +1008,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sc.sc_dinodes + sc.sc_free; buf->f_ffree = sc.sc_free; buf->f_namelen = GFS2_FNAMESIZE; + buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); return 0; } @@ -1299,18 +1302,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * As a last resort, if another node keeps holding the iopen glock * without showing any activity on the inode glock, we will eventually * time out and fail the iopen glock upgrade. - * - * Note that we're passing the LM_FLAG_TRY_1CB flag to the first - * locking request as an optimization to notify lock holders as soon as - * possible. Without that flag, they'd be notified implicitly by the - * second locking request. 
*/ - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh); - error = gfs2_glock_nq(gh); - if (error != GLR_TRYFAILED) - return !error; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) @@ -1550,7 +1543,7 @@ out: wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + rcu_assign_pointer(ip->i_gl, NULL); } } @@ -1576,7 +1569,7 @@ static void gfs2_free_inode(struct inode *inode) kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode)); } -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) +void free_local_statfs_inodes(struct gfs2_sbd *sdp) { struct local_statfs_inode *lsi, *safe; @@ -1591,8 +1584,8 @@ extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) } } -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index) +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index) { struct local_statfs_inode *lsi; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b4ddf6244586..b27a774d9580 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -15,7 +15,7 @@ #define GFS2_FS_FORMAT_MIN (1801) #define GFS2_FS_FORMAT_MAX (1802) -extern void gfs2_lm_unmount(struct gfs2_sbd *sdp); +void gfs2_lm_unmount(struct gfs2_sbd *sdp); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) { @@ -26,33 +26,33 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) return x; } -extern void gfs2_jindex_free(struct gfs2_sbd *sdp); +void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); -extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); -extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, - struct gfs2_inode **ipp); +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); +int gfs2_jdesc_check(struct gfs2_jdesc *jd); +int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, + struct gfs2_inode **ipp); -extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp); -extern void gfs2_online_uevent(struct gfs2_sbd *sdp); -extern void gfs2_destroy_threads(struct gfs2_sbd *sdp); -extern int gfs2_statfs_init(struct gfs2_sbd *sdp); -extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, - s64 dinodes); -extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, - const void *buf); -extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, - void *buf); -extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); -extern int gfs2_statfs_sync(struct super_block *sb, int type); -extern void gfs2_freeze_func(struct work_struct *work); -extern void gfs2_thaw_freeze_initiator(struct super_block *sb); +int gfs2_make_fs_rw(struct gfs2_sbd *sdp); +void gfs2_make_fs_ro(struct gfs2_sbd *sdp); +void gfs2_online_uevent(struct gfs2_sbd *sdp); +void gfs2_destroy_threads(struct gfs2_sbd *sdp); +int gfs2_statfs_init(struct gfs2_sbd *sdp); +void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, + s64 dinodes); +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, + const void *buf); +void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, + void *buf); +void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); +int gfs2_statfs_sync(struct super_block *sb, int type); +void gfs2_freeze_func(struct work_struct 
*work); +void gfs2_thaw_freeze_initiator(struct super_block *sb); -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp); -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index); -extern void free_sbd(struct gfs2_sbd *sdp); +void free_local_statfs_inodes(struct gfs2_sbd *sdp); +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index); +void free_sbd(struct gfs2_sbd *sdp); extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index c76ad9a4c75a..f8ce5302280d 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -34,17 +34,17 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned return rgd->rd_length; } -extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, - unsigned int blocks, unsigned int revokes, - unsigned long ip); -extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, - unsigned int revokes); - -extern void gfs2_trans_end(struct gfs2_sbd *sdp); -extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); -extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); +int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip); +int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes); + +void gfs2_trans_end(struct gfs2_sbd *sdp); +void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); +void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); #endif /* __TRANS_DOT_H__ */ diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index cdb839529175..11c9d59b6889 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -147,10 +147,10 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); -extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - bool verbose); -extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); -extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose); +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); +void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 79d5c5559512..8c96ba6230d1 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -639,7 +639,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) u64 block; int error; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -701,7 +701,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - 
error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -1002,7 +1002,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &blk, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, blk, 1); diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index 2aed9d7d483d..eb12eb7e37c1 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -50,14 +50,14 @@ struct gfs2_ea_location { struct gfs2_ea_header *el_prev; }; -extern int __gfs2_xattr_set(struct inode *inode, const char *name, - const void *value, size_t size, - int flags, int type); -extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); -extern int gfs2_ea_dealloc(struct gfs2_inode *ip); +int __gfs2_xattr_set(struct inode *inode, const char *name, + const void *value, size_t size, + int flags, int type); +ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); +int gfs2_ea_dealloc(struct gfs2_inode *ip); /* Exported to acl.c */ -extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); #endif /* __EATTR_DOT_H__ */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 54b3d489b6a7..f757d4f7ad98 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1179,7 +1179,9 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); struct hstate *h = hstate_inode(d_inode(dentry)); + u64 id = huge_encode_dev(dentry->d_sb->s_dev); + buf->f_fsid = u64_to_fsid(id); buf->f_type = HUGETLBFS_MAGIC; buf->f_bsize = huge_page_size(h); if (sbinfo) { diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7ea37f49f1e1..f99591a634b4 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -150,6 +150,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child) } static const struct export_operations jffs2_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = jffs2_get_parent, .fh_to_dentry = jffs2_fh_to_dentry, .fh_to_parent = jffs2_fh_to_parent, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 966826c394ee..8d8e556bd610 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -896,6 +896,7 @@ static const struct super_operations jfs_super_operations = { }; static const struct export_operations jfs_export_operations = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = jfs_fh_to_dentry, .fh_to_parent = jfs_fh_to_parent, .get_parent = jfs_get_parent, diff --git a/fs/libfs.c b/fs/libfs.c index abe2b5a40ba1..e9440d55073c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { + u64 id = huge_encode_dev(dentry->d_sb->s_dev); + + buf->f_fsid = u64_to_fsid(id); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; @@ -1310,6 +1313,47 @@ ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, EXPORT_SYMBOL_GPL(simple_attr_write_signed); /** + * generic_encode_ino32_fh - generic export_operations->encode_fh function + * @inode: the object to encode + * @fh: where to store the file handle fragment + * @max_len: maximum length to store there (in 4 byte units) + * @parent: parent directory inode, if 
wanted + * + * This generic encode_fh function assumes that the 32-bit inode number + * is suitable for locating an inode, and that the generation number + * can be used to check that it is still valid. It places them in the + * filehandle fragment where export_decode_fh expects to find them. + */ +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent) +{ + struct fid *fid = (void *)fh; + int len = *max_len; + int type = FILEID_INO32_GEN; + + if (parent && (len < 4)) { + *max_len = 4; + return FILEID_INVALID; + } else if (len < 2) { + *max_len = 2; + return FILEID_INVALID; + } + + len = 2; + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; + if (parent) { + fid->i32.parent_ino = parent->i_ino; + fid->i32.parent_gen = parent->i_generation; + len = 4; + type = FILEID_INO32_GEN_PARENT; + } + *max_len = len; + return type; +} +EXPORT_SYMBOL_GPL(generic_encode_ino32_fh); + +/** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 4905665c47d0..57d1dedf3f8f 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -256,6 +256,7 @@ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) return idmap; } +EXPORT_SYMBOL_GPL(mnt_idmap_get); /** * mnt_idmap_put - put a reference to an idmapping @@ -271,3 +272,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap) kfree(idmap); } } +EXPORT_SYMBOL_GPL(mnt_idmap_put); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 7df2503cef6c..01ac733a6320 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -125,7 +125,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate - depends on NFS_V4_1 && NFS_V3 + depends on NFS_V4_1 default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cf7365581031..fa1a14def45c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -448,6 +448,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; + delegation->test_gen = 0; spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); @@ -1294,6 +1295,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, struct inode *inode; const struct cred *cred; nfs4_stateid stateid; + unsigned long gen = ++server->delegation_gen; + restart: rcu_read_lock(); restart_locked: @@ -1303,7 +1306,8 @@ restart_locked: test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_TEST_EXPIRED, - &delegation->flags) == 0) + &delegation->flags) == 0 || + delegation->test_gen == gen) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -1312,6 +1316,7 @@ restart_locked: cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); spin_unlock(&delegation->lock); + delegation->test_gen = gen; clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); nfs_delegation_test_free_expired(inode, &stateid, cred); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1c378992b7c0..a6f495d012cf 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -21,6 +21,7 @@ struct nfs_delegation { fmode_t type; unsigned long pagemod_limit; __u64 change_attr; + unsigned long test_gen; unsigned long flags; refcount_t refcount; spinlock_t lock; diff --git a/fs/nfs/dir.c
b/fs/nfs/dir.c index e6a51fd94fea..13dffe4201e6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2532,7 +2532,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink); int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { - struct page *page; + struct folio *folio; char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); @@ -2547,24 +2547,24 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; - page = alloc_page(GFP_USER); - if (!page) + folio = folio_alloc(GFP_USER, 0); + if (!folio) return -ENOMEM; - kaddr = page_address(page); + kaddr = folio_address(folio); memcpy(kaddr, symname, pathlen); if (pathlen < PAGE_SIZE) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); trace_nfs_symlink_enter(dir, dentry); - error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, dentry, symname, error); d_drop(dentry); - __free_page(page); + folio_put(folio); return error; } @@ -2574,18 +2574,13 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0, - GFP_KERNEL)) { - SetPageUptodate(page); - unlock_page(page); - /* - * add_to_page_cache_lru() grabs an extra page refcount. - * Drop it here to avoid leaking this page later. - */ - put_page(page); - } else - __free_page(page); + if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0, + GFP_KERNEL) == 0) { + folio_mark_uptodate(folio); + folio_unlock(folio); + } + folio_put(folio); return 0; } EXPORT_SYMBOL_GPL(nfs_symlink); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4bf208a0a8e9..2de66e4e8280 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -543,9 +543,10 @@ out: } static int -nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs3_createdata *data; struct dentry *d_alias; int status = -ENOMEM; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 827d00e2f094..581698f1b7b2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -209,6 +209,7 @@ struct nfs4_exception { struct inode *inode; nfs4_stateid *stateid; long timeout; + unsigned short retrans; unsigned char task_is_privileged : 1; unsigned char delay : 1, recovering : 1, @@ -546,6 +547,7 @@ extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; extern unsigned short send_implementation_id; extern bool recover_lost_locks; +extern short nfs_delay_retrans; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a654d7234f51..8a943fffaad5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -585,6 +585,21 @@ wait_on_recovery: return 0; } +/* + * Track the number of NFS4ERR_DELAY related retransmissions and return + * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit + * set by 'nfs_delay_retrans'. 
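+ * The 'nfs_delay_retrans' module parameter defaults to -1; any negative
+ * value leaves the number of these retries unlimited.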
+ */ +static int nfs4_exception_should_retrans(const struct nfs_server *server, + struct nfs4_exception *exception) +{ + if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) { + if (exception->retrans++ >= (unsigned short)nfs_delay_retrans) + return -EAGAIN; + } + return 0; +} + /* This is the error handling routine for processes that are allowed * to sleep. */ @@ -595,6 +610,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } ret = nfs4_delay(&exception->timeout, exception->interruptible); goto out_retry; @@ -623,6 +643,11 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } rpc_delay(task, nfs4_update_delay(&exception->timeout)); goto out_retry; } @@ -5011,9 +5036,10 @@ static void nfs4_free_createdata(struct nfs4_createdata *data) } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr, + struct folio *folio, unsigned int len, struct iattr *sattr, struct nfs4_label *label) { + struct page *page = &folio->page; struct nfs4_createdata *data; int status = -ENAMETOOLONG; @@ -5038,7 +5064,7 @@ out: } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr) + struct folio *folio, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { .interruptible = true, @@ -5049,7 +5075,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, label = nfs4_label_init_security(dir, dentry, sattr, &l); do { - err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label); + err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label); trace_nfs4_symlink(dir, &dentry->d_name, err); err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); @@ -5622,7 +5648,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); - nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); + nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5663,7 +5689,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); - nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); + nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args, @@ -8934,6 +8961,7 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); +try_again: /* Test connection for session trunking. 
Async exchange_id call */ task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); if (IS_ERR(task)) @@ -8946,11 +8974,15 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); - else if (rpc_clnt_xprt_switch_has_addr(clnt, + else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, (struct sockaddr *)&xprt->addr)) rpc_clnt_xprt_switch_remove_xprt(clnt, xprt); rpc_put_task(task); + if (status == -NFS4ERR_DELAY) { + ssleep(1); + goto try_again; + } } EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); @@ -9621,6 +9653,9 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, nfs4_sequence_free_slot(&lgp->res.seq_res); + exception->state = NULL; + exception->stateid = NULL; + switch (nfs4err) { case 0: goto out; @@ -9716,7 +9751,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); @@ -9736,13 +9772,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { - .inode = inode, - .timeout = *timeout, - }; int status = 0; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); + exception->retry = 0; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -9753,11 +9786,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) goto out; if (task->tk_status < 0) { - status = nfs4_layoutget_handle_exception(task, lgp, &exception); - *timeout = exception.timeout; + exception->retry = 1; + status = nfs4_layoutget_handle_exception(task, lgp, exception); } else if (lgp->res.layoutp->len == 0) { + exception->retry = 1; status = -EAGAIN; - *timeout = nfs4_update_delay(&exception.timeout); + nfs4_update_delay(&exception->timeout); } else lseg = pnfs_layout_process(lgp); out: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 84343aefbbd6..21a365357629 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1980,7 +1980,9 @@ pnfs_update_layout(struct inode *ino, struct pnfs_layout_segment *lseg = NULL; struct nfs4_layoutget *lgp; nfs4_stateid stateid; - long timeout = 0; + struct nfs4_exception exception = { + .inode = ino, + }; unsigned long giveup = jiffies + (clp->cl_lease_time << 1); bool first; @@ -2144,7 +2146,7 @@ lookup_again: lgp->lo = lo; pnfs_get_layout_hdr(lo); - lseg = nfs4_proc_layoutget(lgp, &timeout); + lseg = nfs4_proc_layoutget(lgp, &exception); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); nfs_layoutget_end(lo); @@ -2171,6 +2173,8 @@ lookup_again: goto out_put_layout_hdr; } if (lseg) { + if (!exception.retry) + goto out_put_layout_hdr; if (first) pnfs_clear_first_layoutget(lo); trace_pnfs_update_layout(ino, pos, count, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d886c8226d8f..db57a85500ee 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -35,6 +35,7 @@ #include <linux/nfs_page.h> #include <linux/workqueue.h> +struct nfs4_exception; struct nfs4_opendata; enum { @@ -245,7 +246,9 @@ extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, const struct cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget 
*lgp, long *timeout); +extern struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e3570c656b0f..ad3a321ae997 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -396,9 +396,10 @@ nfs_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs_fh *fh; struct nfs_fattr *fattr; struct nfs_symlinkargs arg = { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2667ab753d42..075b31c93f87 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1371,6 +1371,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; +short nfs_delay_retrans = -1; EXPORT_SYMBOL_GPL(nfs_callback_nr_threads); EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); @@ -1381,6 +1382,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); EXPORT_SYMBOL_GPL(recover_lost_locks); +EXPORT_SYMBOL_GPL(nfs_delay_retrans); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -1429,5 +1431,9 @@ MODULE_PARM_DESC(recover_lost_locks, "If the server reports that a lock might be lost, " "try to recover it risking data corruption."); - +module_param_named(delay_retrans, nfs_delay_retrans, short, 0644); +MODULE_PARM_DESC(delay_retrans, + "Unless negative, specifies the number of times the NFSv4 " + "client retries a request before returning an EAGAIN error, " + "after a reply of NFS4ERR_DELAY from the server."); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9d82d50ce0b1..b664caea8b4e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,6 +739,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); + if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR) + break; } while (err < 0 && !nfs_error_is_fatal(err)); nfs_io_completion_put(ioc); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b7da17e53007..7b641095a665 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -426,8 +426,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } - if (!inode->i_sb->s_export_op || - !inode->i_sb->s_export_op->fh_to_dentry) { + if (!exportfs_can_decode_fh(inode->i_sb->s_export_op)) { dprintk("exp_export: export of invalid fs type.\n"); return -EINVAL; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 45aecdc302f4..4d765c72496f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1595,7 +1595,7 @@ static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) * file handles so user can use name_to_handle_at() to compare fids * reported with events to the file handle of watched objects. 
*/ - if (!nop) + if (!exportfs_can_encode_fid(nop)) return -EOPNOTSUPP; /* @@ -1603,7 +1603,7 @@ * supports decoding file handles, so user has a way to map back the * reported fids to filesystem objects. */ - if (mark_type != FAN_MARK_INODE && !nop->fh_to_dentry) + if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop)) return -EOPNOTSUPP; return 0; } diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index ab44f2db533b..d7498ddc4a72 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -384,6 +384,7 @@ static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, * and due to using iget() whereas NTFS needs ntfs_iget(). */ const struct export_operations ntfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = ntfs_get_parent, /* Find the parent of a given directory. */ .fh_to_dentry = ntfs_fh_to_dentry, diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f763e3256ccc..9153dffde950 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -811,6 +811,7 @@ static int ntfs_nfs_commit_metadata(struct inode *inode) } static const struct export_operations ntfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ntfs_fh_to_dentry, .fh_to_parent = ntfs_fh_to_parent, .get_parent = ntfs3_get_parent, diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 4e173d56b11f..5648954f8588 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \ - copy_up.o export.o params.o + copy_up.o export.o params.o xattrs.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ada3fcc9c6d5..4382881b0709 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, return PTR_ERR(old_file); /* Try to use clone_file_range to clone up within the same fs */ + ovl_start_write(dentry); cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); + ovl_end_write(dentry); if (cloned == len) goto out_fput; /* Couldn't clone, so now we try to copy the data */ @@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, * it may not recognize all kind of holes and sometimes * only skips partial of hole area. However, it will be * enough for most of the use cases. + * + * We do not hold upper sb_writers throughout the loop to avert + * lockdep warning with llseek of lower file in nested overlay: + * - upper sb_writers + * -- lower ovl_inode_lock (ovl_llseek) */ - if (skip_hole && data_pos < old_pos) { data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); if (data_pos > old_pos) { @@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, } } + ovl_start_write(dentry); bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); + ovl_end_write(dentry); if (bytes <= 0) { error = bytes; break; @@ -426,29 +434,29 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper) +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) { - const struct ovl_fh *fh = NULL; - int err; - /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distinguish between a copy up and a pure upper inode.
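 * A NULL return here stands for that 'null' fh; ovl_set_origin_fh() then
 * sets a zero-length overlay.origin xattr.
 */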
*/ - if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(ofs, lower, false); - if (IS_ERR(fh)) - return PTR_ERR(fh); - } + if (!ovl_can_decode_fh(origin->d_sb)) + return NULL; + + return ovl_encode_real_fh(ofs, origin, false); +} + +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper) +{ + int err; /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); - kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -476,7 +484,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, * * Caller must hold i_mutex on indexdir. */ -static int ovl_create_index(struct dentry *dentry, struct dentry *origin, +static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -502,7 +510,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name(ofs, origin, &name); + err = ovl_get_index_name_fh(fh, &name); if (err) return err; @@ -541,6 +549,7 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir; + const struct ovl_fh *origin_fh; bool origin; bool indexed; bool metacopy; @@ -555,14 +564,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); + ovl_start_write(c->dentry); + /* Mark parent "impure" because it may now contain non-pure upper */ err = ovl_set_impure(c->parent, upperdir); if (err) - return err; + goto out; err = ovl_set_nlink_lower(c->dentry); if (err) - return err; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, @@ -581,10 +592,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } inode_unlock(udir); if (err) - return err; + goto out; err = ovl_set_nlink_upper(c->dentry); +out: + ovl_end_write(c->dentry); return err; } @@ -637,7 +650,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); + err = ovl_set_origin_fh(ofs, c->origin_fh, temp); if (err) return err; } @@ -719,21 +732,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) .link = c->link }; - /* workdir and destdir could be the same when copying up to indexdir */ - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) - goto unlock; - err = ovl_prep_cu_creds(c->dentry, &cc); if (err) - goto unlock; + return err; + ovl_start_write(c->dentry); + inode_lock(wdir); temp = ovl_create_temp(ofs, c->workdir, &cattr); + inode_unlock(wdir); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - err = PTR_ERR(temp); if (IS_ERR(temp)) - goto unlock; + return PTR_ERR(temp); /* * Copy up data first and then xattrs. Writing data after @@ -741,15 +752,28 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) */ path.dentry = temp; err = ovl_copy_up_data(c, &path); - if (err) + /* + * We cannot hold lock_rename() throughout this helper, because of + * lock ordering with sb_writers, which shouldn't be held when calling + * ovl_copy_up_data(), so lock workdir and destdir and make sure that + * temp wasn't moved before copy up completion or cleanup. + * If temp was moved, abort without the cleanup.
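ovl_set_origin() is split here into ovl_get_origin_fh(), which encodes the lower file handle once, and ovl_set_origin_fh(), which stores it; the same handle can then feed ovl_get_index_name_fh() for indexed copy up. A caller-side sketch of the new shape (example_set_origin is hypothetical; the real callers are ovl_do_copy_up() and ovl_fix_origin()):

        /* Hypothetical caller: encode the origin fh once, reuse it. */
        static int example_set_origin(struct ovl_fs *ofs, struct dentry *lower,
                                      struct dentry *upper)
        {
                struct ovl_fh *fh;
                int err;

                fh = ovl_get_origin_fh(ofs, lower);     /* NULL means a 'null' fh */
                if (IS_ERR(fh))
                        return PTR_ERR(fh);

                err = ovl_set_origin_fh(ofs, fh, upper);
                /* ... the same fh could feed ovl_get_index_name_fh() ... */

                kfree(fh);                              /* kfree(NULL) is a no-op */
                return err;
        }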
+ */ + ovl_start_write(c->dentry); + if (lock_rename(c->workdir, c->destdir) != NULL || + temp->d_parent != c->workdir) { + err = -EIO; + goto unlock; + } else if (err) { goto cleanup; + } err = ovl_copy_up_metadata(c, temp); if (err) goto cleanup; if (S_ISDIR(c->stat.mode) && c->indexed) { - err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + err = ovl_create_index(c->dentry, c->origin_fh, temp); if (err) goto cleanup; } @@ -779,6 +803,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ovl_set_flag(OVL_WHITEOUTS, inode); unlock: unlock_rename(c->workdir, c->destdir); + ovl_end_write(c->dentry); return err; @@ -802,9 +827,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (err) return err; + ovl_start_write(c->dentry); tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - if (IS_ERR(tmpfile)) return PTR_ERR(tmpfile); @@ -815,9 +841,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) goto out_fput; } + ovl_start_write(c->dentry); + err = ovl_copy_up_metadata(c, temp); if (err) - goto out_fput; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); @@ -831,7 +859,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) inode_unlock(udir); if (err) - goto out_fput; + goto out; if (c->metacopy_digest) ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry)); @@ -843,6 +871,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) ovl_set_upperdata(d_inode(c->dentry)); ovl_inode_update(d_inode(c->dentry), dget(temp)); +out: + ovl_end_write(c->dentry); out_fput: fput(tmpfile); return err; @@ -861,6 +891,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct dentry *origin = c->lowerpath.dentry; + struct ovl_fh *fh = NULL; bool to_index = false; /* @@ -877,25 +909,35 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) { + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + /* origin_fh may be NULL */ + c->origin_fh = fh; c->origin = true; + } if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); + err = ovl_get_index_name(ofs, origin, &c->destname); if (err) - return err; + goto out_free_fh; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ - return -EIO; + err = -EIO; + goto out_free_fh; } else { /* * Mark parent "impure" because it may now contain non-pure * upper */ + ovl_start_write(c->dentry); err = ovl_set_impure(c->parent, c->destdir); + ovl_end_write(c->dentry); if (err) - return err; + goto out_free_fh; } /* Should we copyup with O_TMPFILE or with workdir? 
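The ovl_copy_up_workdir() rework above hinges on one pattern: create temp under inode_lock(wdir) alone, copy the data with no rename locks held (so upper sb_writers can be taken and dropped per chunk), then re-acquire lock_rename() and verify that temp was not moved in the unlocked window. Reduced to its essentials (create_temp, copy_data and rename_into_place are hypothetical stand-ins; cleanup of temp on error is elided):

        static struct dentry *create_temp(struct dentry *workdir);
        static int copy_data(struct dentry *temp);
        static int rename_into_place(struct dentry *temp, struct dentry *destdir);

        static int copy_up_workdir_pattern(struct dentry *ovl_dentry,
                                           struct dentry *workdir,
                                           struct dentry *destdir)
        {
                struct dentry *temp;
                int err;

                inode_lock(d_inode(workdir));           /* no lock_rename() yet */
                temp = create_temp(workdir);
                inode_unlock(d_inode(workdir));
                if (IS_ERR(temp))
                        return PTR_ERR(temp);

                err = copy_data(temp);                  /* upper sb_writers cycled inside */

                ovl_start_write(ovl_dentry);            /* freeze protection for rename */
                /*
                 * lock_rename() returning non-NULL (one dir is the ancestor of
                 * the other) is unexpected here, and temp may have been moved
                 * while no locks were held.
                 */
                if (lock_rename(workdir, destdir) != NULL ||
                    temp->d_parent != workdir)
                        err = -EIO;                     /* abort, leave moved temp alone */
                else if (!err)
                        err = rename_into_place(temp, destdir);
                unlock_rename(workdir, destdir);
                ovl_end_write(ovl_dentry);
                dput(temp);
                return err;
        }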
*/ @@ -909,6 +951,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + ovl_start_write(c->dentry); if (to_index) { /* Initialize nlink for copy up of disconnected dentry */ err = ovl_set_nlink_upper(c->dentry); @@ -923,10 +966,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } + ovl_end_write(c->dentry); out: if (to_index) kfree(c->destname.name); +out_free_fh: + kfree(fh); return err; } @@ -1011,15 +1057,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) * Writing to upper file will clear security.capability xattr. We * don't want that to happen for normal copy-up operation. */ + ovl_start_write(c->dentry); if (capability) { err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); - if (err) - goto out_free; } - - - err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); + if (!err) { + err = ovl_removexattr(ofs, upperpath.dentry, + OVL_XATTR_METACOPY); + } + ovl_end_write(c->dentry); if (err) goto out_free; @@ -1170,17 +1217,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) int ovl_maybe_copy_up(struct dentry *dentry, int flags) { - int err = 0; - - if (ovl_open_need_copy_up(dentry, flags)) { - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up_flags(dentry, flags); - ovl_drop_write(dentry); - } - } + if (!ovl_open_need_copy_up(dentry, flags)) + return 0; - return err; + return ovl_copy_up_flags(dentry, flags); } int ovl_copy_up_with_data(struct dentry *dentry) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 033fc0458a3d..aab3f5d93556 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -477,7 +477,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_unlock; err = -ESTALE; - if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)) goto out_dput; newdentry = ovl_create_temp(ofs, workdir, cattr); @@ -559,10 +559,6 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, struct cred *override_cred; struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(parent); - if (err) - return err; - old_cred = ovl_override_creds(dentry->d_sb); /* @@ -626,6 +622,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, .link = link, }; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; @@ -700,28 +700,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir, int err; struct inode *inode; - err = ovl_want_write(old); + err = ovl_copy_up(old); if (err) goto out; - err = ovl_copy_up(old); + err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; - err = ovl_copy_up(new->d_parent); + err = ovl_nlink_start(old); if (err) - goto out_drop_write; + goto out; if (ovl_is_metacopy_dentry(old)) { err = ovl_set_link_redirect(old); if (err) - goto out_drop_write; + goto out_nlink_end; } - err = ovl_nlink_start(old); - if (err) - goto out_drop_write; - inode = d_inode(old); ihold(inode); @@ -731,9 +727,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) iput(inode); +out_nlink_end: ovl_nlink_end(old); -out_drop_write: - ovl_drop_write(old); out: return err; } @@ -891,17 +886,13 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out; } 
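The dir.c hunks above and below all follow the same reordering: ovl_copy_up() now manages upper write access internally (see the util.c hunks near the end of this series), so callers take ovl_want_write() only around the upper-layer modification itself instead of holding sb_writers across the whole operation, copy up included. Schematically (ovl_modify_pattern is an illustrative composite, not any one function):

        /* Sketch of the reordered convention in fs/overlayfs/dir.c. */
        static int ovl_modify_pattern(struct dentry *dentry)
        {
                int err;

                /* Copy up first: it acquires upper write access on its own now. */
                err = ovl_copy_up(dentry->d_parent);
                if (err)
                        return err;

                /* Only then take sb_writers, for the actual upper fs change. */
                err = ovl_want_write(dentry);
                if (err)
                        return err;

                /* ... create/link/unlink/rename on the upper layer ... */

                ovl_drop_write(dentry);
                return err;
        }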
- err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry->d_parent); if (err) - goto out_drop_write; + goto out; err = ovl_nlink_start(dentry); if (err) - goto out_drop_write; + goto out; old_cred = ovl_override_creds(dentry->d_sb); if (!lower_positive) @@ -926,8 +917,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (ovl_dentry_upper(dentry)) ovl_copyattr(d_inode(dentry)); -out_drop_write: - ovl_drop_write(dentry); out: ovl_cache_free(&list); return err; @@ -1131,29 +1120,32 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } - err = ovl_want_write(old); - if (err) - goto out; - err = ovl_copy_up(old); if (err) - goto out_drop_write; + goto out; err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; if (!overwrite) { err = ovl_copy_up(new); if (err) - goto out_drop_write; + goto out; } else if (d_inode(new)) { err = ovl_nlink_start(new); if (err) - goto out_drop_write; + goto out; update_nlink = true; } + if (!update_nlink) { + /* ovl_nlink_start() took ovl_want_write() */ + err = ovl_want_write(old); + if (err) + goto out; + } + old_cred = ovl_override_creds(old->d_sb); if (!list_empty(&list)) { @@ -1219,7 +1211,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } else { if (!d_is_negative(newdentry)) { - if (!new_opaque || !ovl_is_whiteout(newdentry)) + if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry)) goto out_dput; } else { if (flags & RENAME_EXCHANGE) @@ -1286,8 +1278,8 @@ out_revert_creds: revert_creds(old_cred); if (update_nlink) ovl_nlink_end(new); -out_drop_write: - ovl_drop_write(old); + else + ovl_drop_write(old); out: dput(opaquedir); ovl_cache_free(&list); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 26b782c53910..7e16bbcad95e 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up(dentry); - ovl_drop_write(dentry); - } - + err = ovl_copy_up(dentry); if (err) { pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index ec3671ca140c..131621daeb13 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -15,10 +15,15 @@ #include <linux/fs.h> #include "overlayfs.h" +#include "../internal.h" /* for sb_init_dio_done_wq */ + struct ovl_aio_req { struct kiocb iocb; refcount_t ref; struct kiocb *orig_iocb; + /* used for aio completion */ + struct work_struct work; + long res; }; static struct kmem_cache *ovl_aio_request_cachep; @@ -235,6 +240,12 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) return ret; } +static void ovl_file_modified(struct file *file) +{ + /* Update size/mtime */ + ovl_copyattr(file_inode(file)); +} + static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; @@ -263,20 +274,12 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(int ifl) +#define OVL_IOCB_MASK \ + (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) + +static rwf_t iocb_to_rw_flags(int flags) { - rwf_t flags = 0; - - if (ifl & IOCB_NOWAIT) - flags |= RWF_NOWAIT; - if (ifl & IOCB_HIPRI) - flags |= RWF_HIPRI; - if (ifl & IOCB_DSYNC) - flags |= RWF_DSYNC; - if (ifl & IOCB_SYNC) - flags |= RWF_SYNC; - - return flags; + return (__force rwf_t)(flags & 
OVL_IOCB_MASK); } static inline void ovl_aio_put(struct ovl_aio_req *aio_req) @@ -293,10 +296,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) struct kiocb *orig_iocb = aio_req->orig_iocb; if (iocb->ki_flags & IOCB_WRITE) { - struct inode *inode = file_inode(orig_iocb->ki_filp); - kiocb_end_write(iocb); - ovl_copyattr(inode); + ovl_file_modified(orig_iocb->ki_filp); } orig_iocb->ki_pos = iocb->ki_pos; @@ -313,6 +314,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res) orig_iocb->ki_complete(orig_iocb, res); } +static void ovl_aio_complete_work(struct work_struct *work) +{ + struct ovl_aio_req *aio_req = container_of(work, + struct ovl_aio_req, work); + + ovl_aio_rw_complete(&aio_req->iocb, aio_req->res); +} + +static void ovl_aio_queue_completion(struct kiocb *iocb, long res) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + /* + * Punt to a work queue to serialize updates of mtime/size. + */ + aio_req->res = res; + INIT_WORK(&aio_req->work, ovl_aio_complete_work); + queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq, + &aio_req->work); +} + +static int ovl_init_aio_done_wq(struct super_block *sb) +{ + if (sb->s_dio_done_wq) + return 0; + + return sb_init_dio_done_wq(sb); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -334,8 +366,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb->ki_flags)); + rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags); + + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf); } else { struct ovl_aio_req *aio_req; @@ -401,15 +434,20 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(ifl); + file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(ifl)); + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf); file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); } else { struct ovl_aio_req *aio_req; + ret = ovl_init_aio_done_wq(inode->i_sb); + if (ret) + goto out; + ret = -ENOMEM; aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); if (!aio_req) @@ -418,7 +456,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_rw_complete; + aio_req->iocb.ki_complete = ovl_aio_queue_completion; refcount_set(&aio_req->ref, 2); kiocb_start_write(&aio_req->iocb); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); @@ -492,7 +530,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(out); revert_creds(old_cred); fdput(real); @@ -573,7 +611,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len revert_creds(old_cred); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); fdput(real); @@ -657,7 +695,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, revert_creds(old_cred); /* Update size */ - 
ovl_copyattr(inode_out); + ovl_file_modified(file_out); fdput(real_in); fdput(real_out); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6e98a7d36ce..345b8f161ca4 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -32,10 +32,6 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = ovl_want_write(dentry); - if (err) - goto out; - if (attr->ia_valid & ATTR_SIZE) { /* Truncate should trigger data copy up as well */ full_copy_up = true; @@ -54,7 +50,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, winode = d_inode(upperdentry); err = get_write_access(winode); if (err) - goto out_drop_write; + goto out; } if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -78,6 +74,10 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ attr->ia_valid &= ~ATTR_OPEN; + err = ovl_want_write(dentry); + if (err) + goto out_put_write; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = ovl_do_notify_change(ofs, upperdentry, attr); @@ -85,12 +85,12 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (!err) ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); + ovl_drop_write(dentry); +out_put_write: if (winode) put_write_access(winode); } -out_drop_write: - ovl_drop_write(dentry); out: return err; } @@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry, return p; } -bool ovl_is_private_xattr(struct super_block *sb, const char *name) -{ - struct ovl_fs *ofs = OVL_FS(sb); - - if (ofs->config.userxattr) - return strncmp(name, OVL_XATTR_USER_PREFIX, - sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; - else - return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, - sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; -} - -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int err; - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); - struct dentry *upperdentry = ovl_i_dentry_upper(inode); - struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - struct path realpath; - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - if (!value && !upperdentry) { - ovl_path_lower(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); - revert_creds(old_cred); - if (err < 0) - goto out_drop_write; - } - - if (!upperdentry) { - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - realdentry = ovl_dentry_upper(dentry); - } - - old_cred = ovl_override_creds(dentry->d_sb); - if (value) { - err = ovl_do_setxattr(ofs, realdentry, name, value, size, - flags); - } else { - WARN_ON(flags != XATTR_REPLACE); - err = ovl_do_removexattr(ofs, realdentry, name); - } - revert_creds(old_cred); - - /* copy c/mtime */ - ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size) -{ - ssize_t res; - const struct cred *old_cred; - struct path realpath; - - ovl_i_path_real(inode, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); - revert_creds(old_cred); - return res; -} - -static bool ovl_can_list(struct super_block *sb, const char *s) -{ - /* Never list private (.overlay) */ - if (ovl_is_private_xattr(sb, s)) - return false; - - /* 
List all non-trusted xattrs */ - if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) - return true; - - /* list other trusted for superuser only */ - return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); -} - -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) -{ - struct dentry *realdentry = ovl_dentry_real(dentry); - ssize_t res; - size_t len; - char *s; - const struct cred *old_cred; - - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_listxattr(realdentry, list, size); - revert_creds(old_cred); - if (res <= 0 || size == 0) - return res; - - /* filter out private xattrs */ - for (s = list, len = res; len;) { - size_t slen = strnlen(s, len) + 1; - - /* underlying fs providing us with an broken xattr list? */ - if (WARN_ON(slen > len)) - return -EIO; - - len -= slen; - if (!ovl_can_list(dentry->d_sb, s)) { - res -= slen; - memmove(s, s + slen, len); - } else { - s += slen; - } - } - - return res; -} - #ifdef CONFIG_FS_POSIX_ACL /* * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone @@ -611,10 +487,6 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - err = ovl_want_write(dentry); - if (err) - return err; - /* * If ACL is to be removed from a lower file, check if it exists in * the first place before copying it up. @@ -630,7 +502,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, revert_creds(old_cred); if (IS_ERR(real_acl)) { err = PTR_ERR(real_acl); - goto out_drop_write; + goto out; } posix_acl_release(real_acl); } @@ -638,23 +510,26 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, if (!upperdentry) { err = ovl_copy_up(dentry); if (err) - goto out_drop_write; + goto out; realdentry = ovl_dentry_upper(dentry); } + err = ovl_want_write(dentry); + if (err) + goto out; + old_cred = ovl_override_creds(dentry->d_sb); if (acl) err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); else err = ovl_do_remove_acl(ofs, realdentry, acl_name); revert_creds(old_cred); + ovl_drop_write(dentry); /* copy c/mtime */ ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); +out: return err; } @@ -778,14 +653,14 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, unsigned int flags; int err; - err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry); if (!err) { ovl_path_real(dentry, &upperpath); + err = ovl_want_write(dentry); + if (err) + goto out; + old_cred = ovl_override_creds(inode->i_sb); /* * Store immutable/append-only flags in xattr and clear them @@ -798,6 +673,7 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, if (!err) err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); + ovl_drop_write(dentry); /* * Merge real inode flags with inode flags read from @@ -812,7 +688,6 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, /* Update ctime */ ovl_copyattr(inode); } - ovl_drop_write(dentry); out: return err; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 80391c687c2a..03bc8d5dfa31 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -251,7 +251,10 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, err = -EREMOTE; goto out_err; } - if (ovl_is_whiteout(this)) { + + path.dentry = this; + path.mnt = d->mnt; + if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { d->stop = d->opaque = true; goto put_and_out; } @@ -264,8 +267,6 
@@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, goto put_and_out; } - path.dentry = this; - path.mnt = d->mnt; if (!d_can_lookup(this)) { if (d->is_dir || !last_element) { d->stop = true; @@ -438,7 +439,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, else if (IS_ERR(origin)) return PTR_ERR(origin); - if (upperdentry && !ovl_is_whiteout(upperdentry) && + if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) && inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) goto invalid; @@ -507,6 +508,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, return err; } +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set) +{ + int err; + + err = ovl_verify_fh(ofs, dentry, ox, fh); + if (set && err == -ENODATA) + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + + return err; +} + /* * Verify that @real dentry matches the file handle stored in xattr @name. * @@ -515,9 +529,9 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, * * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set) +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; @@ -530,9 +544,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, goto fail; } - err = ovl_verify_fh(ofs, dentry, ox, fh); - if (set && err == -ENODATA) - err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set); if (err) goto fail; @@ -548,6 +560,7 @@ fail: goto out; } + /* Get upper dentry from index */ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected) @@ -684,7 +697,7 @@ orphan: goto out; } -static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) { char *n, *s; @@ -873,20 +886,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, struct dentry *lower, struct dentry *upper) { + const struct ovl_fh *fh; int err; if (ovl_check_origin_xattr(ofs, upper)) return 0; + fh = ovl_get_origin_fh(ofs, lower); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = ovl_want_write(dentry); if (err) - return err; + goto out; - err = ovl_set_origin(ofs, lower, upper); + err = ovl_set_origin_fh(ofs, fh, upper); if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent); ovl_drop_write(dentry); +out: + kfree(fh); return err; } @@ -1383,7 +1403,11 @@ bool ovl_lower_positive(struct dentry *dentry) break; } } else { - positive = !ovl_is_whiteout(this); + struct path path = { + .dentry = this, + .mnt = parentpath->layer->mnt, + }; + positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path); done = true; dput(this); } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9817b2dcb132..ca88b2636a57 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -28,7 +28,16 @@ enum ovl_path_type { #define OVL_XATTR_NAMESPACE "overlay." 
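The OVL_XATTR_*_PREFIX macros defined just below compose by C string-literal concatenation, and the escape prefixes append a second "overlay." component, which is presumably how an overlay.* xattr visible inside the mount can be stored on a layer without colliding with overlayfs' own metadata. A small userspace check of the resulting strings (plain C, mirroring the concatenation only):

        #include <stdio.h>

        #define XATTR_TRUSTED_PREFIX            "trusted."
        #define OVL_XATTR_NAMESPACE             "overlay."
        #define OVL_XATTR_TRUSTED_PREFIX        XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
        #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE

        int main(void)
        {
                puts(OVL_XATTR_TRUSTED_PREFIX);         /* trusted.overlay. */
                puts(OVL_XATTR_ESCAPE_TRUSTED_PREFIX);  /* trusted.overlay.overlay. */
                return 0;
        }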
#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1) + +#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1) enum ovl_xattr { OVL_XATTR_OPAQUE, @@ -40,6 +49,8 @@ enum ovl_xattr { OVL_XATTR_UUID, OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, + OVL_XATTR_XWHITEOUT, + OVL_XATTR_XWHITEOUTS, }; enum ovl_inode_flag { @@ -398,6 +409,10 @@ static inline bool ovl_open_flags_need_copy_up(int flags) } /* util.c */ +int ovl_get_write_access(struct dentry *dentry); +void ovl_put_write_access(struct dentry *dentry); +void ovl_start_write(struct dentry *dentry); +void ovl_end_write(struct dentry *dentry); int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); @@ -460,6 +475,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_inode_version_get(struct inode *inode); bool ovl_is_whiteout(struct dentry *dentry); +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path); struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); @@ -467,9 +483,21 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags); bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); +static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs, + struct dentry *upperdentry) +{ + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; + return ovl_path_is_whiteout(ofs, &upperpath); +} + static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *upperdentry) { @@ -624,11 +652,15 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set); + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set); +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int 
ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); @@ -640,17 +672,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, + const struct ovl_fh *fh, bool set) +{ + return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); +} + static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { - return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin, - false, set); + return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, + false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { - return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set); + return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, + true, set); } /* readdir.c */ @@ -684,17 +723,8 @@ int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); -int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr); -int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, - struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size); -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, @@ -815,8 +845,9 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper); +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; @@ -830,3 +861,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs) { return (!ovl_upper_mnt(ofs) || !ofs->workdir); } + +/* xattr.c */ + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs); +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index f6ff23fd101c..ddab9ea267d1 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -43,8 +43,10 @@ module_param_named(metacopy, ovl_metacopy_def, bool, 0644); MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); -enum { +enum ovl_opt { Opt_lowerdir, + Opt_lowerdir_add, + Opt_datadir_add, Opt_upperdir, Opt_workdir, Opt_default_permissions, @@ -140,8 +142,11 
@@ static int ovl_verity_mode_def(void) #define fsparam_string_empty(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), + fsparam_string("lowerdir+", Opt_lowerdir_add), + fsparam_string("datadir+", Opt_datadir_add), fsparam_string("upperdir", Opt_upperdir), fsparam_string("workdir", Opt_workdir), fsparam_flag("default_permissions", Opt_default_permissions), @@ -238,19 +243,8 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("failed to resolve '%s': %i\n", name, err); goto out; } - err = -EINVAL; - if (ovl_dentry_weird(path->dentry)) { - pr_err("filesystem on '%s' not supported\n", name); - goto out_put; - } - if (!d_is_dir(path->dentry)) { - pr_err("'%s' not a directory\n", name); - goto out_put; - } return 0; -out_put: - path_put_init(path); out: return err; } @@ -268,7 +262,7 @@ static void ovl_unescape(char *s) } } -static int ovl_mount_dir(const char *name, struct path *path, bool upper) +static int ovl_mount_dir(const char *name, struct path *path) { int err = -ENOMEM; char *tmp = kstrdup(name, GFP_KERNEL); @@ -276,68 +270,147 @@ static int ovl_mount_dir(const char *name, struct path *path, bool upper) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - - if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) { - pr_err("filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } kfree(tmp); } return err; } -static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, - bool workdir) +static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path, + enum ovl_opt layer, const char *name, bool upper) { - int err; - struct ovl_fs *ofs = fc->s_fs_info; - struct ovl_config *config = &ofs->config; struct ovl_fs_context *ctx = fc->fs_private; - struct path path; - char *dup; - err = ovl_mount_dir(name, &path, true); - if (err) - return err; + if (ovl_dentry_weird(path->dentry)) + return invalfc(fc, "filesystem on %s not supported", name); + + if (!d_is_dir(path->dentry)) + return invalfc(fc, "%s is not a directory", name); + /* * Check whether upper path is read-only here to report failures * early. Don't forget to recheck when the superblock is created * as the mount attributes could change. 
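The lowerdir+ and datadir+ parameters registered in the spec above are incremental: each call supplies exactly one layer, so path names no longer need the colon escaping of the single lowerdir string (which is why ovl_parse_layer() below uses ovl_mount_dir_noesc() for them). A hedged userspace sketch using the new mount API (assumes a libc exposing the fsopen(2) family, e.g. glibc 2.36+; all paths are placeholders; error cleanup elided):

        #include <sys/mount.h>
        #include <fcntl.h>
        #include <unistd.h>

        int mount_overlay(void)
        {
                int fsfd, mntfd;

                fsfd = fsopen("overlay", FSOPEN_CLOEXEC);
                if (fsfd < 0)
                        return -1;

                /* One layer per call, no ':' or '::' separators to escape. */
                fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/layers/lower1", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/layers/lower2", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "datadir+",  "/layers/data1", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir",  "/layers/upper", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir",   "/layers/work", 0);
                if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
                        return -1;

                mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
                if (mntfd < 0)
                        return -1;
                return move_mount(mntfd, "", AT_FDCWD, "/mnt/merged",
                                  MOVE_MOUNT_F_EMPTY_PATH);
        }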
*/ - if (__mnt_is_readonly(path.mnt)) { - path_put(&path); - return -EINVAL; + if (upper) { + if (path->dentry->d_flags & DCACHE_OP_REAL) + return invalfc(fc, "filesystem on %s not supported as upperdir", name); + if (__mnt_is_readonly(path->mnt)) + return invalfc(fc, "filesystem on %s is read-only", name); + } else { + if (ctx->lowerdir_all && layer != Opt_lowerdir) + return invalfc(fc, "lowerdir+ and datadir+ cannot follow lowerdir"); + if (ctx->nr_data && layer == Opt_lowerdir_add) + return invalfc(fc, "regular lower layers cannot follow data layers"); + if (ctx->nr == OVL_MAX_STACK) + return invalfc(fc, "too many lower directories, limit is %d", + OVL_MAX_STACK); } + return 0; +} - dup = kstrdup(name, GFP_KERNEL); - if (!dup) { - path_put(&path); +static int ovl_ctx_realloc_lower(struct fs_context *fc) +{ + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + size_t nr; + + if (ctx->nr < ctx->capacity) + return 0; + + nr = min_t(size_t, max(4096 / sizeof(*l), ctx->capacity * 2), + OVL_MAX_STACK); + l = krealloc_array(ctx->lower, nr, sizeof(*l), GFP_KERNEL_ACCOUNT); + if (!l) return -ENOMEM; + + ctx->lower = l; + ctx->capacity = nr; + return 0; +} + +static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, + struct path *path, char **pname) +{ + struct ovl_fs *ofs = fc->s_fs_info; + struct ovl_config *config = &ofs->config; + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + + switch (layer) { + case Opt_workdir: + swap(config->workdir, *pname); + swap(ctx->work, *path); + break; + case Opt_upperdir: + swap(config->upperdir, *pname); + swap(ctx->upper, *path); + break; + case Opt_datadir_add: + ctx->nr_data++; + fallthrough; + case Opt_lowerdir_add: + WARN_ON(ctx->nr >= ctx->capacity); + l = &ctx->lower[ctx->nr++]; + memset(l, 0, sizeof(*l)); + swap(l->name, *pname); + swap(l->path, *path); + break; + default: + WARN_ON(1); } +} - if (workdir) { - kfree(config->workdir); - config->workdir = dup; - path_put(&ctx->work); - ctx->work = path; - } else { - kfree(config->upperdir); - config->upperdir = dup; - path_put(&ctx->upper); - ctx->upper = path; +static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, + enum ovl_opt layer) +{ + char *name = kstrdup(param->string, GFP_KERNEL); + bool upper = (layer == Opt_upperdir || layer == Opt_workdir); + struct path path; + int err; + + if (!name) + return -ENOMEM; + + if (upper) + err = ovl_mount_dir(name, &path); + else + err = ovl_mount_dir_noesc(name, &path); + if (err) + goto out_free; + + err = ovl_mount_dir_check(fc, &path, layer, name, upper); + if (err) + goto out_put; + + if (!upper) { + err = ovl_ctx_realloc_lower(fc); + if (err) + goto out_put; } - return 0; + + /* Store the user provided path string in ctx to show in mountinfo */ + ovl_add_layer(fc, layer, &path, &name); + +out_put: + path_put(&path); +out_free: + kfree(name); + return err; } -static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) +static void ovl_reset_lowerdirs(struct ovl_fs_context *ctx) { - for (size_t nr = 0; nr < ctx->nr; nr++) { - path_put(&ctx->lower[nr].path); - kfree(ctx->lower[nr].name); - ctx->lower[nr].name = NULL; + struct ovl_fs_context_layer *l = ctx->lower; + + // Reset old user provided lowerdir string + kfree(ctx->lowerdir_all); + ctx->lowerdir_all = NULL; + + for (size_t nr = 0; nr < ctx->nr; nr++, l++) { + path_put(&l->path); + kfree(l->name); + l->name = NULL; } ctx->nr = 0; ctx->nr_data = 0; @@ -346,7 +419,7 @@ static void 
ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) /* * Parse lowerdir= mount option: * - * (1) lowerdir=/lower1:/lower2:/lower3::/data1::/data2 + * e.g.: lowerdir=/lower1:/lower2:/lower3::/data1::/data2 * Set "/lower1", "/lower2", and "/lower3" as lower layers and * "/data1" and "/data2" as data lower layers. Any existing lower * layers are replaced. @@ -356,9 +429,9 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) int err; struct ovl_fs_context *ctx = fc->fs_private; struct ovl_fs_context_layer *l; - char *dup = NULL, *dup_iter; + char *dup = NULL, *iter; ssize_t nr_lower = 0, nr = 0, nr_data = 0; - bool append = false, data_layer = false; + bool data_layer = false; /* * Ensure we're backwards compatible with mount(2) @@ -366,16 +439,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) */ /* drop all existing lower layers */ - if (!*name) { - ovl_parse_param_drop_lowerdir(ctx); + ovl_reset_lowerdirs(ctx); + + if (!*name) return 0; - } if (*name == ':') { pr_err("cannot append lower layer"); return -EINVAL; } + // Store user provided lowerdir string to show in mount options + ctx->lowerdir_all = kstrdup(name, GFP_KERNEL); + if (!ctx->lowerdir_all) + return -ENOMEM; + dup = kstrdup(name, GFP_KERNEL); if (!dup) return -ENOMEM; @@ -385,36 +463,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) if (nr_lower < 0) goto out_err; - if ((nr_lower > OVL_MAX_STACK) || - (append && (size_add(ctx->nr, nr_lower) > OVL_MAX_STACK))) { + if (nr_lower > OVL_MAX_STACK) { pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_err; } - if (!append) - ovl_parse_param_drop_lowerdir(ctx); - - /* - * (1) append - * - * We want nr <= nr_lower <= capacity We know nr > 0 and nr <= - * capacity. If nr == 0 this wouldn't be append. If nr + - * nr_lower is <= capacity then nr <= nr_lower <= capacity - * already holds. If nr + nr_lower exceeds capacity, we realloc. - * - * (2) replace - * - * Ensure we're backwards compatible with mount(2) which allows - * "lowerdir=/a:/b:/c,lowerdir=/d:/e:/f" causing the last - * specified lowerdir mount option to win. - * - * We want nr <= nr_lower <= capacity We know either (i) nr == 0 - * or (ii) nr > 0. We also know nr_lower > 0. The capacity - * could've been changed multiple times already so we only know - * nr <= capacity. If nr + nr_lower > capacity we realloc, - * otherwise nr <= nr_lower <= capacity holds already. - */ - nr_lower += ctx->nr; if (nr_lower > ctx->capacity) { err = -ENOMEM; l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower), @@ -426,41 +479,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ctx->capacity = nr_lower; } - /* - * (3) By (1) and (2) we know nr <= nr_lower <= capacity. - * (4) If ctx->nr == 0 => replace - * We have verified above that the lowerdir mount option - * isn't an append, i.e., the lowerdir mount option - * doesn't start with ":" or "::". - * (4.1) The lowerdir mount options only contains regular lower - * layers ":". - * => Nothing to verify. - * (4.2) The lowerdir mount options contains regular ":" and - * data "::" layers. - * => We need to verify that data lower layers "::" aren't - * followed by regular ":" lower layers - * (5) If ctx->nr > 0 => append - * We know that there's at least one regular layer - * otherwise we would've failed when parsing the previous - * lowerdir mount option. 
- * (5.1) The lowerdir mount option is a regular layer ":" append - * => We need to verify that no data layers have been - * specified before. - * (5.2) The lowerdir mount option is a data layer "::" append - * We know that there's at least one regular layer or - * other data layers. => There's nothing to verify. - */ - dup_iter = dup; - for (nr = ctx->nr; nr < nr_lower; nr++) { - l = &ctx->lower[nr]; + iter = dup; + l = ctx->lower; + for (nr = 0; nr < nr_lower; nr++, l++) { memset(l, 0, sizeof(*l)); - err = ovl_mount_dir(dup_iter, &l->path, false); + err = ovl_mount_dir(iter, &l->path); + if (err) + goto out_put; + + err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false); if (err) goto out_put; err = -ENOMEM; - l->name = kstrdup(dup_iter, GFP_KERNEL_ACCOUNT); + l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT); if (!l->name) goto out_put; @@ -472,8 +505,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) break; err = -EINVAL; - dup_iter = strchr(dup_iter, '\0') + 1; - if (*dup_iter) { + iter = strchr(iter, '\0') + 1; + if (*iter) { /* * This is a regular layer so we require that * there are no data layers. @@ -489,7 +522,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) /* This is a data lower layer. */ data_layer = true; - dup_iter++; + iter++; } ctx->nr = nr_lower; ctx->nr_data += nr_data; @@ -497,21 +530,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) return 0; out_put: - /* - * We know nr >= ctx->nr < nr_lower. If we failed somewhere - * we want to undo until nr == ctx->nr. This is correct for - * both ctx->nr == 0 and ctx->nr > 0. - */ - for (; nr >= ctx->nr; nr--) { - l = &ctx->lower[nr]; - kfree(l->name); - l->name = NULL; - path_put(&l->path); - - /* don't overflow */ - if (nr == 0) - break; - } + ovl_reset_lowerdirs(ctx); out_err: kfree(dup); @@ -556,11 +575,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_lowerdir: err = ovl_parse_param_lowerdir(param->string, fc); break; + case Opt_lowerdir_add: + case Opt_datadir_add: case Opt_upperdir: - fallthrough; case Opt_workdir: - err = ovl_parse_param_upperdir(param->string, fc, - (Opt_workdir == opt)); + err = ovl_parse_layer(fc, param, opt); break; case Opt_default_permissions: config->default_permissions = true; @@ -617,7 +636,7 @@ static int ovl_get_tree(struct fs_context *fc) static inline void ovl_fs_context_free(struct ovl_fs_context *ctx) { - ovl_parse_param_drop_lowerdir(ctx); + ovl_reset_lowerdirs(ctx); path_put(&ctx->upper); path_put(&ctx->work); kfree(ctx->lower); @@ -933,23 +952,28 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); - size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; + size_t nr, nr_merged_lower, nr_lower = 0; + char **lowerdirs = ofs->config.lowerdirs; /* - * lowerdirs[] starts from offset 1, then - * >= 0 regular lower layers prefixed with : and - * >= 0 data-only lower layers prefixed with :: - * - * we need to escase comma and space like seq_show_option() does and - * we also need to escape the colon separator from lowerdir paths. + * lowerdirs[0] holds the colon separated list that user provided + * with lowerdir mount option. + * lowerdirs[1..numlayer] hold the lowerdir paths that were added + * using the lowerdir+ and datadir+ mount options. 
+ * For now, we do not allow mixing the legacy lowerdir mount option + * with the new lowerdir+ and datadir+ mount options. */ - seq_puts(m, ",lowerdir="); - for (nr = 1; nr < ofs->numlayer; nr++) { - if (nr > 1) - seq_putc(m, ':'); - if (nr >= nr_merged_lower) - seq_putc(m, ':'); - seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\"); + if (lowerdirs[0]) { + seq_show_option(m, "lowerdir", lowerdirs[0]); + } else { + nr_lower = ofs->numlayer; + nr_merged_lower = nr_lower - ofs->numdatalayer; + } + for (nr = 1; nr < nr_lower; nr++) { + if (nr < nr_merged_lower) + seq_show_option(m, "lowerdir+", lowerdirs[nr]); + else + seq_show_option(m, "datadir+", lowerdirs[nr]); } if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h index 8750da68ab2a..c96d93982021 100644 --- a/fs/overlayfs/params.h +++ b/fs/overlayfs/params.h @@ -32,6 +32,7 @@ struct ovl_fs_context { size_t nr_data; struct ovl_opt_set set; struct ovl_fs_context_layer *lower; + char *lowerdir_all; /* user provided lowerdir string */ }; int ovl_init_fs_context(struct fs_context *fc); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index de39e067ae65..a490fc47c3e7 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -25,6 +25,7 @@ struct ovl_cache_entry { struct ovl_cache_entry *next_maybe_whiteout; bool is_upper; bool is_whiteout; + bool check_xwhiteout; char name[]; }; @@ -47,6 +48,7 @@ struct ovl_readdir_data { int err; bool is_upper; bool d_type_supported; + bool in_xwhiteouts_dir; }; struct ovl_dir_file { @@ -162,6 +164,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, p->ino = 0; p->is_upper = rdd->is_upper; p->is_whiteout = false; + /* Defer check for overlay.whiteout to ovl_iterate() */ + p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; if (d_type == DT_CHR) { p->next_maybe_whiteout = rdd->first_maybe_whiteout; @@ -301,6 +305,8 @@ static inline int ovl_dir_read(const struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); + rdd->in_xwhiteouts_dir = rdd->dentry && + ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath); rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { @@ -447,7 +453,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, } /* - * Set d_ino for upper entries. Non-upper entries should always report + * Set d_ino for upper entries if needed. Non-upper entries should always report * the uppermost real inode ino and should not call this function. * * When not all layer are on same fs, report real ino also for upper. @@ -455,8 +461,11 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, * When all layers are on the same fs, and upper has a reference to * copy up origin, call vfs_getattr() on the overlay entry to make * sure that d_ino will be consistent with st_ino from stat(2). + * + * Also checks the overlay.whiteout xattr by doing a full lookup which will return + * negative in this case. 
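An xwhiteout, per the checks added in util.c later in this series, is a zero-size regular file carrying the overlay.whiteout xattr, inside a directory carrying the overlay.whiteouts xattr that the readdir code above uses as a hint. Layer-preparation tools can therefore express whiteouts without mknod. A hedged userspace sketch (make_xwhiteout is hypothetical; trusted.* names require CAP_SYS_ADMIN, and with the userxattr mount option the user.overlay.* names would apply instead):

        #include <fcntl.h>
        #include <sys/xattr.h>
        #include <unistd.h>

        /* Hypothetical helper: mark "file" inside "dir" as an xwhiteout. */
        static int make_xwhiteout(const char *dir, const char *file)
        {
                int fd = open(file, O_CREAT | O_EXCL | O_WRONLY, 0000);

                if (fd < 0)
                        return -1;
                close(fd);                      /* the file stays zero-size */

                /* Per-file whiteout marker... */
                if (setxattr(file, "trusted.overlay.whiteout", "", 0, 0) < 0)
                        return -1;
                /* ...and the per-directory hint that readdir checks first. */
                return setxattr(dir, "trusted.overlay.whiteouts", "", 0, 0);
        }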
*/ -static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p) +static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) { struct dentry *dir = path->dentry; @@ -467,7 +476,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry int xinobits = ovl_xino_bits(ofs); int err = 0; - if (!ovl_same_dev(ofs)) + if (!ovl_same_dev(ofs) && !p->check_xwhiteout) goto out; if (p->name[0] == '.') { @@ -481,6 +490,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry goto get; } } + /* This checks also for xwhiteouts */ this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { /* Mark a stale entry */ @@ -494,6 +504,9 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry } get: + if (!ovl_same_dev(ofs) || !update_ino) + goto out; + type = ovl_path_type(this); if (OVL_TYPE_ORIGIN(type)) { struct kstat stat; @@ -572,7 +585,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, list_for_each_entry_safe(p, n, list, l_node) { if (strcmp(p->name, ".") != 0 && strcmp(p->name, "..") != 0) { - err = ovl_cache_update_ino(path, p); + err = ovl_cache_update(path, p, true); if (err) return err; } @@ -778,13 +791,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) while (od->cursor != &od->cache->entries) { p = list_entry(od->cursor, struct ovl_cache_entry, l_node); if (!p->is_whiteout) { - if (!p->ino) { - err = ovl_cache_update_ino(&file->f_path, p); + if (!p->ino || p->check_xwhiteout) { + err = ovl_cache_update(&file->f_path, p, !p->ino); if (err) goto out; } } - /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + /* ovl_cache_update() sets is_whiteout on stale entry */ if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 17864a8d2b85..a0967bb25003 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -445,68 +445,6 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) return ok; } -static int ovl_own_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - -static int ovl_own_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static int ovl_other_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return ovl_xattr_get(dentry, inode, name, buffer, size); -} - -static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return ovl_xattr_set(dentry, inode, name, value, size, flags); -} - -static const struct xattr_handler ovl_own_trusted_xattr_handler = { - .prefix = OVL_XATTR_TRUSTED_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_own_user_xattr_handler = { - .prefix = OVL_XATTR_USER_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_other_xattr_handler = { - .prefix = "", /* catch all */ - .get = 
ovl_other_xattr_get, - .set = ovl_other_xattr_set, -}; - -static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { - &ovl_own_trusted_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - -static const struct xattr_handler * const ovl_user_xattr_handlers[] = { - &ovl_own_user_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, struct inode **ptrap, const char *name) { @@ -647,7 +585,7 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) if (IS_ERR(whiteout)) goto cleanup_temp; - err = ovl_is_whiteout(whiteout); + err = ovl_upper_is_whiteout(ofs, whiteout); /* Best effort cleanup of whiteout and temp file */ if (err) @@ -887,15 +825,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir; + struct dentry *origin = ovl_lowerstack(oe)->dentry; + const struct ovl_fh *fh; int err; + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = mnt_want_write(mnt); if (err) - return err; + goto out_free_fh; /* Verify lower root is upper root origin */ - err = ovl_verify_origin(ofs, upperpath->dentry, - ovl_lowerstack(oe)->dentry, true); + err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; @@ -927,9 +870,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, * directory entries. */ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { - err = ovl_verify_set_fh(ofs, ofs->indexdir, - OVL_XATTR_ORIGIN, - upperpath->dentry, true, false); + err = ovl_verify_origin_xattr(ofs, ofs->indexdir, + OVL_XATTR_ORIGIN, + upperpath->dentry, true, + false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } @@ -947,6 +891,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, out: mnt_drop_write(mnt); +out_free_fh: + kfree(fh); return err; } @@ -1382,8 +1328,11 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) ofs->layers = layers; /* * Layer 0 is reserved for upper even if there's no upper. - * For consistency, config.lowerdirs[0] is NULL. + * config.lowerdirs[0] is used for storing the user provided colon + * separated lowerdir string. */ + ofs->config.lowerdirs[0] = ctx->lowerdir_all; + ctx->lowerdir_all = NULL; ofs->numlayer = 1; sb->s_stack_depth = 0; @@ -1493,8 +1442,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_xattr = ofs->config.userxattr ? 
ovl_user_xattr_handlers : - ovl_trusted_xattr_handlers; + sb->s_xattr = ovl_xattr_handlers(ofs); sb->s_fs_info = ofs; #ifdef CONFIG_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 868afd8834c3..50a201e9cd39 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -17,12 +17,38 @@ #include <linux/ratelimit.h> #include "overlayfs.h" +/* Get write access to upper mnt - may fail if upper sb was remounted ro */ +int ovl_get_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + return mnt_get_write_access(ovl_upper_mnt(ofs)); +} + +/* Get write access to upper sb - may block if upper sb is frozen */ +void ovl_start_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_start_write(ovl_upper_mnt(ofs)->mnt_sb); +} + int ovl_want_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); return mnt_want_write(ovl_upper_mnt(ofs)); } +void ovl_put_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + mnt_put_write_access(ovl_upper_mnt(ofs)); +} + +void ovl_end_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_end_write(ovl_upper_mnt(ofs)->mnt_sb); +} + void ovl_drop_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -55,7 +81,7 @@ int ovl_can_decode_fh(struct super_block *sb) if (!capable(CAP_DAC_READ_SEARCH)) return 0; - if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry) + if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; @@ -575,6 +601,16 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } +/* + * Use this over ovl_is_whiteout for upper and lower files, as it also + * handles overlay.whiteout xattr whiteout files. + */ +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path) +{ + return ovl_is_whiteout(path->dentry) || + ovl_path_check_xwhiteout_xattr(ofs, path); +} + struct file *ovl_path_open(const struct path *path, int flags) { struct inode *inode = d_inode(path->dentry); @@ -644,22 +680,36 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags) return false; } +/* + * The copy up "transaction" keeps an elevated mnt write count on upper mnt, + * but leaves taking freeze protection on upper sb to lower level helpers. 
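The new util.c helpers above split apart what ovl_want_write() takes in one go: write access to the upper mount (which fails if the upper fs was remounted read-only) and freeze protection on the upper superblock (which may block on a frozen fs). Conceptually, the composed helper looks like this (simplified from fs/namespace.c, not the verbatim body):

        int mnt_want_write(struct vfsmount *mnt)
        {
                int ret;

                sb_start_write(mnt->mnt_sb);            /* freeze protection, may block */
                ret = mnt_get_write_access(mnt);        /* fails on r/o remount */
                if (ret)
                        sb_end_write(mnt->mnt_sb);
                return ret;
        }

Holding only the mnt write count across the copy up "transaction", while taking freeze protection in short sections, is what lets the lower-level helpers above cycle sb_start_write()/sb_end_write() per operation.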
+ */ int ovl_copy_up_start(struct dentry *dentry, int flags) { struct inode *inode = d_inode(dentry); int err; err = ovl_inode_lock_interruptible(inode); - if (!err && ovl_already_copied_up_locked(dentry, flags)) { + if (err) + return err; + + if (ovl_already_copied_up_locked(dentry, flags)) err = 1; /* Already copied up */ - ovl_inode_unlock(inode); - } + else + err = ovl_get_write_access(dentry); + if (err) + goto out_unlock; + + return 0; +out_unlock: + ovl_inode_unlock(inode); return err; } void ovl_copy_up_end(struct dentry *dentry) { + ovl_put_write_access(dentry); ovl_inode_unlock(d_inode(dentry)); } @@ -676,6 +726,32 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path) return false; } +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path) +{ + struct dentry *dentry = path->dentry; + int res; + + /* xattr.whiteout must be a zero size regular file */ + if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0) + return false; + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0); + return res >= 0; +} + +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path) +{ + struct dentry *dentry = path->dentry; + int res; + + /* xattr.whiteouts must be a directory */ + if (!d_is_dir(dentry)) + return false; + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0); + return res >= 0; +} + /* * Load persistent uuid from xattr into s_uuid if found, or store a new * random generated value in s_uuid and in xattr. @@ -760,6 +836,8 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, #define OVL_XATTR_UUID_POSTFIX "uuid" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" #define OVL_XATTR_PROTATTR_POSTFIX "protattr" +#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" +#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -775,6 +853,8 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -973,12 +1053,18 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *index = NULL; struct inode *inode; struct qstr name = { }; + bool got_write = false; int err; err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; + err = ovl_want_write(dentry); + if (err) + goto fail; + + got_write = true; inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -1016,6 +1102,8 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + if (got_write) + ovl_drop_write(dentry); kfree(name.name); dput(index); return; @@ -1062,8 +1150,12 @@ int ovl_nlink_start(struct dentry *dentry) if (err) return err; + err = ovl_want_write(dentry); + if (err) + goto out_unlock; + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode)) - goto out; + return 0; old_cred = ovl_override_creds(dentry->d_sb); /* @@ -1074,10 +1166,15 @@ int ovl_nlink_start(struct dentry *dentry) */ err = ovl_set_nlink_upper(dentry); revert_creds(old_cred); - -out: if (err) - ovl_inode_unlock(inode); + goto out_drop_write; + + return 0; + +out_drop_write: + ovl_drop_write(dentry); +out_unlock: + ovl_inode_unlock(inode); return err; } @@ 
-1086,6 +1183,8 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry); + ovl_drop_write(dentry); + if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { const struct cred *old_cred; @@ -1403,6 +1502,7 @@ void ovl_copyattr(struct inode *inode) realinode = ovl_i_path_real(inode, &realpath); real_idmap = mnt_idmap(realpath.mnt); + spin_lock(&inode->i_lock); vfsuid = i_uid_into_vfsuid(real_idmap, realinode); vfsgid = i_gid_into_vfsgid(real_idmap, realinode); @@ -1413,4 +1513,5 @@ void ovl_copyattr(struct inode *inode) inode_set_mtime_to_ts(inode, inode_get_mtime(realinode)); inode_set_ctime_to_ts(inode, inode_get_ctime(realinode)); i_size_write(inode, i_size_read(realinode)); + spin_unlock(&inode->i_lock); } diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c new file mode 100644 index 000000000000..383978e4663c --- /dev/null +++ b/fs/overlayfs/xattrs.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/fs.h> +#include <linux/xattr.h> +#include "overlayfs.h" + +static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX, + OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX, + OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0; +} + +static bool ovl_is_own_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = OVL_FS(sb); + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_USER_PREFIX, + OVL_XATTR_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, + OVL_XATTR_TRUSTED_PREFIX_LEN) == 0; +} + +bool ovl_is_private_xattr(struct super_block *sb, const char *name) +{ + return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name); +} + +static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); + struct path realpath; + const struct cred *old_cred; + + if (!value && !upperdentry) { + ovl_path_lower(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); + revert_creds(old_cred); + if (err < 0) + goto out; + } + + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out; + + realdentry = ovl_dentry_upper(dentry); + } + + err = ovl_want_write(dentry); + if (err) + goto out; + + old_cred = ovl_override_creds(dentry->d_sb); + if (value) { + err = ovl_do_setxattr(ofs, realdentry, name, value, size, + flags); + } else { + WARN_ON(flags != XATTR_REPLACE); + err = ovl_do_removexattr(ofs, realdentry, name); + } + revert_creds(old_cred); + ovl_drop_write(dentry); + + /* copy c/mtime */ + ovl_copyattr(inode); +out: + return err; +} + +static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, + void *value, size_t size) +{ + ssize_t res; + const struct cred *old_cred; + struct path realpath; + + ovl_i_path_real(inode, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); + revert_creds(old_cred); + return res; +} + +static bool ovl_can_list(struct super_block *sb, const char *s) +{ + /* Never list private (.overlay) */ + if 
(ovl_is_private_xattr(sb, s)) + return false; + + /* List all non-trusted xattrs */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* list other trusted for superuser only */ + return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); +} + +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct dentry *realdentry = ovl_dentry_real(dentry); + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + ssize_t res; + size_t len; + char *s; + const struct cred *old_cred; + size_t prefix_len, name_len; + + old_cred = ovl_override_creds(dentry->d_sb); + res = vfs_listxattr(realdentry, list, size); + revert_creds(old_cred); + if (res <= 0 || size == 0) + return res; + + prefix_len = ofs->config.userxattr ? + OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN; + + /* filter out private xattrs */ + for (s = list, len = res; len;) { + size_t slen = strnlen(s, len) + 1; + + /* underlying fs providing us with a broken xattr list? */ + if (WARN_ON(slen > len)) + return -EIO; + + len -= slen; + if (!ovl_can_list(dentry->d_sb, s)) { + res -= slen; + memmove(s, s + slen, len); + } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) { + res -= OVL_XATTR_ESCAPE_PREFIX_LEN; + name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN; + s += prefix_len; + memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len); + s += name_len; + } else { + s += slen; + } + } + + return res; +} + +static char *ovl_xattr_escape_name(const char *prefix, const char *name) +{ + size_t prefix_len = strlen(prefix); + size_t name_len = strlen(name); + size_t escaped_len; + char *escaped, *s; + + escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len; + if (escaped_len > XATTR_NAME_MAX) + return ERR_PTR(-EOPNOTSUPP); + + escaped = kmalloc(escaped_len + 1, GFP_KERNEL); + if (escaped == NULL) + return ERR_PTR(-ENOMEM); + + s = escaped; + memcpy(s, prefix, prefix_len); + s += prefix_len; + memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN); + s += OVL_XATTR_ESCAPE_PREFIX_LEN; + memcpy(s, name, name_len + 1); + + return escaped; +} + +static int ovl_own_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_get(dentry, inode, escaped, buffer, size); + + kfree(escaped); + + return r; +} + +static int ovl_own_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_set(dentry, inode, escaped, value, size, flags); + + kfree(escaped); + + return r; +} + +static int ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, inode, name, buffer, size); +} + +static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + return ovl_xattr_set(dentry, inode, name, value, size, flags); +} + +static const struct xattr_handler ovl_own_trusted_xattr_handler = { + .prefix =
OVL_XATTR_TRUSTED_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_own_user_xattr_handler = { + .prefix = OVL_XATTR_USER_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_other_xattr_handler = { + .prefix = "", /* catch all */ + .get = ovl_other_xattr_get, + .set = ovl_other_xattr_set, +}; + +static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { + &ovl_own_trusted_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +static const struct xattr_handler * const ovl_user_xattr_handlers[] = { + &ovl_own_user_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs) +{ + return ofs->config.userxattr ? ovl_user_xattr_handlers : + ovl_trusted_xattr_handlers; +} + diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 6d8804fb6535..5596c9f30ccb 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -136,6 +136,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) { struct TCP_Server_Info *server = chan->server; + if (!server) { + seq_printf(m, "\n\n\t\tChannel: %d DISABLED", i+1); + return; + } + seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" @@ -279,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; + size_t iface_weight = 0, iface_min_speed = 0; + struct cifs_server_iface *last_iface = NULL; int c, i, j; seq_puts(m, @@ -544,11 +551,25 @@ skip_rdma: "\tLast updated: %lu seconds ago", ses->iface_count, (jiffies - ses->iface_last_update) / HZ); + + last_iface = list_last_entry(&ses->iface_list, + struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + j = 0; list_for_each_entry(iface, &ses->iface_list, iface_head) { seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); + + iface_weight = iface->speed / iface_min_speed; + seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)" + "\n\t\tAllocated channels: %u\n", + iface->weight_fulfilled, + iface_weight, + iface->num_channels); + if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); } @@ -746,14 +767,14 @@ static ssize_t name##_write(struct file *file, const char __user *buffer, \ size_t count, loff_t *ppos) \ { \ int rc; \ - rc = kstrtoint_from_user(buffer, count, 10, & name); \ + rc = kstrtoint_from_user(buffer, count, 10, &name); \ if (rc) \ return rc; \ return count; \ } \ static int name##_proc_show(struct seq_file *m, void *v) \ { \ - seq_printf(m, "%d\n", name ); \ + seq_printf(m, "%d\n", name); \ return 0; \ } \ static int name##_open(struct inode *inode, struct file *file) \ diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 332588e77c31..26327442e383 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -26,6 +26,11 @@ struct smb_mnt_fs_info { __u64 cifs_posix_caps; } __packed; +struct smb_mnt_tcon_info { + __u32 tid; + __u64 session_id; +} __packed; + struct smb_snapshot_array { __u32 number_of_snapshots; __u32 number_of_snapshots_returned; @@ -108,6 +113,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO 
_IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) +#define CIFS_IOC_GET_TCON_INFO _IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info) #define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 8ca3d7606bb4..3adea10aa9da 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -152,6 +152,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 45 -#define CIFS_VERSION "2.45" +#define SMB3_PRODUCT_BUILD 46 +#define CIFS_VERSION "2.46" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 02082621d8e0..6ffbd81bd109 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -650,6 +650,7 @@ struct TCP_Server_Info { bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; + bool terminate; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ @@ -969,6 +970,8 @@ struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; + size_t weight_fulfilled; + unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ @@ -1050,6 +1053,7 @@ struct cifs_ses { spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 +#define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ @@ -2143,6 +2147,7 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, unsigned int len, skip; unsigned int nents = 0; unsigned long addr; + size_t data_size; int i, j; /* @@ -2158,17 +2163,21 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { + data_size = iov_iter_count(&rqst[i].rq_iter); + /* We really don't want a mixture of pinned and unpinned pages * in the sglist. It's hard to keep track of which is what. * Instead, we convert to a BVEC-type iterator higher up. */ - if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) return -EIO; /* We also don't want to have any extra refs or pins to clean * up in the sglist. 
*/ - if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) return -EIO; for (j = 0; j < rqst[i].rq_nvec; j++) { @@ -2184,7 +2193,8 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, } skip = 0; } - nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); + if (data_size) + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 890ceddae07e..d87e2c26cce2 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -132,6 +132,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); + void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); @@ -610,13 +611,13 @@ void cifs_free_hash(struct shash_desc **sdesc); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); +int cifs_try_adding_channels(struct cifs_ses *ses); bool is_server_using_iface(struct TCP_Server_Info *server, struct cifs_server_iface *iface); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server); void @@ -640,6 +641,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, bool cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); +void +cifs_disable_secondary_channels(struct cifs_ses *ses); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); int diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1a137b33858a..57c2a7df3457 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -132,6 +132,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) free_xid(xid); if (rc) { + if (rc == -EOPNOTSUPP) + return; + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); } @@ -173,8 +176,12 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) + continue; + spin_lock(&ses->chans[i].server->srv_lock); - ses->chans[i].server->tcpStatus = CifsNeedReconnect; + if (ses->chans[i].server->tcpStatus != CifsExiting) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; spin_unlock(&ses->chans[i].server->srv_lock); } spin_unlock(&ses->chan_lock); @@ -212,6 +219,14 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { + /* + * if channel has been marked for termination, nothing to do + * for the channel. in fact, we cannot find the channel for the + * server. 
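The weight bookkeeping printed by cifs_debug.c above and used by cifs_try_adding_channels() further down distributes channels across server interfaces in proportion to speed: each interface's weight is its speed divided by the slowest advertised speed, and an interface stops receiving channels once weight_fulfilled catches up with that weight. A standalone sketch of the allocation loop, with made-up speeds and the list kept fastest-first as the kernel keeps ses->iface_list:

#include <stdio.h>
#include <stddef.h>

struct iface {
	const char *name;
	size_t speed;			/* Mb/s, illustrative */
	size_t weight_fulfilled;
};

int main(void)
{
	/* the kernel keeps ses->iface_list sorted fastest-first */
	struct iface ifaces[] = {
		{ "A", 10000, 0 },
		{ "B", 10000, 0 },
		{ "C",  1000, 0 },
	};
	size_t n = sizeof(ifaces) / sizeof(ifaces[0]);
	size_t min_speed = ifaces[n - 1].speed;	/* slowest is last */
	int left = 8;				/* channels to open */

	while (left > 0) {
		int opened = 0;

		for (size_t i = 0; i < n && left > 0; i++) {
			size_t weight = ifaces[i].speed / min_speed;

			/* already carries its fair share? */
			if (ifaces[i].weight_fulfilled >= weight)
				continue;
			ifaces[i].weight_fulfilled++;
			left--;
			opened = 1;
			printf("channel %d -> iface %s\n", 8 - left, ifaces[i].name);
		}
		/* full pass without progress: reset and start over */
		if (!opened)
			for (size_t i = 0; i < n; i++)
				ifaces[i].weight_fulfilled = 0;
	}
	return 0;
}

With these speeds the 1 Gb/s interface gets one channel for every ten placed on each 10 Gb/s interface, which is exactly the "Weight (cur,total)" ratio the new debug output reports.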
So safe to exit here + */ + if (server->terminate) + break; + + /* check if iface is still active */ if (!cifs_chan_is_iface_active(ses, server)) cifs_chan_update_iface(ses, server); @@ -246,6 +261,8 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; spin_unlock(&tcon->tc_lock); + + cancel_delayed_work(&tcon->query_interfaces); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; @@ -385,7 +402,13 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } } while (server->tcpStatus == CifsNeedReconnect); @@ -515,7 +538,13 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } while (server->tcpStatus == CifsNeedReconnect); mutex_lock(&server->refpath_lock); @@ -1597,16 +1626,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) cancel_delayed_work_sync(&server->echo); - if (from_reconnect) + if (from_reconnect) { /* * Avoid deadlock here: reconnect work calls * cifs_put_tcp_session() at its end. Need to be sure * that reconnect work does nothing with server pointer after * that step. */ - cancel_delayed_work(&server->reconnect); - else - cancel_delayed_work_sync(&server->reconnect); + if (cancel_delayed_work(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } else { + if (cancel_delayed_work_sync(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; @@ -3560,7 +3592,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ctx->prepath = NULL; out: - cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); + cifs_try_adding_channels(mnt_ctx.ses); rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) goto error; diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 81b84151450d..a8a1d386da65 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -263,15 +263,23 @@ out: return rc; } -/* Resolve UNC hostname in @ctx->source and set ip addr in @ctx->dstaddr */ +/* + * If @ctx->dfs_automount, then update @ctx->dstaddr earlier with the DFS root + * server from where we'll start following any referrals. Otherwise rely on the + * value provided by mount(2) as the user might not have the dns_resolver key + * set up and would therefore fail to upcall to resolve UNC hostname under @ctx->source.
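The new CIFS_IOC_GET_TCON_INFO ioctl defined earlier in cifs_ioctl.h (its handler follows in ioctl.c) is trivially exercised from userspace. A sketch, copying the struct layout from the patch and assuming CIFS_IOCTL_MAGIC is 0xCF as in cifs_ioctl.h; any file on a cifs mount works as the argument:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

struct smb_mnt_tcon_info {
	uint32_t tid;
	uint64_t session_id;
} __attribute__((packed));

#define CIFS_IOCTL_MAGIC 0xCF	/* assumed, matching cifs_ioctl.h */
#define CIFS_IOC_GET_TCON_INFO \
	_IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info)

int main(int argc, char **argv)
{
	struct smb_mnt_tcon_info info;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-cifs-mount>\n", argv[0]);
		return 2;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, CIFS_IOC_GET_TCON_INFO, &info) < 0) {
		perror("CIFS_IOC_GET_TCON_INFO");
		return 1;
	}
	printf("tid=0x%x session_id=0x%llx\n",
	       info.tid, (unsigned long long)info.session_id);
	close(fd);
	return 0;
}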
+ */ static int update_fs_context_dstaddr(struct smb3_fs_context *ctx) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; - int rc; + int rc = 0; - rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); - if (!rc) - cifs_set_port(addr, ctx->port); + if (!ctx->nodfs && ctx->dfs_automount) { + rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); + if (!rc) + cifs_set_port(addr, ctx->port); + ctx->dfs_automount = false; + } return rc; } diff --git a/fs/smb/client/export.c b/fs/smb/client/export.c index 37c28415df1e..d606e8cbcb7d 100644 --- a/fs/smb/client/export.c +++ b/fs/smb/client/export.c @@ -41,13 +41,12 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) } const struct export_operations cifs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = cifs_get_parent, -/* Following five export operations are unneeded so far and can default: - .get_dentry = - .get_name = - .find_exported_dentry = - .decode_fh = - .encode_fs = */ +/* + * Following export operations are mandatory for NFS export support: + * .fh_to_dentry = + */ }; #endif /* CONFIG_CIFS_NFSD_EXPORT */ diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9d8d34af0211..cf46916286d0 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -268,6 +268,7 @@ struct smb3_fs_context { bool witness:1; /* use witness protocol */ char *leaf_fullpath; struct cifs_ses *dfs_root_ses; + bool dfs_automount:1; /* set for dfs automount only */ }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index f7160003e0ed..e2f92c21fff5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -117,6 +117,20 @@ out_drop_write: return rc; } +static long smb_mnt_get_tcon_info(struct cifs_tcon *tcon, void __user *arg) +{ + int rc = 0; + struct smb_mnt_tcon_info tcon_inf; + + tcon_inf.tid = tcon->tid; + tcon_inf.session_id = tcon->ses->Suid; + + if (copy_to_user(arg, &tcon_inf, sizeof(struct smb_mnt_tcon_info))) + rc = -EFAULT; + + return rc; +} + static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, void __user *arg) { @@ -129,6 +143,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, fsinf->version = 1; fsinf->protocol_id = tcon->ses->server->vals->protocol_id; + fsinf->tcon_flags = tcon->Flags; fsinf->device_characteristics = le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics); fsinf->device_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); @@ -414,6 +429,17 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; + case CIFS_IOC_GET_TCON_INFO: + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); + rc = smb_mnt_get_tcon_info(tcon, (void __user *)arg); + cifs_put_tlink(tlink); + break; case CIFS_ENUMERATE_SNAPSHOTS: if (pSMBFile == NULL) break; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index c8f5ed8a69f1..a6968573b775 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -117,6 +117,18 @@ cifs_build_devname(char *nodename, const char *prepath) return dev; } +static bool is_dfs_mount(struct dentry *dentry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool ret; + + spin_lock(&tcon->tc_lock); + ret = 
!!tcon->origin_fullpath; + spin_unlock(&tcon->tc_lock); + return ret; +} + /* Return full path out of a dentry set for automount */ static char *automount_fullpath(struct dentry *dentry, void *page) { @@ -212,8 +224,9 @@ static struct vfsmount *cifs_do_automount(struct path *path) ctx->source = NULL; goto out; } - cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s\n", - __func__, ctx->source, ctx->UNC, ctx->prepath); + ctx->dfs_automount = is_dfs_mount(mntpt); + cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s dfs_automount=%d\n", + __func__, ctx->source, ctx->UNC, ctx->prepath, ctx->dfs_automount); mnt = fc_mount(fc); out: diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cd474cf98f30..0bb2ac929061 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -24,7 +24,7 @@ #include "fs_context.h" static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface); bool @@ -69,7 +69,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) /* channel helper functions. assumed that chan_lock is held by caller. */ -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server) { @@ -85,14 +85,17 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); WARN_ON(1); - return 0; + return CIFS_INVAL_CHAN_INDEX; } void cifs_chan_set_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + int chan_index = cifs_ses_get_chan_index(ses, server); + + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = true; } @@ -103,6 +106,9 @@ cifs_chan_clear_in_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + ses->chans[chan_index].in_reconnect = false; } @@ -112,6 +118,9 @@ cifs_chan_in_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return CIFS_CHAN_IN_RECONNECT(ses, chan_index); } @@ -121,6 +130,9 @@ cifs_chan_set_need_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + set_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n", chan_index, ses->chans_need_reconnect); @@ -132,6 +144,9 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + clear_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n", chan_index, ses->chans_need_reconnect); @@ -143,6 +158,9 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } @@ -152,19 +170,24 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return ses->chans[chan_index].iface && 
ses->chans[chan_index].iface->is_active; } /* returns number of channels added */ -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) +int cifs_try_adding_channels(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; int old_chan_count, new_chan_count; int left; int rc = 0; int tries = 0; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL, *niface = NULL; + struct cifs_server_iface *last_iface = NULL; spin_lock(&ses->chan_lock); @@ -192,21 +215,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Keep connecting to same, fastest, iface for all channels as - * long as its RSS. Try next fastest one if not RSS or channel - * creation fails. - */ - spin_lock(&ses->iface_lock); - iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, - iface_head); - spin_unlock(&ses->iface_lock); - while (left > 0) { tries++; if (tries > 3*ses->chan_max) { - cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n", + cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n", left); break; } @@ -214,23 +227,41 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", + ses->server->hostname); break; } + if (!iface) + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != ses->server->rdma) + continue; + /* skip ifaces that are unusable */ if (!iface->is_active || (is_ses_using_iface(ses, iface) && - !iface->rss_capable)) { + !iface->rss_capable)) + continue; + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) continue; - } /* take ref before unlock */ kref_get(&iface->refcount); spin_unlock(&ses->iface_lock); - rc = cifs_ses_add_channel(cifs_sb, ses, iface); + rc = cifs_ses_add_channel(ses, iface); spin_lock(&ses->iface_lock); if (rc) { @@ -241,10 +272,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) continue; } - cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + iface->num_channels++; + iface->weight_fulfilled++; + cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n", &iface->sockaddr); break; } + + /* reached end of list. reset weight_fulfilled and start over */ + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_for_each_entry(iface, &ses->iface_list, iface_head) + iface->weight_fulfilled = 0; + spin_unlock(&ses->iface_lock); + iface = NULL; + continue; + } spin_unlock(&ses->iface_lock); left--; @@ -255,6 +297,60 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } /* + * called when multichannel is disabled by the server. + * this always gets called from smb2_reconnect + * and cannot get called in parallel threads. 
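cifs_disable_secondary_channels() below walks each secondary channel, dropping the interface reference under iface_lock and only then terminating the server. The iface lifetime rests on the usual kref pattern: release_iface() runs only on the final put. A plain C analogue using stdatomic, not the kernel API, to show why the pairing of gets and puts is what keeps the teardown safe:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct iface {
	atomic_int refcount;
	int num_channels;
};

static void release_iface(struct iface *iface)
{
	printf("releasing iface\n");
	free(iface);
}

static void iface_get(struct iface *iface)
{
	atomic_fetch_add(&iface->refcount, 1);
}

static void iface_put(struct iface *iface)
{
	/* fetch_sub returns the old value: old == 1 means last ref */
	if (atomic_fetch_sub(&iface->refcount, 1) == 1)
		release_iface(iface);
}

int main(void)
{
	struct iface *iface = calloc(1, sizeof(*iface));

	atomic_init(&iface->refcount, 1);	/* the list's reference */
	iface_get(iface);			/* a channel's reference */
	iface_put(iface);			/* channel torn down */
	iface_put(iface);			/* removed from list: freed */
	return 0;
}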
+ */ +void +cifs_disable_secondary_channels(struct cifs_ses *ses) +{ + int i, chan_count; + struct TCP_Server_Info *server; + struct cifs_server_iface *iface; + + spin_lock(&ses->chan_lock); + chan_count = ses->chan_count; + if (chan_count == 1) + goto done; + + ses->chan_count = 1; + + /* for all secondary channels reset the need reconnect bit */ + ses->chans_need_reconnect &= 1; + + for (i = 1; i < chan_count; i++) { + iface = ses->chans[i].iface; + server = ses->chans[i].server; + + if (iface) { + spin_lock(&ses->iface_lock); + kref_put(&iface->refcount, release_iface); + ses->chans[i].iface = NULL; + iface->num_channels--; + if (iface->weight_fulfilled) + iface->weight_fulfilled--; + spin_unlock(&ses->iface_lock); + } + + spin_unlock(&ses->chan_lock); + if (server && !server->terminate) { + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + } + spin_lock(&ses->chan_lock); + + if (server) { + ses->chans[i].server = NULL; + cifs_put_tcp_session(server, false); + } + + } + +done: + spin_unlock(&ses->chan_lock); +} + +/* * update the iface for the channel if necessary. * will return 0 when iface is updated, 1 if removed, 2 otherwise * Must be called with chan_lock held. @@ -263,13 +359,16 @@ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; + struct cifs_server_iface *last_iface = NULL; + struct sockaddr_storage ss; int rc = 0; spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); - if (!chan_index) { + if (chan_index == CIFS_INVAL_CHAN_INDEX) { spin_unlock(&ses->chan_lock); return 0; } @@ -283,14 +382,49 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } spin_unlock(&ses->chan_lock); + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); + spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname); + return 0; + } + + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!chan_index) { + /* if we're trying to get the updated iface for primary channel */ + if (!cifs_match_ipaddr((struct sockaddr *) &ss, + (struct sockaddr *) &iface->sockaddr)) + continue; + + kref_get(&iface->refcount); + break; + } + + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != server->rdma) + continue; + if (!iface->is_active || (is_ses_using_iface(ses, iface) && !iface->rss_capable)) { continue; } + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) + continue; + kref_get(&iface->refcount); break; } @@ -301,16 +435,41 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } + if (!chan_index && !iface) { + cifs_dbg(FYI, "unable to get the interface matching: %pIS\n", + &ss); + spin_unlock(&ses->iface_lock); + return 0; + } + /* now drop the ref to the current iface */ if (old_iface && iface) { cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); + + old_iface->num_channels--; + if 
(old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + iface->num_channels++; + iface->weight_fulfilled++; + kref_put(&old_iface->refcount, release_iface); } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + kref_put(&old_iface->refcount, release_iface); + } else if (!chan_index) { + /* special case: update interface for primary channel */ + cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", + &iface->sockaddr); + iface->num_channels++; + iface->weight_fulfilled++; } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); @@ -319,6 +478,11 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + return 0; + } + ses->chans[chan_index].iface = iface; /* No iface is found. if secondary chan, drop connection */ @@ -354,7 +518,7 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server) } static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) { struct TCP_Server_Info *chan_server; @@ -433,7 +597,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, * This will be used for encoding/decoding user/domain/pw * during sess setup auth. */ - ctx->local_nls = cifs_sb->local_nls; + ctx->local_nls = ses->local_nls; /* Use RDMA if possible */ ctx->rdma = iface->rdma_capable; @@ -479,20 +643,16 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, rc = cifs_negotiate_protocol(xid, ses, chan->server); if (!rc) - rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); + rc = cifs_setup_session(xid, ses, chan->server, ses->local_nls); mutex_unlock(&ses->session_mutex); out: if (rc && chan->server) { - /* - * we should avoid race with these delayed works before we - * remove this channel - */ - cancel_delayed_work_sync(&chan->server->echo); - cancel_delayed_work_sync(&chan->server->reconnect); + cifs_put_tcp_session(chan->server, 0); spin_lock(&ses->chan_lock); + /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; @@ -502,8 +662,6 @@ out: */ WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); - - cifs_put_tcp_session(chan->server, 0); } kfree(ctx->UNC); @@ -535,8 +693,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ - /* BB verify whether signing required on neg or just on auth frame - (and NTLM case) */ + /* BB verify whether signing required on neg or just auth frame (and NTLM case) */ capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; @@ -593,8 +750,10 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName == NULL) { - /* Sending null domain better than using a bogus domain name (as - we did briefly in 2.6.18) since server will use its default */ + /* + * Sending null domain better than using a bogus domain name (as + * we did briefly in 2.6.18) since server will use its default + */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; bytes_ret = 0; @@ -613,8 +772,7 @@ static void unicode_ssetup_strings(char 
**pbcc_area, struct cifs_ses *ses, char *bcc_ptr = *pbcc_area; int bytes_ret = 0; - /* BB FIXME add check that strings total less - than 335 or will need to send them as arrays */ + /* BB FIXME add check that strings less than 335 or will need to send as arrays */ /* copy user */ if (ses->user_name == NULL) { @@ -659,8 +817,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, if (WARN_ON_ONCE(len < 0)) len = CIFS_MAX_DOMAINNAME_LEN - 1; bcc_ptr += len; - } /* else we will send a null domain name - so the server will default to its own domain */ + } /* else we send a null domain name so server will default to its own domain */ *bcc_ptr = 0; bcc_ptr++; @@ -756,11 +913,14 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, if (len > bleft) return; - /* No domain field in LANMAN case. Domain is - returned by old servers in the SMB negprot response */ - /* BB For newer servers which do not support Unicode, - but thus do return domain here we could add parsing - for it later, but it is not very important */ + /* + * No domain field in LANMAN case. Domain is + * returned by old servers in the SMB negprot response + * + * BB For newer servers which do not support Unicode, + * but thus do return domain here, we could add parsing + * for it later, but it is not very important + */ cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -816,9 +976,12 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = server_flags; memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); - /* In particular we can examine sign flags */ - /* BB spec says that if AvId field of MsvAvTimestamp is populated then - we must set the MIC field of the AUTHENTICATE_MESSAGE */ + /* + * In particular we can examine sign flags + * + * BB spec says that if AvId field of MsvAvTimestamp is populated then + * we must set the MIC field of the AUTHENTICATE_MESSAGE + */ tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 601e7a187f87..a959ed2c9b22 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -756,6 +756,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *pserver; /* do not query too frequently */ if (ses->iface_last_update && @@ -780,6 +781,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc) goto out; + /* check if iface is still active */ + pserver = ses->chans[0].server; + if (pserver && !cifs_chan_is_iface_active(ses, pserver)) + cifs_chan_update_iface(ses, pserver); + out: kfree(out_buf); return rc; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c75a80bb6d9e..2eb29fa278c3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -163,6 +163,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, int rc = 0; struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; + int xid; + struct TCP_Server_Info *pserver; + unsigned int chan_index; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -223,6 +226,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, return -EAGAIN; } } + + /* if server is marked for termination, cifsd will cleanup */ + if 
(server->terminate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } spin_unlock(&server->srv_lock); again: @@ -242,11 +251,23 @@ again: mutex_lock(&ses->session_mutex); /* + * if this is called by delayed work, and the channel has been disabled + * in parallel, the delayed work can continue to execute in parallel; + * there's a chance that this channel may not exist anymore + */ + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } + + /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ - spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); mutex_unlock(&ses->session_mutex); @@ -283,6 +304,53 @@ again: rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { + /* + * if server stopped supporting multichannel + * and the first channel reconnected, disable all the others. + */ + if (ses->chan_count > 1 && + !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + if (SERVER_IS_CHAN(server)) { + cifs_dbg(VFS, "server %s does not support " \ + "multichannel anymore. skipping secondary channel\n", + ses->server->hostname); + + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + goto skip_terminate; + } + + ses->chans[chan_index].server = NULL; + spin_unlock(&ses->chan_lock); + + /* + * the above reference of server by channel + * needs to be dropped without holding chan_lock + * as cifs_put_tcp_session takes a higher lock + * i.e. cifs_tcp_ses_lock + */ + cifs_put_tcp_session(server, 1); + + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + + /* mark primary server as needing reconnect */ + pserver = server->primary_server; + cifs_signal_cifsd_for_reconnect(pserver, false); + +skip_terminate: + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } else { + cifs_server_dbg(VFS, "does not support " \ + "multichannel anymore. disabling all other channels\n"); + cifs_disable_secondary_channels(ses); + } + } + rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); @@ -307,17 +375,44 @@ skip_sess_setup: tcon->need_reopen_files = true; rc = cifs_tree_connect(0, tcon, nls_codepage); - mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ...
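Several hunks in this series (here in smb2_reconnect(), and earlier in __cifs_reconnect() and SMB2_echo()) repeat one idiom: bump server->srv_count before mod_delayed_work(), then give the reference straight back if the work was already pending, since the reconnect worker drops exactly one reference when it finishes. A self-contained sketch of that accounting, with mod_delayed_work() modeled only by its return convention (true when the work was already queued):

#include <stdbool.h>
#include <stdio.h>

struct server {
	int srv_count;
	bool work_pending;
};

static void put_server(struct server *s)
{
	if (--s->srv_count == 0)
		printf("last ref dropped, server freed\n");
}

/* returns true if the work was already queued (kernel semantics) */
static bool mod_delayed_work(struct server *s)
{
	bool was_pending = s->work_pending;

	s->work_pending = true;
	return was_pending;
}

static void schedule_reconnect(struct server *s)
{
	s->srv_count++;			/* ref the worker will drop */
	if (mod_delayed_work(s))
		put_server(s);		/* pending work already holds one */
}

static void reconnect_worker(struct server *s)
{
	s->work_pending = false;
	/* ... reconnect tcons and channels ... */
	put_server(s);			/* drop the scheduler's ref */
}

int main(void)
{
	struct server srv = { .srv_count = 1, .work_pending = false };

	schedule_reconnect(&srv);	/* queues work: count 1 -> 2 */
	schedule_reconnect(&srv);	/* already pending: count stays 2 */
	reconnect_worker(&srv);		/* count back to 1 */
	put_server(&srv);		/* final put: freed */
	return 0;
}

However often scheduling races with itself, the worker always ends up holding exactly one reference to drop, which is the invariant these hunks restore.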
*/ + mutex_unlock(&ses->session_mutex); cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + mutex_unlock(&ses->session_mutex); + + /* + * query server network interfaces, in case they change + */ + xid = get_xid(); + rc = SMB3_request_interfaces(xid, tcon, false); + free_xid(xid); + + if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + + if (ses->chan_max > ses->chan_count && + !SERVER_IS_CHAN(server)) { + if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); + + cifs_try_adding_channels(ses); + } + } else { + mutex_unlock(&ses->session_mutex); + } + if (smb2_command != SMB2_INTERNAL_CMD) - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); atomic_inc(&tconInfoReconnectCount); out: @@ -3808,6 +3903,13 @@ void smb2_reconnect_server(struct work_struct *work) /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ mutex_lock(&pserver->reconnect_mutex); + /* if the server is marked for termination, drop the ref count here */ + if (server->terminate) { + cifs_put_tcp_session(server, true); + mutex_unlock(&pserver->reconnect_mutex); + return; + } + INIT_LIST_HEAD(&tmp_list); INIT_LIST_HEAD(&tmp_ses_list); cifs_dbg(FYI, "Reconnecting tcons and channels\n"); @@ -3852,12 +3954,6 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } - /* - * Get the reference to server struct to be sure that the last call of - * cifs_put_tcon() in the loop below won't release the server pointer. - */ - if (tcon_exist || ses_exist) - server->srv_count++; spin_unlock(&cifs_tcp_ses_lock); @@ -3905,13 +4001,17 @@ void smb2_reconnect_server(struct work_struct *work) done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); - if (resched) + if (resched) { queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); + mutex_unlock(&pserver->reconnect_mutex); + + /* no need to put tcp session as we're retrying */ + return; + } mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist || ses_exist) - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, true); } int @@ -3931,7 +4031,12 @@ SMB2_echo(struct TCP_Server_Info *server) server->ops->need_neg(server)) { spin_unlock(&server->srv_lock); /* No need to send echo on newly established connections */ - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); + return rc; } spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 23c50ed7d4b5..84ea67301303 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -413,7 +413,13 @@ generate_smb3signingkey(struct cifs_ses *ses, ses->ses_status == SES_GOOD); chan_index = cifs_ses_get_chan_index(ses, server); - /* TODO: introduce ref counting for channels when the can be freed */ + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + + return -EINVAL; + } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index d553b7a54621..4f717ad7c21b 100644 --- a/fs/smb/client/transport.c 
+++ b/fs/smb/client/transport.c @@ -1023,7 +1023,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { server = ses->chans[i].server; - if (!server) + if (!server || server->terminate) continue; /* diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index ac199160bce6..6780aa3e98a1 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -150,10 +150,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto out; - if (pTcon->ses->server->ops->set_EA) + if (pTcon->ses->server->ops->set_EA) { rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, name, value, (__u16)size, cifs_sb->local_nls, cifs_sb); + if (rc == 0) + inode_set_ctime_current(inode); + } break; case XATTR_CIFS_ACL: diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index e6ba1e9b8589..6691ae68af0c 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -366,11 +366,22 @@ static int smb1_allocate_rsp_buf(struct ksmbd_work *work) return 0; } +/** + * set_smb1_rsp_status() - set error type in smb response header + * @work: smb work containing smb response header + * @err: error code to set in response + */ +static void set_smb1_rsp_status(struct ksmbd_work *work, __le32 err) +{ + work->send_no_response = 1; +} + static struct smb_version_ops smb1_server_ops = { .get_cmd_val = get_smb1_cmd_val, .init_rsp_hdr = init_smb1_rsp_hdr, .allocate_rsp_buf = smb1_allocate_rsp_buf, .check_user_session = smb1_check_user_session, + .set_rsp_status = set_smb1_rsp_status, }; static int smb1_negotiate(struct ksmbd_work *work) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 6c0305be895e..51b8bfab7481 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1107,6 +1107,7 @@ pass: struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + int pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1119,9 +1120,10 @@ pass: pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + - nt_size, GFP_KERNEL); + pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + + pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + + pntsd = kzalloc(pntsd_alloc_size, GFP_KERNEL); if (!pntsd) { rc = -ENOMEM; goto free_aces_base; @@ -1136,6 +1138,27 @@ pass: pntsd->gsidoffset = parent_pntsd->gsidoffset; pntsd->dacloffset = parent_pntsd->dacloffset; + if ((u64)le32_to_cpu(pntsd->osidoffset) + powner_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->gsidoffset) + pgroup_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->dacloffset) + sizeof(struct smb_acl) + nt_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + if (pntsd->osidoffset) { struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 1053127f71ad..c53dea5598fc 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1177,9 +1177,10 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, /** * ksmbd_vfs_kern_path_locked() - 
lookup a file and get path info - @name: file path that is relative to share - @flags: lookup flags - @path: if lookup succeed, return path info + @name: file path that is relative to share + @flags: lookup flags + @parent_path: if lookup succeeds, return parent_path info + @path: if lookup succeeds, return path info * @caseless: caseless filename lookup * * Return: 0 on success, otherwise error diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 723763746238..62972f0ff868 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -173,6 +173,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, const struct export_operations squashfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = squashfs_fh_to_dentry, .fh_to_parent = squashfs_fh_to_parent, .get_parent = squashfs_get_parent diff --git a/fs/super.c b/fs/super.c index 77faad662739..076392396e72 100644 --- a/fs/super.c +++ b/fs/super.c @@ -2160,3 +2160,4 @@ int sb_init_dio_done_wq(struct super_block *sb) destroy_workqueue(wq); return 0; } +EXPORT_SYMBOL_GPL(sb_init_dio_done_wq); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 23377c1baed9..a480810cd4e3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -137,6 +137,7 @@ static struct dentry *ufs_get_parent(struct dentry *child) } static const struct export_operations ufs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ufs_fh_to_dentry, .fh_to_parent = ufs_fh_to_parent, .get_parent = ufs_get_parent, diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 30c931b38853..be62acffad6c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -21,7 +21,7 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_quota.h" @@ -2989,7 +2989,7 @@ xfs_bmap_extsize_align( * If realtime, and the result isn't a multiple of the realtime * extent size we need to remove blocks until it is. */ - if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { + if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) { /* * We're not covering the original request, or * we won't be able to once we fix the length. @@ -3016,7 +3016,7 @@ else { align_alen -= orig_off - align_off; align_off = orig_off; - align_alen -= align_alen % mp->m_sb.sb_rextsize; + align_alen -= xfs_extlen_to_rtxmod(mp, align_alen); } /* * Result doesn't cover the request, fail it.
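The xfs_extlen_to_rtxmod() conversions above replace open-coded modulo against sb_rextsize; the intent is simply to trim a candidate allocation down to a whole number of realtime extents. A toy version with an illustrative extent size (the real helper also uses the mount's cached rt geometry and power-of-two fast paths):

#include <stdio.h>
#include <stdint.h>

/* analogue of xfs_extlen_to_rtxmod(): blocks past the last whole extent */
static uint32_t extlen_to_rtxmod(uint32_t len, uint32_t rextsize)
{
	return len % rextsize;
}

int main(void)
{
	uint32_t rextsize = 16;			/* blocks per rt extent */
	uint32_t align_alen = 100;		/* candidate length */
	uint32_t temp = extlen_to_rtxmod(align_alen, rextsize);

	if (temp)
		align_alen -= temp;		/* trim 100 -> 96 blocks */
	printf("aligned length: %u blocks (%u extents)\n",
	       align_alen, align_alen / rextsize);
	return 0;
}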
@@ -4826,12 +4826,8 @@ xfs_bmap_del_extent_delay( ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); - if (isrt) { - uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); - - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_frextents(mp, rtexts); - } + if (isrt) + xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount)); /* * Update the inode delalloc counter now and wait to update the @@ -5057,33 +5053,20 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_filblks_t len; - xfs_extlen_t mod; - - len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); - if (!(bflags & XFS_BMAPI_REMAP)) { - xfs_fsblock_t bno; - - bno = div_u64_rem(del->br_startblock, - mp->m_sb.sb_rextsize, &mod); - ASSERT(mod == 0); - - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + error = xfs_rtfree_blocks(tp, del->br_startblock, + del->br_blockcount); if (error) goto done; } do_fx = 0; - nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; } else { do_fx = 1; - nblks = del->br_blockcount; qfield = XFS_TRANS_DQ_BCOUNT; } + nblks = del->br_blockcount; del_endblock = del->br_startblock + del->br_blockcount; if (cur) { @@ -5289,7 +5272,6 @@ __xfs_bunmapi( int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ - xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t end; struct xfs_iext_cursor icur; @@ -5384,8 +5366,8 @@ __xfs_bunmapi( if (!isrt) goto delete; - sum = del.br_startblock + del.br_blockcount; - div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod); + mod = xfs_rtb_to_rtxoff(mp, + del.br_startblock + del.br_blockcount); if (mod) { /* * Realtime extent not lined up at the end. @@ -5432,7 +5414,8 @@ __xfs_bunmapi( goto error0; goto nodelete; } - div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod); + + mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); if (mod) { xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; @@ -6209,8 +6192,8 @@ xfs_bmap_validate_extent( return __this_address; if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { - if (!xfs_verify_rtext(mp, irec->br_startblock, - irec->br_blockcount)) + if (!xfs_verify_rtbext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } else { if (!xfs_verify_fsbext(mp, irec->br_startblock, diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 371dc07233e0..9a88aba1589f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -98,7 +98,7 @@ typedef struct xfs_sb { uint32_t sb_blocksize; /* logical block size, bytes */ xfs_rfsblock_t sb_dblocks; /* number of data blocks */ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ - xfs_rtblock_t sb_rextents; /* number of realtime extents */ + xfs_rtbxlen_t sb_rextents; /* number of realtime extents */ uuid_t sb_uuid; /* user-visible file system unique id */ xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ @@ -691,6 +691,22 @@ struct xfs_agfl { xfs_daddr_to_agno(mp, (d) + (len) - 1))) /* + * Realtime bitmap information is accessed by the word, which is currently + * stored in host-endian format. + */ +union xfs_rtword_raw { + __u32 old; +}; + +/* + * Realtime summary counts are accessed by the word, which is currently + * stored in host-endian format. 
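The new xfs_rtword_raw/xfs_suminfo_raw unions look odd with a single member, but they force every access through an accessor, so an endian-converted (or wider) on-disk format can be added later without touching callers. A minimal standalone analogue of the accessor pattern; the union and accessor names follow the kernel's, the surrounding types do not:

#include <stdint.h>
#include <stdio.h>

union rtword_raw {
	uint32_t old;		/* current host-endian format */
};

static uint32_t rtbitmap_getword(const union rtword_raw *words,
				 unsigned int index)
{
	return words[index].old;	/* future formats decode here */
}

int main(void)
{
	union rtword_raw buf[2] = { { 0xFFFF0000u }, { 0x0000000Fu } };

	printf("word 0: 0x%08x\n", rtbitmap_getword(buf, 0));
	printf("word 1: 0x%08x\n", rtbitmap_getword(buf, 1));
	return 0;
}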
+ */ +union xfs_suminfo_raw { + __u32 old; +}; + +/* * XFS Timestamps * ============== * @@ -1142,24 +1158,10 @@ static inline bool xfs_dinode_has_large_extent_counts( #define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize) #define XFS_BLOCKMASK(mp) ((mp)->m_blockmask) -#define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize) -#define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask) /* - * RT Summary and bit manipulation macros. + * RT bit manipulation macros. */ -#define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb))) -#define XFS_SUMOFFSTOBLOCK(mp,s) \ - (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) -#define XFS_SUMPTR(mp,bp,so) \ - ((xfs_suminfo_t *)((bp)->b_addr + \ - (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) - -#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) -#define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log) -#define XFS_BITTOWORD(mp,bi) \ - ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp))) - #define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b)) #define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b)) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 396648acb5be..c269d704314d 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -16,6 +16,7 @@ #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_rtbitmap.h" /* * Realtime allocator bitmap functions shared with userspace. @@ -46,25 +47,69 @@ const struct xfs_buf_ops xfs_rtbuf_ops = { .verify_write = xfs_rtbuf_verify_write, }; +/* Release cached rt bitmap and summary buffers. */ +void +xfs_rtbuf_cache_relse( + struct xfs_rtalloc_args *args) +{ + if (args->rbmbp) { + xfs_trans_brelse(args->tp, args->rbmbp); + args->rbmbp = NULL; + args->rbmoff = NULLFILEOFF; + } + if (args->sumbp) { + xfs_trans_brelse(args->tp, args->sumbp); + args->sumbp = NULL; + args->sumoff = NULLFILEOFF; + } +} + /* * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ int xfs_rtbuf_get( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t block, /* block number in bitmap or summary */ - int issum, /* is summary not bitmap */ - struct xfs_buf **bpp) /* output: buffer for the block */ + struct xfs_rtalloc_args *args, + xfs_fileoff_t block, /* block number in bitmap or summary */ + int issum) /* is summary not bitmap */ { - struct xfs_buf *bp; /* block buffer, result */ - xfs_inode_t *ip; /* bitmap or summary inode */ - xfs_bmbt_irec_t map; - int nmap = 1; - int error; /* error value */ + struct xfs_mount *mp = args->mp; + struct xfs_buf **cbpp; /* cached block buffer */ + xfs_fileoff_t *coffp; /* cached block number */ + struct xfs_buf *bp; /* block buffer, result */ + struct xfs_inode *ip; /* bitmap or summary inode */ + struct xfs_bmbt_irec map; + enum xfs_blft type; + int nmap = 1; + int error; - ip = issum ? mp->m_rsumip : mp->m_rbmip; + if (issum) { + cbpp = &args->sumbp; + coffp = &args->sumoff; + ip = mp->m_rsumip; + type = XFS_BLFT_RTSUMMARY_BUF; + } else { + cbpp = &args->rbmbp; + coffp = &args->rbmoff; + ip = mp->m_rbmip; + type = XFS_BLFT_RTBITMAP_BUF; + } + + /* + * If we have a cached buffer, and the block number matches, use that. + */ + if (*cbpp && *coffp == block) + return 0; + + /* + * Otherwise we have to get the buffer. If there was an old + * one, get rid of it first.
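Aside: xfs_rtbuf_get() now keeps one cached buffer per file (bitmap and summary) in struct xfs_rtalloc_args, reusing it while the caller walks adjacent blocks. A minimal userspace model of that one-slot cache, assuming stand-in read_block()/release_block() primitives and a fake in-memory disk; none of these names are kernel API.

#include <stdint.h>
#include <stdio.h>

#define NULLOFF ((uint64_t)-1)

static char blocks[4][16];	/* fake disk: four blocks */
static int reads;

static void *read_block(uint64_t block) { reads++; return blocks[block]; }
static void release_block(void *bp) { (void)bp; }

struct cached_buf {
	void		*bp;	/* held buffer, or NULL */
	uint64_t	off;	/* block number it maps, or NULLOFF */
};

static void *cache_get(struct cached_buf *c, uint64_t block)
{
	if (c->bp && c->off == block)
		return c->bp;			/* hit: no release, no re-read */
	if (c->bp)
		release_block(c->bp);		/* miss: drop the stale buffer */
	c->bp = read_block(block);
	c->off = block;
	return c->bp;
}

int main(void)
{
	struct cached_buf c = { NULL, NULLOFF };

	cache_get(&c, 2);
	cache_get(&c, 2);	/* served from the cache */
	cache_get(&c, 3);	/* forces a release and a re-read */
	printf("reads = %d\n", reads);	/* prints 2 */
	return 0;
}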
+ */ + if (*cbpp) { + xfs_trans_brelse(args->tp, *cbpp); + *cbpp = NULL; + } error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0); if (error) @@ -74,15 +119,15 @@ xfs_rtbuf_get( return -EFSCORRUPTED; ASSERT(map.br_startblock != NULLFSBLOCK); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); if (error) return error; - xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF - : XFS_BLFT_RTBITMAP_BUF); - *bpp = bp; + xfs_trans_buf_set_type(args->tp, bp, type); + *cbpp = bp; + *coffp = block; return 0; } @@ -92,47 +137,44 @@ xfs_rtbuf_get( */ int xfs_rtfind_back( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to look at */ + xfs_rtxnum_t limit, /* last rtext to look at */ + xfs_rtxnum_t *rtx) /* out: start rtext found */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t firstbit; /* first useful bit in the word */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; /* error value */ + xfs_rtxnum_t firstbit; /* first useful bit in the word */ + xfs_rtxnum_t i; /* current bit number rel. to start */ + xfs_rtxnum_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute and read in starting bitmap block for starting block. */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + block = xfs_rtx_to_rbmblock(mp, start); + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Get the first word's index & point to it. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); len = start - limit + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + incore = xfs_rtbitmap_getword(args, word); + want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0; /* * If the starting position is not word-aligned, deal with the * partial word. @@ -149,13 +191,12 @@ xfs_rtfind_back( * Calculate the difference between the value there * and what we're looking for. */ - if ((wdiff = (*b ^ want) & mask)) { + if ((wdiff = (incore ^ want) & mask)) { /* * Different. Mark where we are and return. 
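Aside: the search above hinges on one trick: XOR each bitmap word against a "want" pattern (all ones when the starting bit is free, all zeroes otherwise), so any set bit in the result marks where the run of identical bits ends; XFS_RTHIBIT picks the highest such bit when scanning backward. A self-contained sketch of that computation confined to a single 32-bit word, with hibit() standing in for XFS_RTHIBIT:

#include <assert.h>
#include <stdint.h>

static int hibit(uint32_t w)		/* highest set bit, 0..31 */
{
	return 31 - __builtin_clz(w);
}

/* Length of the run of identical bits ending at "bit" (inclusive). */
static int run_back(uint32_t word, int bit)
{
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u;
	uint32_t mask = (bit == 31) ? ~0u : (1u << (bit + 1)) - 1;
	uint32_t wdiff = (word ^ want) & mask;

	if (!wdiff)
		return bit + 1;		/* run covers bits 0..bit */
	return bit - hibit(wdiff);
}

int main(void)
{
	/* bits 4..6 set in 0x70: run of ones ending at bit 6 has length 3 */
	assert(run_back(0x70, 6) == 3);
	/* bit 7 clear, bits 4..6 set: run of zeroes at bit 7 has length 1 */
	assert(run_back(0x70, 7) == 1);
	return 0;
}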
*/ - xfs_trans_brelse(tp, bp); i = bit - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } i = bit - firstbit + 1; @@ -167,19 +208,11 @@ xfs_rtfind_back( /* * If done with this block, get the previous one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, --block); + if (error) return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; + + word = mp->m_blockwsize - 1; } } else { /* @@ -195,13 +228,13 @@ xfs_rtfind_back( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ want)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ want)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } i += XFS_NBWORD; @@ -213,19 +246,11 @@ xfs_rtfind_back( /* * If done with this block, get the previous one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, --block); + if (error) return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; + + word = mp->m_blockwsize - 1; } } /* @@ -242,13 +267,13 @@ xfs_rtfind_back( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ want) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ want) & mask)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } else i = len; @@ -256,8 +281,7 @@ xfs_rtfind_back( /* * No match, return that we scanned the whole area. */ - xfs_trans_brelse(tp, bp); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } @@ -267,47 +291,44 @@ xfs_rtfind_back( */ int xfs_rtfind_forw( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to look at */ + xfs_rtxnum_t limit, /* last rtext to look at */ + xfs_rtxnum_t *rtx) /* out: start rtext found */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in the word */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + xfs_rtxnum_t i; /* current bit number rel. 
to start */ + xfs_rtxnum_t lastbit;/* last useful bit in the word */ + xfs_rtxnum_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute and read in starting bitmap block for starting block. */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + block = xfs_rtx_to_rbmblock(mp, start); + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Get the first word's index & point to it. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); len = limit - start + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + incore = xfs_rtbitmap_getword(args, word); + want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0; /* * If the starting position is not word-aligned, deal with the * partial word. @@ -323,13 +344,12 @@ xfs_rtfind_forw( * Calculate the difference between the value there * and what we're looking for. */ - if ((wdiff = (*b ^ want) & mask)) { + if ((wdiff = (incore ^ want) & mask)) { /* * Different. Mark where we are and return. */ - xfs_trans_brelse(tp, bp); i = XFS_RTLOBIT(wdiff) - bit; - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } i = lastbit - bit; /* * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } else { /* @@ -368,13 +381,13 @@ xfs_rtfind_forw( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ want)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ want)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } i += XFS_NBWORD; /* * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } /* @@ -412,13 +418,13 @@ xfs_rtfind_forw( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ want) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ want) & mask)) { /* * Different, mark where we are and return.
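Aside: the forward scan is symmetric to the backward one: it masks off bits below the starting position and uses the lowest set bit of the XOR difference (XFS_RTLOBIT) to bound the run. A single-word sketch, with lobit() standing in for XFS_RTLOBIT; all names are local to the example.

#include <assert.h>
#include <stdint.h>

static int lobit(uint32_t w)		/* lowest set bit, 0..31 */
{
	return __builtin_ctz(w);
}

/* Length of the run of identical bits starting at "bit" (inclusive). */
static int run_forw(uint32_t word, int bit)
{
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u;
	uint32_t wdiff = (word ^ want) & ~((1u << bit) - 1);

	if (!wdiff)
		return 32 - bit;	/* run reaches the end of the word */
	return lobit(wdiff) - bit;
}

int main(void)
{
	/* bits 4..6 set in 0x70: run of ones from bit 4 has length 3 */
	assert(run_forw(0x70, 4) == 3);
	/* bits 0..3 clear: run of zeroes from bit 0 has length 4 */
	assert(run_forw(0x70, 0) == 4);
	return 0;
}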
*/ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } else i = len; @@ -426,11 +432,25 @@ xfs_rtfind_forw( /* * No match, return that we scanned the whole area. */ - xfs_trans_brelse(tp, bp); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } +/* Log rtsummary counter at @infoword. */ +static inline void +xfs_trans_log_rtsummary( + struct xfs_rtalloc_args *args, + unsigned int infoword) +{ + struct xfs_buf *bp = args->sumbp; + size_t first, last; + + first = (void *)xfs_rsumblock_infoptr(args, infoword) - bp->b_addr; + last = first + sizeof(xfs_suminfo_t) - 1; + + xfs_trans_log_buf(args->tp, bp, first, last); +} + /* * Read and/or modify the summary information for a given extent size, * bitmap block combination. @@ -442,86 +462,77 @@ xfs_rtfind_forw( */ int xfs_rtmodify_summary_int( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { - struct xfs_buf *bp; /* buffer for the summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ + struct xfs_mount *mp = args->mp; + int error; + xfs_fileoff_t sb; /* summary fsblock */ + xfs_rtsumoff_t so; /* index into the summary file */ + unsigned int infoword; /* * Compute entry number in the summary file. */ - so = XFS_SUMOFFS(mp, log, bbno); + so = xfs_rtsumoffs(mp, log, bbno); /* * Compute the block number in the summary file. */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (*rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (*rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - *rbpp = bp; - *rsb = sb; - } + sb = xfs_rtsumoffs_to_block(mp, so); + + error = xfs_rtsummary_read_buf(args, sb); + if (error) + return error; + /* * Point to the summary information, modify/log it, and/or copy it out. 
*/ - sp = XFS_SUMPTR(mp, bp, so); + infoword = xfs_rtsumoffs_to_infoword(mp, so); if (delta) { - uint first = (uint)((char *)sp - (char *)bp->b_addr); + xfs_suminfo_t val = xfs_suminfo_add(args, infoword, delta); - *sp += delta; if (mp->m_rsum_cache) { - if (*sp == 0 && log == mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno]++; - if (*sp != 0 && log < mp->m_rsum_cache[bbno]) + if (val == 0 && log + 1 == mp->m_rsum_cache[bbno]) mp->m_rsum_cache[bbno] = log; + if (val != 0 && log >= mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log + 1; } - xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); + xfs_trans_log_rtsummary(args, infoword); + if (sum) + *sum = val; + } else if (sum) { + *sum = xfs_suminfo_get(args, infoword); } - if (sum) - *sum = *sp; return 0; } int xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int delta) /* change to make to summary info */ +{ + return xfs_rtmodify_summary_int(args, log, bbno, delta, NULL); +} + +/* Log rtbitmap block from the word @from to the byte before @next. */ +static inline void +xfs_trans_log_rtbitmap( + struct xfs_rtalloc_args *args, + unsigned int from, + unsigned int next) { - return xfs_rtmodify_summary_int(mp, tp, log, bbno, - delta, rbpp, rsb, NULL); + struct xfs_buf *bp = args->rbmbp; + size_t first, last; + + first = (void *)xfs_rbmblock_wordptr(args, from) - bp->b_addr; + last = ((void *)xfs_rbmblock_wordptr(args, next) - 1) - bp->b_addr; + + xfs_trans_log_buf(args->tp, bp, first, last); } /* @@ -530,41 +541,37 @@ xfs_rtmodify_summary( */ int xfs_rtmodify_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to modify */ - xfs_extlen_t len, /* length of extent to modify */ - int val) /* 1 for free, 0 for allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to modify */ + xfs_rtxlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtword_t *first; /* first used word in the buffer */ - int i; /* current bit number rel. to start */ - int lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask o frelevant bits for value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t incore; + unsigned int firstword; /* first word used in the buffer */ + unsigned int word; /* word number in the buffer */ /* * Compute starting bitmap block number. */ - block = XFS_BITTOBLOCK(mp, start); + block = xfs_rtx_to_rbmblock(mp, start); /* * Read the bitmap block, and point to its data.
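Aside: the summary file is conceptually a two-dimensional array indexed by log2(extent length) and bitmap block number, flattened into 32-bit words. A standalone sketch of the offset math now wrapped by xfs_rtsumoffs(), xfs_rtsumoffs_to_block() and xfs_rtsumoffs_to_infoword(); the geometry values here are invented for the example.

#include <assert.h>
#include <stdint.h>

#define SUMINFO_SIZE	4u	/* bytes per summary counter */

struct geom {
	uint32_t	rbmblocks;	/* blocks in the rt bitmap file */
	uint32_t	blocklog;	/* log2 of the fs block size */
};

/* Flat word index of summary[log2len][bbno]. */
static uint32_t sumoffs(const struct geom *g, int log2len, uint32_t bbno)
{
	return log2len * g->rbmblocks + bbno;
}

/* Which block of the summary file holds that word. */
static uint32_t sumoffs_to_block(const struct geom *g, uint32_t so)
{
	return (so * SUMINFO_SIZE) >> g->blocklog;
}

/* Word index within that block. */
static uint32_t sumoffs_to_infoword(const struct geom *g, uint32_t so)
{
	uint32_t words_per_block = (1u << g->blocklog) / SUMINFO_SIZE;

	return so & (words_per_block - 1);
}

int main(void)
{
	struct geom g = { .rbmblocks = 100, .blocklog = 12 };	/* 4k blocks */
	uint32_t so = sumoffs(&g, 11, 42);	/* 11 * 100 + 42 */

	assert(so == 1142);
	assert(sumoffs_to_block(&g, so) == 1);		/* 4568 / 4096 */
	assert(sumoffs_to_infoword(&g, so) == 118);	/* 1142 % 1024 */
	return 0;
}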
*/ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Compute the starting word's address, and starting bit. */ - word = XFS_BITTOWORD(mp, start); - first = b = &bufp[word]; + firstword = word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); /* * 0 (allocated) => all zeroes; 1 (free) => all ones. @@ -583,34 +590,28 @@ xfs_rtmodify_range( /* * Set/clear the active bits. */ + incore = xfs_rtbitmap_getword(args, word); if (val) - *b |= mask; + incore |= mask; else - *b &= ~mask; + incore &= ~mask; + xfs_rtbitmap_setword(args, word, incore); i = lastbit - bit; /* * Go on to the next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * Log the changed part of this block. * Get the next one. */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + xfs_trans_log_rtbitmap(args, firstword, word); + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; + + firstword = word = 0; } } else { /* @@ -626,31 +627,23 @@ xfs_rtmodify_range( /* * Set the word value correctly. */ - *b = val; + xfs_rtbitmap_setword(args, word, val); i += XFS_NBWORD; /* * Go on to the next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * Log the changed part of this block. * Get the next one. */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + xfs_trans_log_rtbitmap(args, firstword, word); + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; + + firstword = word = 0; } } /* @@ -665,18 +658,19 @@ xfs_rtmodify_range( /* * Set/clear the active bits. */ + incore = xfs_rtbitmap_getword(args, word); if (val) - *b |= mask; + incore |= mask; else - *b &= ~mask; - b++; + incore &= ~mask; + xfs_rtbitmap_setword(args, word, incore); + word++; } /* * Log any remaining changed bytes. 
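Aside: xfs_rtmodify_range() sets or clears an arbitrary run of bits by handling a leading partial word, then whole words, then a trailing partial word. A compact userspace model of the same masking, operating on an in-memory word array instead of transaction buffers; the function name is local to this sketch.

#include <assert.h>
#include <stdint.h>

#define NBWORD 32

static void modify_range(uint32_t *map, int start, int len, int val)
{
	int word = start / NBWORD;
	int bit = start % NBWORD;

	while (len > 0) {
		int lastbit = bit + len < NBWORD ? bit + len : NBWORD;
		uint32_t mask = (lastbit - bit == NBWORD) ? ~0u :
				(((1u << (lastbit - bit)) - 1) << bit);

		if (val)
			map[word] |= mask;	/* mark free */
		else
			map[word] &= ~mask;	/* mark allocated */
		len -= lastbit - bit;
		bit = 0;
		word++;
	}
}

int main(void)
{
	uint32_t map[2] = { 0, 0 };

	modify_range(map, 30, 4, 1);	/* run spans the word boundary */
	assert(map[0] == 0xc0000000 && map[1] == 0x3);
	return 0;
}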
*/ - if (b > first) - xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp - 1)); + if (word > firstword) + xfs_trans_log_rtbitmap(args, firstword, word); return 0; } @@ -686,23 +680,21 @@ xfs_rtmodify_range( */ int xfs_rtfree_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to free */ - xfs_extlen_t len, /* length to free */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to free */ + xfs_rtxlen_t len) /* length to free */ { - xfs_rtblock_t end; /* end of the freed extent */ - int error; /* error value */ - xfs_rtblock_t postblock; /* first block freed > end */ - xfs_rtblock_t preblock; /* first block freed < start */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtxnum_t postblock; /* first rtext freed > end */ + xfs_rtxnum_t preblock; /* first rtext freed < start */ end = start + len - 1; /* * Modify the bitmap to mark this extent freed. */ - error = xfs_rtmodify_range(mp, tp, start, len, 1); + error = xfs_rtmodify_range(args, start, len, 1); if (error) { return error; } @@ -711,15 +703,15 @@ xfs_rtfree_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + error = xfs_rtfind_back(args, start, 0, &preblock); if (error) { return error; } /* * Find the next allocated block (end of allocated extent). */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); + error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1, + &postblock); if (error) return error; /* @@ -727,9 +719,9 @@ xfs_rtfree_range( * old extent, add summary data for them to be allocated. */ if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(start - preblock), + xfs_rtx_to_rbmblock(mp, preblock), -1); if (error) { return error; } } /* * If there are blocks not being freed at the end of the * old extent, add summary data for them to be allocated. */ if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock - end), + xfs_rtx_to_rbmblock(mp, end + 1), -1); if (error) { return error; } } /* * Increment the summary information corresponding to the entire * (new) free extent.
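Aside: the summary bookkeeping in xfs_rtfree_range() is easiest to see with numbers: freeing an extent between two free neighbours removes the neighbours' summary entries (delta -1) and adds a single entry for the merged run (delta +1). A toy model of that accounting, with sum[] as a histogram over log2(run length in rt extents); the lengths are made up for the example.

#include <assert.h>
#include <stdint.h>

static int log2_len(uint64_t len)
{
	return 63 - __builtin_clzll(len);
}

int main(void)
{
	int sum[64] = { 0 };
	uint64_t pre = 4, freed = 8, post = 4;	/* run lengths in rtexts */

	sum[log2_len(pre)]++;			/* existing left neighbour */
	sum[log2_len(post)]++;			/* existing right neighbour */

	/* Free "freed" extents between them: the neighbours disappear... */
	sum[log2_len(pre)]--;
	sum[log2_len(post)]--;
	/* ...and one merged free run of 16 extents appears. */
	sum[log2_len(pre + freed + post)]++;

	assert(sum[2] == 0 && sum[4] == 1);	/* 16 => log2 == 4 */
	return 0;
}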
*/ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - return error; + return xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + xfs_rtx_to_rbmblock(mp, preblock), 1); } /* @@ -762,43 +753,39 @@ xfs_rtfree_range( */ int xfs_rtcheck_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int val, /* 1 for free, 0 for allocated */ - xfs_rtblock_t *new, /* out: first block not matching */ - int *stat) /* out: 1 for matches, 0 for not */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number of extent */ + xfs_rtxlen_t len, /* length of extent */ + int val, /* 1 for free, 0 for allocated */ + xfs_rtxnum_t *new, /* out: first rtext not matching */ + int *stat) /* out: 1 for matches, 0 for not */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + xfs_rtxnum_t i; /* current bit number rel. to start */ + xfs_rtxnum_t lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute starting bitmap block number */ - block = XFS_BITTOBLOCK(mp, start); + block = xfs_rtx_to_rbmblock(mp, start); /* * Read the bitmap block. */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Compute the starting word's address, and starting bit. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); /* * 0 (allocated) => all zero's; 1 (free) => all one's. @@ -820,11 +807,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ val) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ val) & mask)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i = XFS_RTLOBIT(wdiff) - bit; *new = start + i; *stat = 0; @@ -835,22 +822,15 @@ xfs_rtcheck_range( * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. 
- */ - b++; } } else { /* @@ -866,11 +846,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ val)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ val)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); *new = start + i; *stat = 0; @@ -881,22 +861,15 @@ xfs_rtcheck_range( * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } /* @@ -911,11 +884,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ val) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ val) & mask)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); *new = start + i; *stat = 0; @@ -926,7 +899,6 @@ xfs_rtcheck_range( /* * Successful, return. */ - xfs_trans_brelse(tp, bp); *new = start + i; *stat = 1; return 0; @@ -936,58 +908,57 @@ xfs_rtcheck_range( /* * Check that the given extent (block range) is allocated already. */ -STATIC int /* error */ +STATIC int xfs_rtcheck_alloc_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number of extent */ + xfs_rtxlen_t len) /* length of extent */ { - xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ - int stat; - int error; + xfs_rtxnum_t new; /* dummy for xfs_rtcheck_range */ + int stat; + int error; - error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); + error = xfs_rtcheck_range(args, start, len, 0, &new, &stat); if (error) return error; ASSERT(stat); return 0; } #else -#define xfs_rtcheck_alloc_range(m,t,b,l) (0) +#define xfs_rtcheck_alloc_range(a,b,l) (0) #endif /* * Free an extent in the realtime subvolume. Length is expressed in * realtime extents, as is the block number. */ -int /* error */ +int xfs_rtfree_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len) /* length of extent freed */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtxnum_t start, /* starting rtext number to free */ + xfs_rtxlen_t len) /* length of extent freed */ { - int error; /* error value */ - xfs_mount_t *mp; /* file system mount structure */ - xfs_fsblock_t sb; /* summary file block number */ - struct xfs_buf *sumbp = NULL; /* summary file block buffer */ - struct timespec64 atime; - - mp = tp->t_mountp; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; + int error; + struct timespec64 atime; ASSERT(mp->m_rbmip->i_itemp != NULL); ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - error = xfs_rtcheck_alloc_range(mp, tp, bno, len); + error = xfs_rtcheck_alloc_range(&args, start, len); if (error) return error; /* * Free the range of realtime blocks. 
*/ - error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); - if (error) { - return error; - } + error = xfs_rtfree_range(&args, start, len); + if (error) + goto out; + /* * Mark more blocks free in the superblock. */ @@ -1002,11 +973,47 @@ xfs_rtfree_extent( mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; atime = inode_get_atime(VFS_I(mp->m_rbmip)); - *((uint64_t *)&atime) = 0; + atime.tv_sec = 0; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); } - return 0; + error = 0; +out: + xfs_rtbuf_cache_relse(&args); + return error; +} + +/* + * Free some blocks in the realtime subvolume. rtbno and rtlen are in units of + * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen + * cannot exceed XFS_MAX_BMBT_EXTLEN. + */ +int +xfs_rtfree_blocks( + struct xfs_trans *tp, + xfs_fsblock_t rtbno, + xfs_filblks_t rtlen) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_rtxnum_t start; + xfs_filblks_t len; + xfs_extlen_t mod; + + ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); + + len = xfs_rtb_to_rtxrem(mp, rtlen, &mod); + if (mod) { + ASSERT(mod == 0); + return -EIO; + } + + start = xfs_rtb_to_rtxrem(mp, rtbno, &mod); + if (mod) { + ASSERT(mod == 0); + return -EIO; + } + + return xfs_rtfree_extent(tp, start, len); } /* Find all the free records within a given range. */ @@ -1019,10 +1026,14 @@ xfs_rtalloc_query_range( xfs_rtalloc_query_range_fn fn, void *priv) { + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; struct xfs_rtalloc_rec rec; - xfs_rtblock_t rtstart; - xfs_rtblock_t rtend; - xfs_rtblock_t high_key; + xfs_rtxnum_t rtstart; + xfs_rtxnum_t rtend; + xfs_rtxnum_t high_key; int is_free; int error = 0; @@ -1038,13 +1049,13 @@ xfs_rtalloc_query_range( rtstart = low_rec->ar_startext; while (rtstart <= high_key) { /* Is the first block free? */ - error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, + error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend, &is_free); if (error) break; /* How long does the extent go for? */ - error = xfs_rtfind_forw(mp, tp, rtstart, high_key, &rtend); + error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend); if (error) break; @@ -1060,6 +1071,7 @@ xfs_rtalloc_query_range( rtstart = rtend + 1; } + xfs_rtbuf_cache_relse(&args); return error; } @@ -1085,18 +1097,79 @@ int xfs_rtalloc_extent_is_free( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, - xfs_extlen_t len, + xfs_rtxnum_t start, + xfs_rtxlen_t len, bool *is_free) { - xfs_rtblock_t end; + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; + xfs_rtxnum_t end; int matches; int error; - error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + error = xfs_rtcheck_range(&args, start, len, 1, &end, &matches); + xfs_rtbuf_cache_relse(&args); if (error) return error; *is_free = matches; return 0; } + +/* + * Compute the number of rtbitmap blocks needed to track the given number of rt + * extents. + */ +xfs_filblks_t +xfs_rtbitmap_blockcount( + struct xfs_mount *mp, + xfs_rtbxlen_t rtextents) +{ + return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); +} + +/* + * Compute the number of rtbitmap words needed to populate every block of a + * bitmap that is large enough to track the given number of rt extents. 
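Aside: the geometry helpers above encode two facts: the bitmap spends one bit per rt extent, so a block of blocksize bytes covers 8 * blocksize extents, and the summary holds rsumlevels * rbmblocks four-byte counters. A standalone version of the same arithmetic, with howmany() as the usual round-up division; function names are example-local.

#include <assert.h>
#include <stdint.h>

#define NBBY		8
#define WORDLOG		2	/* log2(bytes per bitmap word) */
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

/* One bit per rt extent, rounded up to whole blocks. */
static uint64_t rtbitmap_blockcount(uint32_t blocksize, uint64_t rtextents)
{
	return howmany(rtextents, (uint64_t)NBBY * blocksize);
}

/* rsumlevels * rbmblocks counters of 4 bytes each, in whole blocks. */
static uint64_t rtsummary_blockcount(uint32_t blocksize,
		unsigned int rsumlevels, uint64_t rbmblocks)
{
	uint64_t words = (uint64_t)rsumlevels * rbmblocks;

	return howmany(words << WORDLOG, (uint64_t)blocksize);
}

int main(void)
{
	/* 4k blocks track 32768 extents each: 1M extents -> 32 blocks */
	assert(rtbitmap_blockcount(4096, 1048576) == 32);
	/* 16 levels * 32 blocks = 512 words = 2048 bytes -> 1 block */
	assert(rtsummary_blockcount(4096, 16, 32) == 1);
	return 0;
}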
+ */ +unsigned long long +xfs_rtbitmap_wordcount( + struct xfs_mount *mp, + xfs_rtbxlen_t rtextents) +{ + xfs_filblks_t blocks; + + blocks = xfs_rtbitmap_blockcount(mp, rtextents); + return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; +} + +/* Compute the number of rtsummary blocks needed to track the given rt space. */ +xfs_filblks_t +xfs_rtsummary_blockcount( + struct xfs_mount *mp, + unsigned int rsumlevels, + xfs_extlen_t rbmblocks) +{ + unsigned long long rsumwords; + + rsumwords = (unsigned long long)rsumlevels * rbmblocks; + return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG); +} + +/* + * Compute the number of rtsummary info words needed to populate every block of + * a summary file that is large enough to track the given rt space. + */ +unsigned long long +xfs_rtsummary_wordcount( + struct xfs_mount *mp, + unsigned int rsumlevels, + xfs_extlen_t rbmblocks) +{ + xfs_filblks_t blocks; + + blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks); + return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h new file mode 100644 index 000000000000..c0637057d69c --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_RTBITMAP_H__ +#define __XFS_RTBITMAP_H__ + +struct xfs_rtalloc_args { + struct xfs_mount *mp; + struct xfs_trans *tp; + + struct xfs_buf *rbmbp; /* bitmap block buffer */ + struct xfs_buf *sumbp; /* summary block buffer */ + + xfs_fileoff_t rbmoff; /* bitmap block number */ + xfs_fileoff_t sumoff; /* summary block number */ +}; + +static inline xfs_rtblock_t +xfs_rtx_to_rtb( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + if (mp->m_rtxblklog >= 0) + return rtx << mp->m_rtxblklog; + + return rtx * mp->m_sb.sb_rextsize; +} + +static inline xfs_extlen_t +xfs_rtxlen_to_extlen( + struct xfs_mount *mp, + xfs_rtxlen_t rtxlen) +{ + if (mp->m_rtxblklog >= 0) + return rtxlen << mp->m_rtxblklog; + + return rtxlen * mp->m_sb.sb_rextsize; +} + +/* Compute the misalignment between an extent length and a realtime extent .*/ +static inline unsigned int +xfs_extlen_to_rtxmod( + struct xfs_mount *mp, + xfs_extlen_t len) +{ + if (mp->m_rtxblklog >= 0) + return len & mp->m_rtxblkmask; + + return len % mp->m_sb.sb_rextsize; +} + +static inline xfs_rtxlen_t +xfs_extlen_to_rtxlen( + struct xfs_mount *mp, + xfs_extlen_t len) +{ + if (mp->m_rtxblklog >= 0) + return len >> mp->m_rtxblklog; + + return len / mp->m_sb.sb_rextsize; +} + +/* Convert an rt block number into an rt extent number. */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) + return rtbno >> mp->m_rtxblklog; + + return div_u64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Return the offset of an rt block number within an rt extent. */ +static inline xfs_extlen_t +xfs_rtb_to_rtxoff( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) + return rtbno & mp->m_rtxblkmask; + + return do_div(rtbno, mp->m_sb.sb_rextsize); +} + +/* + * Crack an rt block number into an rt extent number and an offset within that + * rt extent. Returns the rt extent number directly and the offset in @off. 
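Aside: all of these converters share the same fast path: sb_rextsize is usually a power of two, so m_rtxblklog and m_rtxblkmask turn divisions into shifts and masks, with a 64-bit division fallback for odd-sized rt extents. A userspace sketch of the quotient/remainder split performed by xfs_rtb_to_rtxrem(); struct rtgeom and its fields are stand-ins for the mount fields, not kernel API.

#include <assert.h>
#include <stdint.h>

struct rtgeom {
	uint32_t	rextsize;	/* rt blocks per rt extent */
	int		rtxblklog;	/* log2(rextsize), or -1 */
	uint64_t	rtxblkmask;	/* rextsize - 1 when power of 2 */
};

static void rtgeom_init(struct rtgeom *g, uint32_t rextsize)
{
	g->rextsize = rextsize;
	if (rextsize && !(rextsize & (rextsize - 1))) {
		g->rtxblklog = __builtin_ctz(rextsize);
		g->rtxblkmask = rextsize - 1;
	} else {
		g->rtxblklog = -1;
		g->rtxblkmask = 0;
	}
}

/* Return the rt extent number; store the offset within it in *off. */
static uint64_t rtb_to_rtxrem(const struct rtgeom *g, uint64_t rtbno,
		uint32_t *off)
{
	if (g->rtxblklog >= 0) {
		*off = rtbno & g->rtxblkmask;	/* cheap mask and shift */
		return rtbno >> g->rtxblklog;
	}
	*off = rtbno % g->rextsize;		/* 64-bit division path */
	return rtbno / g->rextsize;
}

int main(void)
{
	struct rtgeom g;
	uint32_t off;

	rtgeom_init(&g, 16);
	assert(rtb_to_rtxrem(&g, 100, &off) == 6 && off == 4);
	rtgeom_init(&g, 20);
	assert(rtb_to_rtxrem(&g, 100, &off) == 5 && off == 0);
	return 0;
}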
+ */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtxrem( + struct xfs_mount *mp, + xfs_rtblock_t rtbno, + xfs_extlen_t *off) +{ + if (likely(mp->m_rtxblklog >= 0)) { + *off = rtbno & mp->m_rtxblkmask; + return rtbno >> mp->m_rtxblklog; + } + + return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off); +} + +/* + * Convert an rt block number into an rt extent number, rounding up to the next + * rt extent if the rt block is not aligned to an rt extent boundary. + */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtxup( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) { + if (rtbno & mp->m_rtxblkmask) + return (rtbno >> mp->m_rtxblklog) + 1; + return rtbno >> mp->m_rtxblklog; + } + + if (do_div(rtbno, mp->m_sb.sb_rextsize)) + rtbno++; + return rtbno; +} + +/* Round this rtblock up to the nearest rt extent size. */ +static inline xfs_rtblock_t +xfs_rtb_roundup_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return roundup_64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Round this rtblock down to the nearest rt extent size. */ +static inline xfs_rtblock_t +xfs_rtb_rounddown_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return rounddown_64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Convert an rt extent number to a file block offset in the rt bitmap file. */ +static inline xfs_fileoff_t +xfs_rtx_to_rbmblock( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + return rtx >> mp->m_blkbit_log; +} + +/* Convert an rt extent number to a word offset within an rt bitmap block. */ +static inline unsigned int +xfs_rtx_to_rbmword( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1); +} + +/* Convert a file block offset in the rt bitmap file to an rt extent number. */ +static inline xfs_rtxnum_t +xfs_rbmblock_to_rtx( + struct xfs_mount *mp, + xfs_fileoff_t rbmoff) +{ + return rbmoff << mp->m_blkbit_log; +} + +/* Return a pointer to a bitmap word within a rt bitmap block. */ +static inline union xfs_rtword_raw * +xfs_rbmblock_wordptr( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_rtword_raw *words = args->rbmbp->b_addr; + + return words + index; +} + +/* Convert an ondisk bitmap word to its incore representation. */ +static inline xfs_rtword_t +xfs_rtbitmap_getword( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); + + return word->old; +} + +/* Set an ondisk bitmap word from an incore representation. */ +static inline void +xfs_rtbitmap_setword( + struct xfs_rtalloc_args *args, + unsigned int index, + xfs_rtword_t value) +{ + union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); + + word->old = value; +} + +/* + * Convert a rt extent length and rt bitmap block number to a xfs_suminfo_t + * offset within the rt summary file. + */ +static inline xfs_rtsumoff_t +xfs_rtsumoffs( + struct xfs_mount *mp, + int log2_len, + xfs_fileoff_t rbmoff) +{ + return log2_len * mp->m_sb.sb_rbmblocks + rbmoff; +} + +/* + * Convert an xfs_suminfo_t offset to a file block offset within the rt summary + * file. + */ +static inline xfs_fileoff_t +xfs_rtsumoffs_to_block( + struct xfs_mount *mp, + xfs_rtsumoff_t rsumoff) +{ + return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t)); +} + +/* + * Convert an xfs_suminfo_t offset to an info word offset within an rt summary + * block. 
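Aside: taken together, the conversions above decompose an rt extent number into (bitmap block, word within block, bit within word). A self-contained illustration using a hypothetical 4k-block geometry, 32768 bits per bitmap block and 1024 words per block; struct bitloc is local to the sketch.

#include <assert.h>
#include <stdint.h>

#define NBWORDLOG 5	/* log2(32 bits per word) */

struct bitloc {
	uint64_t	block;	/* block in the bitmap file */
	unsigned int	word;	/* word within that block */
	unsigned int	bit;	/* bit within that word */
};

static struct bitloc rtx_to_bitloc(unsigned int blkbit_log,
		unsigned int blockwsize, uint64_t rtx)
{
	struct bitloc l;

	l.block = rtx >> blkbit_log;		/* bits tracked per block */
	l.word = (rtx >> NBWORDLOG) & (blockwsize - 1);
	l.bit = rtx & 31;
	return l;
}

int main(void)
{
	/* 4k blocks: blkbit_log is 15, 1024 words per block */
	struct bitloc l = rtx_to_bitloc(15, 1024, 32768 + 65);

	assert(l.block == 1 && l.word == 2 && l.bit == 1);
	return 0;
}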
+ */ +static inline unsigned int +xfs_rtsumoffs_to_infoword( + struct xfs_mount *mp, + xfs_rtsumoff_t rsumoff) +{ + unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG; + + return rsumoff & mask; +} + +/* Return a pointer to a summary info word within a rt summary block. */ +static inline union xfs_suminfo_raw * +xfs_rsumblock_infoptr( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_suminfo_raw *info = args->sumbp->b_addr; + + return info + index; +} + +/* Get the current value of a summary counter. */ +static inline xfs_suminfo_t +xfs_suminfo_get( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + + return info->old; +} + +/* Add to the current value of a summary counter and return the new value. */ +static inline xfs_suminfo_t +xfs_suminfo_add( + struct xfs_rtalloc_args *args, + unsigned int index, + int delta) +{ + union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + + info->old += delta; + return info->old; +} + +/* + * Functions for walking free space rtextents in the realtime bitmap. + */ +struct xfs_rtalloc_rec { + xfs_rtxnum_t ar_startext; + xfs_rtbxlen_t ar_extcount; +}; + +typedef int (*xfs_rtalloc_query_range_fn)( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv); + +#ifdef CONFIG_XFS_RT +void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args); + +int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block, + int issum); + +static inline int +xfs_rtbitmap_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + return xfs_rtbuf_get(args, block, 0); +} + +static inline int +xfs_rtsummary_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + return xfs_rtbuf_get(args, block, 1); +} + +int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat); +int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); +int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); +int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len, int val); +int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log, + xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum); +int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, + xfs_fileoff_t bbno, int delta); +int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len); +int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, + const struct xfs_rtalloc_rec *low_rec, + const struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, void *priv); +int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv); +int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtxnum_t start, xfs_rtxlen_t len, + bool *is_free); +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtxnum_t start, /* starting rtext number to free */ + xfs_rtxlen_t len); /* length of extent freed */ + +/* Same as above, but in units of rt blocks. 
*/ +int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen); + +xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t + rtextents); +unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, + xfs_rtbxlen_t rtextents); + +xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, + unsigned int rsumlevels, xfs_extlen_t rbmblocks); +unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, + unsigned int rsumlevels, xfs_extlen_t rbmblocks); +#else /* CONFIG_XFS_RT */ +# define xfs_rtfree_extent(t,b,l) (-ENOSYS) +# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) +# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) +# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS) +# define xfs_rtsummary_read_buf(a,b) (-ENOSYS) +# define xfs_rtbuf_cache_relse(a) (0) +# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) +static inline xfs_filblks_t +xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) +{ + /* shut up gcc */ + return 0; +} +# define xfs_rtbitmap_wordcount(mp, r) (0) +# define xfs_rtsummary_blockcount(mp, l, b) (0) +# define xfs_rtsummary_wordcount(mp, l, b) (0) +#endif /* CONFIG_XFS_RT */ + +#endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6264daaab37b..1f74d0cd1618 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -975,6 +975,8 @@ xfs_sb_mount_common( mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index a5e14740ec9a..19134b23c10b 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -25,7 +25,7 @@ extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp); extern int xfs_update_secondary_sbs(struct xfs_mount *mp); -#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +#define XFS_FS_GEOM_MAX_STRUCT_VER (5) extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version); extern int xfs_sb_read_secondary(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5b2f27cbdb80..6cd45e8c118d 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -19,6 +19,7 @@ #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" +#include "xfs_rtbitmap.h" #define _ALLOC true #define _FREE false @@ -217,11 +218,12 @@ xfs_rtalloc_block_count( struct xfs_mount *mp, unsigned int num_ops) { - unsigned int blksz = XFS_FSB_TO_B(mp, 1); - unsigned int rtbmp_bytes; + unsigned int rtbmp_blocks; + xfs_rtxlen_t rtxlen; - rtbmp_bytes = (XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize) / NBBY; - return (howmany(rtbmp_bytes, blksz) + 1) * num_ops; + rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN); + rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen); + return (rtbmp_blocks + 1) * num_ops; } /* diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 5c2765934732..c299b16c9365 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -148,10 +148,10 @@ xfs_verify_rtbno( /* Verify that a realtime device extent is fully contained inside the volume. 
*/ bool -xfs_verify_rtext( +xfs_verify_rtbext( struct xfs_mount *mp, xfs_rtblock_t rtbno, - xfs_rtblock_t len) + xfs_filblks_t len) { if (rtbno + len <= rtbno) return false; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 851220021484..533200c4ccc2 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -11,6 +11,7 @@ typedef uint32_t prid_t; /* project ID */ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ typedef uint32_t xfs_extlen_t; /* extent length in blocks */ +typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */ typedef uint32_t xfs_agnumber_t; /* allocation group number */ typedef uint64_t xfs_extnum_t; /* # of extents in a file */ typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */ @@ -18,6 +19,7 @@ typedef int64_t xfs_fsize_t; /* bytes in a file */ typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ +typedef uint32_t xfs_rtsumoff_t; /* offset of an rtsummary info word */ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ typedef int64_t xfs_lsn_t; /* log sequence number */ @@ -31,6 +33,8 @@ typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ typedef uint64_t xfs_fileoff_t; /* block number in a file */ typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef uint64_t xfs_rtxnum_t; /* rtextent number */ +typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ @@ -47,6 +51,7 @@ typedef void * xfs_failaddr_t; #define NULLRFSBLOCK ((xfs_rfsblock_t)-1) #define NULLRTBLOCK ((xfs_rtblock_t)-1) #define NULLFILEOFF ((xfs_fileoff_t)-1) +#define NULLRTEXTNO ((xfs_rtxnum_t)-1) #define NULLAGBLOCK ((xfs_agblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) @@ -145,6 +150,7 @@ typedef uint32_t xfs_dqid_t; */ #define XFS_NBBYLOG 3 /* log2(NBBY) */ #define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */ +#define XFS_SUMINFOLOG 2 /* log2(sizeof(xfs_suminfo_t)) */ #define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG) #define XFS_NBWORD (1 << XFS_NBWORDLOG) #define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) @@ -229,8 +235,8 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); -bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno, - xfs_rtblock_t len); +bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno, + xfs_filblks_t len); bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off); void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min, diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 75588915572e..06d8c1996a33 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -410,7 +410,7 @@ xchk_bmap_iextent( /* Make sure the extent points to a valid place. 
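Aside: the verifier's first test, rtbno + len <= rtbno, is an overflow guard: it rejects both zero-length extents and ranges whose end wraps past 2^64. A minimal sketch of the same overflow-safe range check; the function name is invented for the example.

#include <assert.h>
#include <stdint.h>

/* Nonzero when [start, start + len) lies entirely within nblocks. */
static int verify_range(uint64_t start, uint64_t len, uint64_t nblocks)
{
	if (start + len <= start)	/* catches len == 0 and overflow */
		return 0;
	return start + len - 1 < nblocks;
}

int main(void)
{
	assert(verify_range(10, 5, 100));
	assert(!verify_range(10, 0, 100));		/* empty */
	assert(!verify_range(UINT64_MAX - 2, 5, 100));	/* wraps */
	return 0;
}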
*/ if (info->is_rt && - !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) + !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 05be757668bb..5799e9a94f1f 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -16,7 +16,7 @@ #include "xfs_health.h" #include "xfs_btree.h" #include "xfs_ag.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "scrub/scrub.h" diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 59d7912fb75f..889f556bc98f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -20,6 +20,7 @@ #include "xfs_reflink.h" #include "xfs_rmap.h" #include "xfs_bmap_util.h" +#include "xfs_rtbitmap.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" @@ -225,7 +226,7 @@ xchk_inode_extsize( */ if ((flags & XFS_DIFLAG_RTINHERIT) && (flags & XFS_DIFLAG_EXTSZINHERIT) && - value % sc->mp->m_sb.sb_rextsize > 0) + xfs_extlen_to_rtxmod(sc->mp, value) > 0) xchk_ino_set_warning(sc, ino); } diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 008ddb599e13..41a1d89ae8e6 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -11,7 +11,7 @@ #include "xfs_mount.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "scrub/scrub.h" @@ -48,12 +48,12 @@ xchk_rtbitmap_rec( { struct xfs_scrub *sc = priv; xfs_rtblock_t startblock; - xfs_rtblock_t blockcount; + xfs_filblks_t blockcount; - startblock = rec->ar_startext * mp->m_sb.sb_rextsize; - blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize; + startblock = xfs_rtx_to_rtb(mp, rec->ar_startext); + blockcount = xfs_rtx_to_rtb(mp, rec->ar_extcount); - if (!xfs_verify_rtext(mp, startblock, blockcount)) + if (!xfs_verify_rtbext(mp, startblock, blockcount)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } @@ -128,26 +128,22 @@ out: void xchk_xref_is_used_rt_space( struct xfs_scrub *sc, - xfs_rtblock_t fsbno, + xfs_rtblock_t rtbno, xfs_extlen_t len) { - xfs_rtblock_t startext; - xfs_rtblock_t endext; - xfs_rtblock_t extcount; + xfs_rtxnum_t startext; + xfs_rtxnum_t endext; bool is_free; int error; if (xchk_skip_xref(sc->sm)) return; - startext = fsbno; - endext = fsbno + len - 1; - do_div(startext, sc->mp->m_sb.sb_rextsize); - do_div(endext, sc->mp->m_sb.sb_rextsize); - extcount = endext - startext + 1; + startext = xfs_rtb_to_rtx(sc->mp, rtbno); + endext = xfs_rtb_to_rtx(sc->mp, rtbno + len - 1); xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); - error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount, - &is_free); + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, + endext - startext + 1, &is_free); if (!xchk_should_check_xref(sc, &error, NULL)) goto out_unlock; if (is_free) diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 437ed9acbb27..8b15c47408d0 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -13,7 +13,7 @@ #include "xfs_inode.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_bit.h" #include "xfs_bmap.h" #include "scrub/scrub.h" @@ -81,34 +81,45 @@ typedef unsigned int xchk_rtsumoff_t; static inline int xfsum_load( struct xfs_scrub *sc, - 
xchk_rtsumoff_t sumoff, - xfs_suminfo_t *info) + xfs_rtsumoff_t sumoff, + union xfs_suminfo_raw *rawinfo) { - return xfile_obj_load(sc->xfile, info, sizeof(xfs_suminfo_t), + return xfile_obj_load(sc->xfile, rawinfo, + sizeof(union xfs_suminfo_raw), sumoff << XFS_WORDLOG); } static inline int xfsum_store( struct xfs_scrub *sc, - xchk_rtsumoff_t sumoff, - const xfs_suminfo_t info) + xfs_rtsumoff_t sumoff, + const union xfs_suminfo_raw rawinfo) { - return xfile_obj_store(sc->xfile, &info, sizeof(xfs_suminfo_t), + return xfile_obj_store(sc->xfile, &rawinfo, + sizeof(union xfs_suminfo_raw), sumoff << XFS_WORDLOG); } static inline int xfsum_copyout( struct xfs_scrub *sc, - xchk_rtsumoff_t sumoff, - xfs_suminfo_t *info, + xfs_rtsumoff_t sumoff, + union xfs_suminfo_raw *rawinfo, unsigned int nr_words) { - return xfile_obj_load(sc->xfile, info, nr_words << XFS_WORDLOG, + return xfile_obj_load(sc->xfile, rawinfo, nr_words << XFS_WORDLOG, sumoff << XFS_WORDLOG); } +static inline xfs_suminfo_t +xchk_rtsum_inc( + struct xfs_mount *mp, + union xfs_suminfo_raw *v) +{ + v->old += 1; + return v->old; +} + /* Update the summary file to reflect the free extent that we've accumulated. */ STATIC int xchk_rtsum_record_free( @@ -121,23 +132,24 @@ xchk_rtsum_record_free( xfs_fileoff_t rbmoff; xfs_rtblock_t rtbno; xfs_filblks_t rtlen; - xchk_rtsumoff_t offs; + xfs_rtsumoff_t offs; unsigned int lenlog; - xfs_suminfo_t v = 0; + union xfs_suminfo_raw v; + xfs_suminfo_t value; int error = 0; if (xchk_should_terminate(sc, &error)) return error; /* Compute the relevant location in the rtsum file. */ - rbmoff = XFS_BITTOBLOCK(mp, rec->ar_startext); + rbmoff = xfs_rtx_to_rbmblock(mp, rec->ar_startext); lenlog = XFS_RTBLOCKLOG(rec->ar_extcount); - offs = XFS_SUMOFFS(mp, lenlog, rbmoff); + offs = xfs_rtsumoffs(mp, lenlog, rbmoff); - rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; - rtlen = rec->ar_extcount * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); + rtlen = xfs_rtx_to_rtb(mp, rec->ar_extcount); - if (!xfs_verify_rtext(mp, rtbno, rtlen)) { + if (!xfs_verify_rtbext(mp, rtbno, rtlen)) { xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino); return -EFSCORRUPTED; } @@ -147,9 +159,9 @@ xchk_rtsum_record_free( if (error) return error; - v++; + value = xchk_rtsum_inc(sc->mp, &v); trace_xchk_rtsum_record_free(mp, rec->ar_startext, rec->ar_extcount, - lenlog, offs, v); + lenlog, offs, value); return xfsum_store(sc, offs, v); } @@ -160,12 +172,11 @@ xchk_rtsum_compute( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; - unsigned long long rtbmp_bytes; + unsigned long long rtbmp_blocks; /* If the bitmap size doesn't match the computed size, bail. 
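Aside: the xfsum_load/xfsum_store pair treats the scrub xfile as a flat array of 4-byte summary words addressed at byte offset sumoff << XFS_WORDLOG; the recomputed copy is later compared block-by-block against the ondisk summary. A small in-memory model of that flow, with a plain byte array standing in for the xfile:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define WORDLOG 2
#define WORDS	8

static uint8_t xfile[WORDS << WORDLOG];		/* flat byte store */

static void xfsum_store(uint32_t sumoff, uint32_t v)
{
	memcpy(xfile + (sumoff << WORDLOG), &v, sizeof(v));
}

static uint32_t xfsum_load(uint32_t sumoff)
{
	uint32_t v;

	memcpy(&v, xfile + (sumoff << WORDLOG), sizeof(v));
	return v;
}

int main(void)
{
	uint32_t ondisk[WORDS] = { 0, 3, 0, 0, 1, 0, 0, 0 };

	xfsum_store(1, xfsum_load(1) + 3);	/* three free runs at log 1 */
	xfsum_store(4, xfsum_load(4) + 1);	/* one free run at log 4 */
	assert(memcmp(xfile, ondisk, sizeof(ondisk)) == 0);
	return 0;
}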
*/ - rtbmp_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY); - if (roundup_64(rtbmp_bytes, mp->m_sb.sb_blocksize) != - mp->m_rbmip->i_disk_size) + rtbmp_blocks = xfs_rtbitmap_blockcount(mp, mp->m_sb.sb_rextents); + if (XFS_FSB_TO_B(mp, rtbmp_blocks) != mp->m_rbmip->i_disk_size) return -EFSCORRUPTED; return xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtsum_record_free, @@ -177,14 +188,18 @@ STATIC int xchk_rtsum_compare( struct xfs_scrub *sc) { + struct xfs_rtalloc_args args = { + .mp = sc->mp, + .tp = sc->tp, + }; struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp; struct xfs_bmbt_irec map; xfs_fileoff_t off; xchk_rtsumoff_t sumoff = 0; int nmap; for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) { + union xfs_suminfo_raw *ondisk_info; int error = 0; if (xchk_should_terminate(sc, &error)) @@ -205,22 +220,23 @@ xchk_rtsum_compare( } /* Read a block's worth of ondisk rtsummary file. */ - error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp); + error = xfs_rtsummary_read_buf(&args, off); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) return error; /* Read a block's worth of computed rtsummary file. */ error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize); if (error) { - xfs_trans_brelse(sc->tp, bp); + xfs_rtbuf_cache_relse(&args); return error; } - if (memcmp(bp->b_addr, sc->buf, + ondisk_info = xfs_rsumblock_infoptr(&args, 0); + if (memcmp(ondisk_info, sc->buf, mp->m_blockwsize << XFS_WORDLOG) != 0) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off); - xfs_trans_brelse(sc->tp, bp); + xfs_rtbuf_cache_relse(&args); sumoff += mp->m_blockwsize; } diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 46249e7b17e0..29afa4851235 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -13,6 +13,7 @@ #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cbd4d01e253c..4a8bc6f3c8f2 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1036,17 +1036,18 @@ TRACE_EVENT(xfarray_sort_stats, #ifdef CONFIG_XFS_RT TRACE_EVENT(xchk_rtsum_record_free, - TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, - uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v), - TP_ARGS(mp, start, len, log, pos, v), + TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, + xfs_rtbxlen_t len, unsigned int log, loff_t pos, + xfs_suminfo_t value), + TP_ARGS(mp, start, len, log, pos, value), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, rtdev) - __field(xfs_rtblock_t, start) + __field(xfs_rtxnum_t, start) __field(unsigned long long, len) __field(unsigned int, log) __field(loff_t, pos) - __field(xfs_suminfo_t, v) + __field(xfs_suminfo_t, value) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; @@ -1055,7 +1056,7 @@ TRACE_EVENT(xchk_rtsum_record_free, __entry->len = len; __entry->log = log; __entry->pos = pos; - __entry->v = v; + __entry->value = value; ), TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rtxcount 0x%llx log %u rsumpos 0x%llx sumcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1064,7 +1065,7 @@ TRACE_EVENT(xchk_rtsum_record_free, __entry->len, __entry->log, __entry->pos, - __entry->v) + __entry->value) ); #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 40e0a1f1f753..731260a5af6d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -28,6 +28,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" #include "xfs_reflink.h" 
+#include "xfs_rtbitmap.h" /* Kernel only BMAP related definitions and functions */ @@ -75,28 +76,28 @@ xfs_bmap_rtalloc( { struct xfs_mount *mp = ap->ip->i_mount; xfs_fileoff_t orig_offset = ap->offset; - xfs_rtblock_t rtb; - xfs_extlen_t prod = 0; /* product factor for allocators */ + xfs_rtxnum_t rtx; + xfs_rtxlen_t prod = 0; /* product factor for allocators */ xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_extlen_t ralen = 0; /* realtime allocation length */ + xfs_rtxlen_t ralen = 0; /* realtime allocation length */ xfs_extlen_t align; /* minimum allocation alignment */ xfs_extlen_t orig_length = ap->length; xfs_extlen_t minlen = mp->m_sb.sb_rextsize; - xfs_extlen_t raminlen; + xfs_rtxlen_t raminlen; bool rtlocked = false; bool ignore_locality = false; int error; align = xfs_get_extsz_hint(ap->ip); retry: - prod = align / mp->m_sb.sb_rextsize; + prod = xfs_extlen_to_rtxlen(mp, align); error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, ap->eof, 0, ap->conv, &ap->offset, &ap->length); if (error) return error; ASSERT(ap->length); - ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); + ASSERT(xfs_extlen_to_rtxmod(mp, ap->length) == 0); /* * If we shifted the file offset downward to satisfy an extent size @@ -116,17 +117,14 @@ retry: prod = 1; /* * Set ralen to be the actual requested length in rtextents. - */ - ralen = ap->length / mp->m_sb.sb_rextsize; - /* + * * If the old value was close enough to XFS_BMBT_MAX_EXTLEN that * we rounded up to it, cut it back so it's valid again. * Note that if it's a really large request (bigger than * XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ - if (ralen * mp->m_sb.sb_rextsize >= XFS_MAX_BMBT_EXTLEN) - ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize; + ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); /* * Lock out modifications to both the RT bitmap and summary inodes @@ -144,12 +142,10 @@ retry: * pick an extent that will space things out in the rt area. */ if (ap->eof && ap->offset == 0) { - xfs_rtblock_t rtx; /* realtime extent no */ - error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); if (error) return error; - ap->blkno = rtx * mp->m_sb.sb_rextsize; + ap->blkno = xfs_rtx_to_rtb(mp, rtx); } else { ap->blkno = 0; } @@ -160,20 +156,18 @@ retry: * Realtime allocation, done through xfs_rtallocate_extent. 
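All lengths and block numbers passed to the allocator here are in units of realtime extents; the winning extent is converted back to filesystem blocks before it is stored in the mapping.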
*/ if (ignore_locality) - ap->blkno = 0; + rtx = 0; else - do_div(ap->blkno, mp->m_sb.sb_rextsize); - rtb = ap->blkno; - ap->length = ralen; - raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize); - error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length, - &ralen, ap->wasdel, prod, &rtb); + rtx = xfs_rtb_to_rtx(mp, ap->blkno); + raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); + error = xfs_rtallocate_extent(ap->tp, rtx, raminlen, ralen, &ralen, + ap->wasdel, prod, &rtx); if (error) return error; - if (rtb != NULLRTBLOCK) { - ap->blkno = rtb * mp->m_sb.sb_rextsize; - ap->length = ralen * mp->m_sb.sb_rextsize; + if (rtx != NULLRTEXTNO) { + ap->blkno = xfs_rtx_to_rtb(mp, rtx); + ap->length = xfs_rtxlen_to_extlen(mp, ralen); ap->ip->i_nblocks += ap->length; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) @@ -690,7 +684,7 @@ xfs_can_free_eofblocks( */ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) - end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize); + end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb); last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); if (last_fsb <= end_fsb) return false; @@ -780,12 +774,10 @@ xfs_alloc_file_space( { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; - xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; xfs_fileoff_t endoffset_fsb; - int nimaps; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; @@ -808,7 +800,6 @@ xfs_alloc_file_space( count = len; imapp = &imaps[0]; - nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); allocatesize_fsb = endoffset_fsb - startoffset_fsb; @@ -819,6 +810,7 @@ xfs_alloc_file_space( while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; unsigned int dblocks, rblocks, resblks; + int nimaps = 1; /* * Determine space reservations for data/realtime. @@ -884,15 +876,19 @@ xfs_alloc_file_space( if (error) break; - allocated_fsb = imapp->br_blockcount; - - if (nimaps == 0) { - error = -ENOSPC; - break; + /* + * If the allocator cannot find a single free extent large + * enough to cover the start block of the requested range, + * xfs_bmapi_write will return 0 but leave *nimaps set to 0. + * + * In that case we simply need to keep looping with the same + * startoffset_fsb so that one of the following allocations + * will eventually reach the requested range. + */ + if (nimaps) { + startoffset_fsb += imapp->br_blockcount; + allocatesize_fsb -= imapp->br_blockcount; } - - startoffset_fsb += allocated_fsb; - allocatesize_fsb -= allocated_fsb; } return error; @@ -989,10 +985,8 @@ xfs_free_file_space( /* We can only free complete realtime extents. 
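Round the start offset up and the end offset down to realtime extent boundaries, so any partial extents at the edges of the range stay allocated.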
*/ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) { - startoffset_fsb = roundup_64(startoffset_fsb, - mp->m_sb.sb_rextsize); - endoffset_fsb = rounddown_64(endoffset_fsb, - mp->m_sb.sb_rextsize); + startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb); + endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb); } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 203700278ddb..e33e5e13b95f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -214,6 +214,43 @@ xfs_ilock_iocb( return 0; } +static int +xfs_ilock_iocb_for_write( + struct kiocb *iocb, + unsigned int *lock_mode) +{ + ssize_t ret; + struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); + + ret = xfs_ilock_iocb(iocb, *lock_mode); + if (ret) + return ret; + + if (*lock_mode == XFS_IOLOCK_EXCL) + return 0; + if (!xfs_iflags_test(ip, XFS_IREMAPPING)) + return 0; + + xfs_iunlock(ip, *lock_mode); + *lock_mode = XFS_IOLOCK_EXCL; + return xfs_ilock_iocb(iocb, *lock_mode); +} + +static unsigned int +xfs_ilock_for_write_fault( + struct xfs_inode *ip) +{ + /* get a shared lock if no remapping in progress */ + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + if (!xfs_iflags_test(ip, XFS_IREMAPPING)) + return XFS_MMAPLOCK_SHARED; + + /* wait for remapping to complete */ + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + return XFS_MMAPLOCK_EXCL; +} + STATIC ssize_t xfs_file_dio_read( struct kiocb *iocb, @@ -551,7 +588,7 @@ xfs_file_dio_write_aligned( unsigned int iolock = XFS_IOLOCK_SHARED; ssize_t ret; - ret = xfs_ilock_iocb(iocb, iolock); + ret = xfs_ilock_iocb_for_write(iocb, &iolock); if (ret) return ret; ret = xfs_file_write_checks(iocb, from, &iolock); @@ -618,7 +655,7 @@ retry_exclusive: flags = IOMAP_DIO_FORCE_WAIT; } - ret = xfs_ilock_iocb(iocb, iolock); + ret = xfs_ilock_iocb_for_write(iocb, &iolock); if (ret) return ret; @@ -1180,7 +1217,7 @@ xfs_file_remap_range( if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) xfs_log_force_inode(dest); out_unlock: - xfs_iunlock2_io_mmap(src, dest); + xfs_iunlock2_remapping(src, dest); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return remapped > 0 ? 
remapped : ret; @@ -1328,6 +1365,7 @@ __xfs_filemap_fault( struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); vm_fault_t ret; + unsigned int lock_mode = 0; trace_xfs_filemap_fault(ip, order, write_fault); @@ -1336,25 +1374,24 @@ __xfs_filemap_fault( file_update_time(vmf->vma->vm_file); } + if (IS_DAX(inode) || write_fault) + lock_mode = xfs_ilock_for_write_fault(XFS_I(inode)); + if (IS_DAX(inode)) { pfn_t pfn; - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = xfs_dax_fault(vmf, order, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, order, pfn); - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + } else if (write_fault) { + ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); } else { - if (write_fault) { - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = iomap_page_mkwrite(vmf, - &xfs_page_mkwrite_iomap_ops); - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - } else { - ret = filemap_fault(vmf); - } + ret = filemap_fault(vmf); } + if (lock_mode) + xfs_iunlock(XFS_I(inode), lock_mode); + if (write_fault) sb_end_pagefault(inode->i_sb); return ret; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 736e5545f584..5a72217f5feb 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -23,7 +23,7 @@ #include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_ag.h" /* Convert an xfs_fsmap to an fsmap. */ @@ -483,11 +483,11 @@ xfs_getfsmap_rtdev_rtbitmap_helper( xfs_rtblock_t rtbno; xfs_daddr_t rec_daddr, len_daddr; - rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); rec_daddr = XFS_FSB_TO_BB(mp, rtbno); irec.rm_startblock = rtbno; - rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); len_daddr = XFS_FSB_TO_BB(mp, rtbno); irec.rm_blockcount = rtbno; @@ -514,7 +514,7 @@ xfs_getfsmap_rtdev_rtbitmap( uint64_t eofs; int error; - eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize); + eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); if (keys[0].fmr_physical >= eofs) return 0; start_rtb = XFS_BB_TO_FSBT(mp, @@ -539,11 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap( * Set up query parameters to return free rtextents covering the range * we want. */ - alow.ar_startext = start_rtb; - ahigh.ar_startext = end_rtb; - do_div(alow.ar_startext, mp->m_sb.sb_rextsize); - if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize)) - ahigh.ar_startext++; + alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb); + ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb); error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh, xfs_getfsmap_rtdev_rtbitmap_helper, info); if (error) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 36f5cf802c07..c0f1c89786c2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -918,6 +918,13 @@ xfs_droplink( xfs_trans_t *tp, xfs_inode_t *ip) { + if (VFS_I(ip)->i_nlink == 0) { + xfs_alert(ip->i_mount, + "%s: Attempt to drop inode (%llu) with nlink zero.", + __func__, ip->i_ino); + return -EFSCORRUPTED; + } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); drop_nlink(VFS_I(ip)); @@ -3621,6 +3628,23 @@ xfs_iunlock2_io_mmap( inode_unlock(VFS_I(ip1)); } +/* Drop the MMAPLOCK and the IOLOCK after a remap completes. 
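Clear XFS_IREMAPPING before dropping the locks so that writers and page faults waiting on the exclusive locks see the remap as finished once they are granted.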
*/ +void +xfs_iunlock2_remapping( + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + xfs_iflags_clear(ip1, XFS_IREMAPPING); + + if (ip1 != ip2) + xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); + + if (ip1 != ip2) + inode_unlock_shared(VFS_I(ip1)); + inode_unlock(VFS_I(ip2)); +} + /* * Reload the incore inode list for this inode. Caller should ensure that * the link count cannot change, either by taking ILOCK_SHARED or otherwise diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0c5bdb91152e..3dc47937da5d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) /* Quotacheck is running but inode has not been added to quota counts. */ #define XFS_IQUOTAUNCHECKED (1 << 14) +/* + * Remap in progress. Callers that wish to update file data while + * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake + * the lock in exclusive mode. Relocking the file will block until + * IREMAPPING is cleared. + */ +#define XFS_IREMAPPING (1U << 15) + /* All inode state flags related to inode reclaim. */ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ @@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); +void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); static inline bool xfs_inode_unlinked_incomplete( diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 17c51804f9c6..cd7803fda8b1 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_error.h" +#include "xfs_rtbitmap.h" #include <linux/iversion.h> @@ -107,7 +108,7 @@ xfs_inode_item_precommit( */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && - (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { + xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) { ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT); ip->i_extsize = 0; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 55bb01173cde..a82470e027f7 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -38,6 +38,7 @@ #include "xfs_reflink.h" #include "xfs_ioctl.h" #include "xfs_xattr.h" +#include "xfs_rtbitmap.h" #include <linux/mount.h> #include <linux/namei.h> @@ -1004,7 +1005,7 @@ xfs_fill_fsxattr( * later. */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && - ip->i_extsize % mp->m_sb.sb_rextsize > 0) { + xfs_extlen_to_rtxmod(mp, ip->i_extsize) > 0) { fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); fa->fsx_extsize = 0; @@ -1130,7 +1131,7 @@ xfs_ioctl_setattr_xflags( /* If realtime flag is set then must have realtime device */ if (fa->fsx_xflags & FS_XFLAG_REALTIME) { if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || - (ip->i_extsize % mp->m_sb.sb_rextsize)) + xfs_extlen_to_rtxmod(mp, ip->i_extsize)) return -EINVAL; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index e9d317a3dafe..d7873e0360f0 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -198,6 +198,18 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) return x; } +/* If @b is a power of 2, return log2(b). Else return -1. */ +static inline int8_t log2_if_power2(unsigned long b) +{ + return is_power_of_2(b) ? 
ilog2(b) : -1; +} + +/* If @b is a power of 2, return a mask of the lower bits, else return zero. */ +static inline unsigned long long mask64_if_power2(unsigned long b) +{ + return is_power_of_2(b) ? b - 1 : 0; +} + int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, char *data, enum req_op op); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 219681d29fbc..503fe3c7edbf 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -101,9 +101,9 @@ typedef struct xfs_mount { /* * Optional cache of rt summary level per bitmap block with the - * invariant that m_rsum_cache[bbno] <= the minimum i for which - * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip - * inode lock. + * invariant that m_rsum_cache[bbno] > the maximum i for which + * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i. + * Reads and writes are serialized by the rsumip inode lock. */ uint8_t *m_rsum_cache; struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ @@ -119,6 +119,7 @@ typedef struct xfs_mount { uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ + int8_t m_rtxblklog; /* log2 of rextsize, if possible */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -152,6 +153,7 @@ typedef struct xfs_mount { uint64_t m_features; /* active filesystem features */ uint64_t m_low_space[XFS_LOWSP_MAX]; uint64_t m_low_rtexts[XFS_LOWSP_MAX]; + uint64_t m_rtxblkmask; /* rt extent block mask */ struct xfs_ino_geometry m_ino_geo; /* inode geometry */ struct xfs_trans_resv m_resv; /* precomputed res values */ /* low free space thresholds */ diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index c4cc99b70dd3..21a7e350b4c5 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -72,6 +72,10 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4); + /* realtime structures */ + XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4); + XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4); + /* * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to * 4 bytes anyway so it's not obviously a problem. 
Hence for the moment diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index eb9102453aff..658edee8381d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep( if (ret) goto out_unlock; + xfs_iflags_set(src, XFS_IREMAPPING); + if (inode_in != inode_out) + xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); + return 0; out_unlock: xfs_iunlock2_io_mmap(src, dest); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2e1a4e5cd03d..88c48de5c9c8 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -19,6 +19,7 @@ #include "xfs_icache.h" #include "xfs_rtalloc.h" #include "xfs_sb.h" +#include "xfs_rtbitmap.h" /* * Read and return the summary information for a given extent size, @@ -28,48 +29,48 @@ */ static int xfs_rtget_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { - return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum); + return xfs_rtmodify_summary_int(args, log, bbno, 0, sum); } /* * Return whether there are any free extents in the size range given * by low and high, for the bitmap block bbno. */ -STATIC int /* error */ +STATIC int xfs_rtany_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int low, /* low log2 extent size */ - int high, /* high log2 extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - int *stat) /* out: any good extents here? */ + struct xfs_rtalloc_args *args, + int low, /* low log2 extent size */ + int high, /* high log2 extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int *maxlog) /* out: max log2 extent size free */ { - int error; /* error value */ - int log; /* loop counter, log2 of ext. size */ - xfs_suminfo_t sum; /* summary data */ - - /* There are no extents at levels < m_rsum_cache[bbno]. */ - if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno]) - low = mp->m_rsum_cache[bbno]; + struct xfs_mount *mp = args->mp; + int error; + int log; /* loop counter, log2 of ext. size */ + xfs_suminfo_t sum; /* summary data */ + + /* There are no extents at levels >= m_rsum_cache[bbno]. */ + if (mp->m_rsum_cache) { + high = min(high, mp->m_rsum_cache[bbno] - 1); + if (low > high) { + *maxlog = -1; + return 0; + } + } /* * Loop over logs of extent sizes. */ - for (log = low; log <= high; log++) { + for (log = high; log >= low; log--) { /* * Get one summary datum. */ - error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); + error = xfs_rtget_summary(args, log, bbno, &sum); if (error) { return error; } @@ -77,18 +78,18 @@ xfs_rtany_summary( * If there are any, return success. */ if (sum) { - *stat = 1; + *maxlog = log; goto out; } } /* * Found nothing, return failure. */ - *stat = 0; + *maxlog = -1; out: - /* There were no extents at levels < log. 
*/ - if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log; + /* There were no extents at levels > log. */ + if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log + 1; return 0; } @@ -97,60 +98,54 @@ out: * Copy and transform the summary file, given the old and new * parameters in the mount structures. */ -STATIC int /* error */ +STATIC int xfs_rtcopy_summary( - xfs_mount_t *omp, /* old file system mount point */ - xfs_mount_t *nmp, /* new file system mount point */ - xfs_trans_t *tp) /* transaction pointer */ + struct xfs_rtalloc_args *oargs, + struct xfs_rtalloc_args *nargs) { - xfs_rtblock_t bbno; /* bitmap block number */ - struct xfs_buf *bp; /* summary buffer */ - int error; /* error return value */ - int log; /* summary level number (log length) */ - xfs_suminfo_t sum; /* summary data */ - xfs_fsblock_t sumbno; /* summary block number */ + xfs_fileoff_t bbno; /* bitmap block number */ + int error; + int log; /* summary level number (log length) */ + xfs_suminfo_t sum; /* summary data */ - bp = NULL; - for (log = omp->m_rsumlevels - 1; log >= 0; log--) { - for (bbno = omp->m_sb.sb_rbmblocks - 1; + for (log = oargs->mp->m_rsumlevels - 1; log >= 0; log--) { + for (bbno = oargs->mp->m_sb.sb_rbmblocks - 1; (xfs_srtblock_t)bbno >= 0; bbno--) { - error = xfs_rtget_summary(omp, tp, log, bbno, &bp, - &sumbno, &sum); + error = xfs_rtget_summary(oargs, log, bbno, &sum); if (error) - return error; + goto out; if (sum == 0) continue; - error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, - &bp, &sumbno); + error = xfs_rtmodify_summary(oargs, log, bbno, -sum); if (error) - return error; - error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, - &bp, &sumbno); + goto out; + error = xfs_rtmodify_summary(nargs, log, bbno, sum); if (error) - return error; + goto out; ASSERT(sum > 0); } } + error = 0; +out: + xfs_rtbuf_cache_relse(oargs); return 0; } /* * Mark an extent specified by start and len allocated. * Updates all the summary information as well as the bitmap. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* start block to allocate */ - xfs_extlen_t len, /* length to allocate */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* start rtext to allocate */ + xfs_rtxlen_t len) /* in/out: summary block number */ { - xfs_rtblock_t end; /* end of the allocated extent */ - int error; /* error value */ - xfs_rtblock_t postblock = 0; /* first block allocated > end */ - xfs_rtblock_t preblock = 0; /* first block allocated < start */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t end; /* end of the allocated rtext */ + int error; + xfs_rtxnum_t postblock = 0; /* first rtext allocated > end */ + xfs_rtxnum_t preblock = 0; /* first rtext allocated < start */ end = start + len - 1; /* @@ -158,15 +153,15 @@ xfs_rtallocate_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + error = xfs_rtfind_back(args, start, 0, &preblock); if (error) { return error; } /* * Find the next allocated block (end of free extent). 
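preblock and postblock then delimit the whole old free extent, whose single summary counter is decremented before the unallocated pieces outside [start, end] are counted back in.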
*/ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); + error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1, + &postblock); if (error) { return error; } @@ -174,9 +169,9 @@ xfs_rtallocate_range( * Decrement the summary information corresponding to the entire * (old) free extent. */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + xfs_rtx_to_rbmblock(mp, preblock), -1); if (error) { return error; } @@ -185,9 +180,9 @@ xfs_rtallocate_range( * old extent, add summary data for them to be free. */ if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(start - preblock), + xfs_rtx_to_rbmblock(mp, preblock), 1); if (error) { return error; } @@ -197,9 +192,9 @@ xfs_rtallocate_range( * old extent, add summary data for them to be free. */ if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock - end), + xfs_rtx_to_rbmblock(mp, end + 1), 1); if (error) { return error; } @@ -207,54 +202,69 @@ xfs_rtallocate_range( /* * Modify the bitmap to mark this extent allocated. */ - error = xfs_rtmodify_range(mp, tp, start, len, 0); + error = xfs_rtmodify_range(args, start, len, 0); return error; } /* + * Make sure we don't run off the end of the rt volume. Be careful that + * adjusting maxlen downwards doesn't cause us to fail the alignment checks. + */ +static inline xfs_rtxlen_t +xfs_rtallocate_clamp_len( + struct xfs_mount *mp, + xfs_rtxnum_t startrtx, + xfs_rtxlen_t rtxlen, + xfs_rtxlen_t prod) +{ + xfs_rtxlen_t ret; + + ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; + return rounddown(ret, prod); +} + +/* * Attempt to allocate an extent minlen<=len<=maxlen starting from * bitmap block bbno. If we don't get maxlen then use prod to trim - * the length, if given. Returns error; returns starting block in *rtblock. + * the length, if given. Returns error; returns starting block in *rtx. * The lengths are all in rtextents. 
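On failure, *rtx is set to NULLRTEXTNO and *nextp points at the next rtextent worth trying so the caller can skip ahead.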
*/ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_block( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - xfs_rtblock_t *nextp, /* out: next block to try */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_fileoff_t bbno, /* bitmap block number */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxnum_t *nextp, /* out: next rtext to try */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - xfs_rtblock_t besti; /* best rtblock found so far */ - xfs_rtblock_t bestlen; /* best length found so far */ - xfs_rtblock_t end; /* last rtblock in chunk */ - int error; /* error value */ - xfs_rtblock_t i; /* current rtblock trying */ - xfs_rtblock_t next; /* next rtblock to try */ - int stat; /* status from internal calls */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t besti; /* best rtext found so far */ + xfs_rtxnum_t bestlen;/* best length found so far */ + xfs_rtxnum_t end; /* last rtext in chunk */ + int error; + xfs_rtxnum_t i; /* current rtext trying */ + xfs_rtxnum_t next; /* next rtext to try */ + int stat; /* status from internal calls */ /* * Loop over all the extents starting in this bitmap block, * looking for one that's long enough. */ - for (i = XFS_BLOCKTOBIT(mp, bbno), besti = -1, bestlen = 0, - end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1; + for (i = xfs_rbmblock_to_rtx(mp, bbno), besti = -1, bestlen = 0, + end = xfs_rbmblock_to_rtx(mp, bbno + 1) - 1; i <= end; i++) { /* Make sure we don't scan off the end of the rt volume. */ - maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i; + maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); /* * See if there's a free extent of maxlen starting at i. * If it's not so then next will contain the first non-free. */ - error = xfs_rtcheck_range(mp, tp, i, maxlen, 1, &next, &stat); + error = xfs_rtcheck_range(args, i, maxlen, 1, &next, &stat); if (error) { return error; } @@ -262,13 +272,12 @@ xfs_rtallocate_extent_block( /* * i for maxlen is all free, allocate and return that. */ - error = xfs_rtallocate_range(mp, tp, i, maxlen, rbpp, - rsb); + error = xfs_rtallocate_range(args, i, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = i; + *rtx = i; return 0; } /* @@ -278,7 +287,7 @@ xfs_rtallocate_extent_block( * so far, remember it. */ if (minlen < maxlen) { - xfs_rtblock_t thislen; /* this extent size */ + xfs_rtxnum_t thislen; /* this extent size */ thislen = next - i; if (thislen >= minlen && thislen > bestlen) { @@ -290,7 +299,7 @@ xfs_rtallocate_extent_block( * If not done yet, find the start of the next free space. */ if (next < end) { - error = xfs_rtfind_forw(mp, tp, next, end, &i); + error = xfs_rtfind_forw(args, next, end, &i); if (error) { return error; } @@ -301,7 +310,7 @@ xfs_rtallocate_extent_block( * Searched the whole thing & didn't find a maxlen free extent. 
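Fall back to the best shorter candidate remembered during the scan, trimming its length to a multiple of prod if one was given.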
*/ if (minlen < maxlen && besti != -1) { - xfs_extlen_t p; /* amount to trim length by */ + xfs_rtxlen_t p; /* amount to trim length by */ /* * If size should be a multiple of prod, make that so. @@ -315,51 +324,49 @@ xfs_rtallocate_extent_block( /* * Allocate besti for bestlen & return that. */ - error = xfs_rtallocate_range(mp, tp, besti, bestlen, rbpp, rsb); + error = xfs_rtallocate_range(args, besti, bestlen); if (error) { return error; } *len = bestlen; - *rtblock = besti; + *rtx = besti; return 0; } /* * Allocation failed. Set *nextp to the next block to try. */ *nextp = next; - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* * Allocate an extent of length minlen<=len<=maxlen, starting at block * bno. If we don't get maxlen then use prod to trim the length, if given. - * Returns error; returns starting block in *rtblock. + * Returns error; returns starting block in *rtx. * The lengths are all in rtextents. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_exact( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; /* error value */ - xfs_extlen_t i; /* extent length trimmed due to prod */ - int isfree; /* extent is free */ - xfs_rtblock_t next; /* next block to try (dummy) */ + int error; + xfs_rtxlen_t i; /* extent length trimmed due to prod */ + int isfree; /* extent is free */ + xfs_rtxnum_t next; /* next rtext to try (dummy) */ - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); /* * Check if the range in question (for maxlen) is free. */ - error = xfs_rtcheck_range(mp, tp, bno, maxlen, 1, &next, &isfree); + error = xfs_rtcheck_range(args, start, maxlen, 1, &next, &isfree); if (error) { return error; } @@ -367,23 +374,23 @@ xfs_rtallocate_extent_exact( /* * If it is, allocate it and return success. */ - error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb); + error = xfs_rtallocate_range(args, start, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = bno; + *rtx = start; return 0; } /* * If not, allocate what there is, if it's at least minlen. */ - maxlen = next - bno; + maxlen = next - start; if (maxlen < minlen) { /* * Failed, return failure status. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* @@ -395,81 +402,82 @@ xfs_rtallocate_extent_exact( /* * Now we can't do it, return failure status. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } } /* * Allocate what we can and return it. 
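maxlen was already cut back to the free space actually found and checked against minlen, so the shorter allocation still satisfies the caller.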
*/ - error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb); + error = xfs_rtallocate_range(args, start, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = bno; + *rtx = start; return 0; } /* * Allocate an extent of length minlen<=len<=maxlen, starting as near - * to bno as possible. If we don't get maxlen then use prod to trim + * to start as possible. If we don't get maxlen then use prod to trim * the length, if given. The lengths are all in rtextents. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_near( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int any; /* any useful extents from summary */ - xfs_rtblock_t bbno; /* bitmap block number */ - int error; /* error value */ - int i; /* bitmap block offset (loop control) */ - int j; /* secondary loop control */ - int log2len; /* log2 of minlen */ - xfs_rtblock_t n; /* next block to try */ - xfs_rtblock_t r; /* result block */ - - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + struct xfs_mount *mp = args->mp; + int maxlog; /* max useful extent from summary */ + xfs_fileoff_t bbno; /* bitmap block number */ + int error; + int i; /* bitmap block offset (loop control) */ + int j; /* secondary loop control */ + int log2len; /* log2 of minlen */ + xfs_rtxnum_t n; /* next rtext to try */ + xfs_rtxnum_t r; /* result rtext */ + + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); + /* * If the block number given is off the end, silently set it to * the last block. */ - if (bno >= mp->m_sb.sb_rextents) - bno = mp->m_sb.sb_rextents - 1; + if (start >= mp->m_sb.sb_rextents) + start = mp->m_sb.sb_rextents - 1; /* Make sure we don't run off the end of the rt volume. */ - maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno; + maxlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); if (maxlen < minlen) { - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* * Try the exact allocation first. */ - error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, len, - rbpp, rsb, prod, &r); + error = xfs_rtallocate_extent_exact(args, start, minlen, maxlen, len, + prod, &r); if (error) { return error; } /* * If the exact allocation worked, return that. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } - bbno = XFS_BITTOBLOCK(mp, bno); + bbno = xfs_rtx_to_rbmblock(mp, start); i = 0; + j = -1; ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* @@ -480,8 +488,8 @@ xfs_rtallocate_extent_near( * Get summary information of extents of all useful levels * starting in this bitmap block. 
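maxlog reports the largest log2 size class with any free extents in this bitmap block, or -1 if it has none at all.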
*/ - error = xfs_rtany_summary(mp, tp, log2len, mp->m_rsumlevels - 1, - bbno + i, rbpp, rsb, &any); + error = xfs_rtany_summary(args, log2len, mp->m_rsumlevels - 1, + bbno + i, &maxlog); if (error) { return error; } @@ -489,7 +497,10 @@ xfs_rtallocate_extent_near( * If there are any useful extents starting here, try * allocating one. */ - if (any) { + if (maxlog >= 0) { + xfs_extlen_t maxavail = + min_t(xfs_rtblock_t, maxlen, + (1ULL << (maxlog + 1)) - 1); /* * On the positive side of the starting location. */ @@ -498,17 +509,17 @@ xfs_rtallocate_extent_near( * Try to allocate an extent starting in * this block. */ - error = xfs_rtallocate_extent_block(mp, tp, - bbno + i, minlen, maxlen, len, &n, rbpp, - rsb, prod, &r); + error = xfs_rtallocate_extent_block(args, + bbno + i, minlen, maxavail, len, + &n, prod, &r); if (error) { return error; } /* * If it worked, return it. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } } @@ -516,68 +527,46 @@ xfs_rtallocate_extent_near( * On the negative side of the starting location. */ else { /* i < 0 */ + int maxblocks; + /* - * Loop backwards through the bitmap blocks from - * the starting point-1 up to where we are now. - * There should be an extent which ends in this - * bitmap block and is long enough. + * Loop backwards to find the end of the extent + * we found in the realtime summary. + * + * maxblocks is the maximum possible number of + * bitmap blocks from the start of the extent + * to the end of the extent. */ - for (j = -1; j > i; j--) { - /* - * Grab the summary information for - * this bitmap block. - */ - error = xfs_rtany_summary(mp, tp, - log2len, mp->m_rsumlevels - 1, - bbno + j, rbpp, rsb, &any); - if (error) { - return error; - } - /* - * If there's no extent given in the - * summary that means the extent we - * found must carry over from an - * earlier block. If there is an - * extent given, we've already tried - * that allocation, don't do it again. - */ - if (any) - continue; - error = xfs_rtallocate_extent_block(mp, - tp, bbno + j, minlen, maxlen, - len, &n, rbpp, rsb, prod, &r); + if (maxlog == 0) + maxblocks = 0; + else if (maxlog < mp->m_blkbit_log) + maxblocks = 1; + else + maxblocks = 2 << (maxlog - mp->m_blkbit_log); + + /* + * We need to check bbno + i + maxblocks down to + * bbno + i. We already checked bbno down to + * bbno + j + 1, so we don't need to check those + * again. + */ + j = min(i + maxblocks, j); + for (; j >= i; j--) { + error = xfs_rtallocate_extent_block(args, + bbno + j, minlen, + maxavail, len, &n, prod, + &r); if (error) { return error; } /* * If it works, return the extent. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } } - /* - * There weren't intervening bitmap blocks - * with a long enough extent, or the - * allocation didn't work for some reason - * (i.e. it's a little * too short). - * Try to allocate from the summary block - * that we found. - */ - error = xfs_rtallocate_extent_block(mp, tp, - bbno + i, minlen, maxlen, len, &n, rbpp, - rsb, prod, &r); - if (error) { - return error; - } - /* - * If it works, return the extent. - */ - if (r != NULLRTBLOCK) { - *rtblock = r; - return 0; - } } } /* @@ -610,7 +599,7 @@ xfs_rtallocate_extent_near( else break; } - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } @@ -619,26 +608,25 @@ xfs_rtallocate_extent_near( * specified. If we don't get maxlen then use prod to trim * the length, if given. The lengths are all in rtextents. 
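The first pass below looks only for maxlen-sized extents; if that turns up nothing, a second pass walks the smaller summary levels accepting any length from minlen to maxlen - 1.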
*/ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_size( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; /* error value */ - int i; /* bitmap block number */ - int l; /* level number (loop control) */ - xfs_rtblock_t n; /* next block to be tried */ - xfs_rtblock_t r; /* result block number */ - xfs_suminfo_t sum; /* summary information for extents */ - - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + struct xfs_mount *mp = args->mp; + int error; + xfs_fileoff_t i; /* bitmap block number */ + int l; /* level number (loop control) */ + xfs_rtxnum_t n; /* next rtext to be tried */ + xfs_rtxnum_t r; /* result rtext number */ + xfs_suminfo_t sum; /* summary information for extents */ + + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); ASSERT(maxlen != 0); /* @@ -656,8 +644,7 @@ xfs_rtallocate_extent_size( /* * Get the summary for this level/block. */ - error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb, - &sum); + error = xfs_rtget_summary(args, l, i, &sum); if (error) { return error; } @@ -669,16 +656,16 @@ xfs_rtallocate_extent_size( /* * Try allocating the extent. */ - error = xfs_rtallocate_extent_block(mp, tp, i, maxlen, - maxlen, len, &n, rbpp, rsb, prod, &r); + error = xfs_rtallocate_extent_block(args, i, maxlen, + maxlen, len, &n, prod, &r); if (error) { return error; } /* * If it worked, return that. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } /* @@ -686,8 +673,8 @@ xfs_rtallocate_extent_size( * allocator is beyond the next bitmap block, * skip to that bitmap block. */ - if (XFS_BITTOBLOCK(mp, n) > i + 1) - i = XFS_BITTOBLOCK(mp, n) - 1; + if (xfs_rtx_to_rbmblock(mp, n) > i + 1) + i = xfs_rtx_to_rbmblock(mp, n) - 1; } } /* @@ -695,7 +682,7 @@ xfs_rtallocate_extent_size( * we're asking for a fixed size extent. */ if (minlen > --maxlen) { - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } ASSERT(minlen != 0); @@ -715,8 +702,7 @@ xfs_rtallocate_extent_size( /* * Get the summary information for this level/block. */ - error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb, - &sum); + error = xfs_rtget_summary(args, l, i, &sum); if (error) { return error; } @@ -730,18 +716,18 @@ xfs_rtallocate_extent_size( * minlen/maxlen are in the possible range for * this summary level. */ - error = xfs_rtallocate_extent_block(mp, tp, i, + error = xfs_rtallocate_extent_block(args, i, XFS_RTMAX(minlen, 1 << l), XFS_RTMIN(maxlen, (1 << (l + 1)) - 1), - len, &n, rbpp, rsb, prod, &r); + len, &n, prod, &r); if (error) { return error; } /* * If it worked, return that extent. 
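The request was clamped to this summary level's size range before the call, so the extent is properly sized.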
*/ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } /* @@ -749,14 +735,14 @@ xfs_rtallocate_extent_size( * allocator is beyond the next bitmap block, * skip to that bitmap block. */ - if (XFS_BITTOBLOCK(mp, n) > i + 1) - i = XFS_BITTOBLOCK(mp, n) - 1; + if (xfs_rtx_to_rbmblock(mp, n) > i + 1) + i = xfs_rtx_to_rbmblock(mp, n) - 1; } } /* * Got nothing, return failure. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } @@ -886,12 +872,14 @@ xfs_alloc_rsum_cache( xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */ { /* - * The rsum cache is initialized to all zeroes, which is trivially a - * lower bound on the minimum level with any free extents. We can - * continue without the cache if it couldn't be allocated. + * The rsum cache is initialized to the maximum value, which is + * trivially an upper bound on the maximum level with any free extents. + * We can continue without the cache if it couldn't be allocated. */ - mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL); - if (!mp->m_rsum_cache) + mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL); + if (mp->m_rsum_cache) + memset(mp->m_rsum_cache, -1, rbmblocks); + else xfs_warn(mp, "could not allocate realtime summary cache"); } @@ -907,13 +895,13 @@ xfs_growfs_rt( xfs_mount_t *mp, /* mount point for filesystem */ xfs_growfs_rt_t *in) /* growfs rt input struct */ { - xfs_rtblock_t bmbno; /* bitmap block number */ + xfs_fileoff_t bmbno; /* bitmap block number */ struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ - xfs_rtblock_t nrextents; /* new number of realtime extents */ + xfs_rtxnum_t nrextents; /* new number of realtime extents */ uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ uint nrsumlevels; /* new rt summary levels */ @@ -922,7 +910,6 @@ xfs_growfs_rt( xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */ xfs_extlen_t rsumblocks; /* current number of rt summary blks */ xfs_sb_t *sbp; /* old superblock */ - xfs_fsblock_t sumbno; /* summary block number */ uint8_t *rsum_cache; /* old summary cache */ sbp = &mp->m_sb; @@ -954,7 +941,7 @@ xfs_growfs_rt( return -EINVAL; /* Unsupported realtime features. */ - if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) + if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) return -EOPNOTSUPP; nrblocks = in->newblocks; @@ -976,11 +963,10 @@ xfs_growfs_rt( */ nrextents = nrblocks; do_div(nrextents, in->extsize); - nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); + nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); nrextslog = xfs_highbit32(nrextents); nrsumlevels = nrextslog + 1; - nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; - nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); + nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks); nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); /* * New summary size can't be more than half the size of @@ -1023,6 +1009,12 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { + struct xfs_rtalloc_args args = { + .mp = mp, + }; + struct xfs_rtalloc_args nargs = { + .mp = nmp, + }; struct xfs_trans *tp; xfs_rfsblock_t nrblocks_step; @@ -1032,19 +1024,17 @@ xfs_growfs_rt( * Calculate new sb and mount fields for this round. 
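Each pass grows the realtime area by at most one bitmap block's worth of extents, so the fake superblock describes the geometry as of bmbno + 1 bitmap blocks.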
*/ nsbp->sb_rextsize = in->extsize; + nmp->m_rtxblklog = -1; /* don't use shift or masking */ nsbp->sb_rbmblocks = bmbno + 1; nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * nsbp->sb_rextsize; nsbp->sb_rblocks = min(nrblocks, nrblocks_step); - nsbp->sb_rextents = nsbp->sb_rblocks; - do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks); ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; - nrsumsize = - (uint)sizeof(xfs_suminfo_t) * nrsumlevels * - nsbp->sb_rbmblocks; - nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); + nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, + nsbp->sb_rbmblocks); nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); /* * Start a transaction, get the log reservation. @@ -1053,6 +1043,9 @@ xfs_growfs_rt( &tp); if (error) break; + args.tp = tp; + nargs.tp = tp; + /* * Lock out other callers by grabbing the bitmap inode lock. */ @@ -1086,7 +1079,7 @@ xfs_growfs_rt( */ if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks || mp->m_rsumlevels != nmp->m_rsumlevels) { - error = xfs_rtcopy_summary(mp, nmp, tp); + error = xfs_rtcopy_summary(&args, &nargs); if (error) goto error_cancel; } @@ -1111,9 +1104,9 @@ xfs_growfs_rt( /* * Free new extent. */ - bp = NULL; - error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, - nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); + error = xfs_rtfree_range(&nargs, sbp->sb_rextents, + nsbp->sb_rextents - sbp->sb_rextents); + xfs_rtbuf_cache_relse(&nargs); if (error) { error_cancel: xfs_trans_cancel(tp); @@ -1171,59 +1164,60 @@ out_free: * parameters. The length units are all in realtime extents, as is the * result block number. */ -int /* error */ +int xfs_rtallocate_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - int wasdel, /* was a delayed allocation extent */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_trans *tp, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + int wasdel, /* was a delayed allocation extent */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtblock) /* out: start rtext allocated */ { - xfs_mount_t *mp = tp->t_mountp; - int error; /* error value */ - xfs_rtblock_t r; /* result allocated block */ - xfs_fsblock_t sb; /* summary file block number */ - struct xfs_buf *sumbp; /* summary file block buffer */ - - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + struct xfs_rtalloc_args args = { + .mp = tp->t_mountp, + .tp = tp, + }; + int error; /* error value */ + xfs_rtxnum_t r; /* result allocated rtext */ + + ASSERT(xfs_isilocked(args.mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); /* * If prod is set then figure out what to do to minlen and maxlen. 
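Round maxlen down and minlen up to multiples of prod so that whatever length is finally allocated stays aligned to the extent size hint.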
*/ if (prod > 1) { - xfs_extlen_t i; + xfs_rtxlen_t i; if ((i = maxlen % prod)) maxlen -= i; if ((i = minlen % prod)) minlen += prod - i; if (maxlen < minlen) { - *rtblock = NULLRTBLOCK; + *rtblock = NULLRTEXTNO; return 0; } } retry: - sumbp = NULL; - if (bno == 0) { - error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, - &sumbp, &sb, prod, &r); + if (start == 0) { + error = xfs_rtallocate_extent_size(&args, minlen, + maxlen, len, prod, &r); } else { - error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, - len, &sumbp, &sb, prod, &r); + error = xfs_rtallocate_extent_near(&args, start, minlen, + maxlen, len, prod, &r); } + xfs_rtbuf_cache_relse(&args); if (error) return error; /* * If it worked, update the superblock. */ - if (r != NULLRTBLOCK) { + if (r != NULLRTEXTNO) { long slen = (long)*len; ASSERT(*len >= minlen && *len <= maxlen); @@ -1250,6 +1244,7 @@ xfs_rtmount_init( struct xfs_buf *bp; /* buffer for last block of subvolume */ struct xfs_sb *sbp; /* filesystem superblock copy in mount */ xfs_daddr_t d; /* address of last block of subvolume */ + unsigned int rsumblocks; int error; sbp = &mp->m_sb; @@ -1261,10 +1256,9 @@ xfs_rtmount_init( return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; - mp->m_rsumsize = - (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels * - sbp->sb_rbmblocks; - mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize); + rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, + mp->m_sb.sb_rbmblocks); + mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks); mp->m_rbmip = mp->m_rsumip = NULL; /* * Check that the realtime section is an ok size. @@ -1418,27 +1412,27 @@ xfs_rtunmount_inodes( * of rtextents and the fraction. * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ... */ -int /* error */ +int /* error */ xfs_rtpick_extent( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_extlen_t len, /* allocation length (rtextents) */ - xfs_rtblock_t *pick) /* result rt extent */ - { - xfs_rtblock_t b; /* result block */ + xfs_rtxlen_t len, /* allocation length (rtextents) */ + xfs_rtxnum_t *pick) /* result rt extent */ +{ + xfs_rtxnum_t b; /* result rtext */ int log2; /* log of sequence number */ uint64_t resid; /* residual after log removed */ uint64_t seq; /* sequence number of file creation */ - struct timespec64 ts; /* temporary timespec64 storage */ + struct timespec64 ts; /* timespec in inode */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + ts = inode_get_atime(VFS_I(mp->m_rbmip)); if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) { mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; seq = 0; } else { - ts = inode_get_atime(VFS_I(mp->m_rbmip)); - seq = (uint64_t)ts.tv_sec; + seq = ts.tv_sec; } if ((log2 = xfs_highbit64(seq)) == -1) b = 0; @@ -1451,7 +1445,7 @@ xfs_rtpick_extent( if (b + len > mp->m_sb.sb_rextents) b = mp->m_sb.sb_rextents - len; } - ts.tv_sec = (time64_t)seq + 1; + ts.tv_sec = seq + 1; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); *pick = b; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 62c7ad79cbb6..f7cb9ffe51ca 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -11,22 +11,6 @@ struct xfs_mount; struct xfs_trans; -/* - * XXX: Most of the realtime allocation functions deal in units of realtime - * extents, not realtime blocks. This looks funny when paired with the type - * name and screams for a larger cleanup. 
- */ -struct xfs_rtalloc_rec { - xfs_rtblock_t ar_startext; - xfs_rtblock_t ar_extcount; -}; - -typedef int (*xfs_rtalloc_query_range_fn)( - struct xfs_mount *mp, - struct xfs_trans *tp, - const struct xfs_rtalloc_rec *rec, - void *priv); - #ifdef CONFIG_XFS_RT /* * Function prototypes for exported functions. @@ -40,23 +24,14 @@ typedef int (*xfs_rtalloc_query_range_fn)( int /* error */ xfs_rtallocate_extent( struct xfs_trans *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ int wasdel, /* was a delayed allocation extent */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock); /* out: start block allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtblock); /* out: start rtext allocated */ -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - struct xfs_trans *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len); /* length of extent freed */ /* * Initialize realtime fields in the mount structure. @@ -87,8 +62,8 @@ int /* error */ xfs_rtpick_extent( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - xfs_extlen_t len, /* allocation length (rtextents) */ - xfs_rtblock_t *pick); /* result rt extent */ + xfs_rtxlen_t len, /* allocation length (rtextents) */ + xfs_rtxnum_t *pick); /* result rt extent */ /* * Grow the realtime area of the filesystem. 
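Reached from the XFS_IOC_GROWFSRT ioctl; the new size and extent size in the growfs struct are given in filesystem blocks.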
@@ -98,55 +73,12 @@ xfs_growfs_rt( struct xfs_mount *mp, /* file system mount structure */ xfs_growfs_rt_t *in); /* user supplied growfs struct */ -/* - * From xfs_rtbitmap.c - */ -int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t block, int issum, struct xfs_buf **bpp); -int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val, - xfs_rtblock_t *new, int *stat); -int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val); -int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, - int log, xfs_rtblock_t bbno, int delta, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb, - xfs_suminfo_t *sum); -int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, - xfs_fsblock_t *rsb); -int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb); -int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, - xfs_rtalloc_query_range_fn fn, void *priv); -int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtalloc_query_range_fn fn, - void *priv); -bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); -int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - bool *is_free); int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); #else -# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) -# define xfs_rtfree_extent(t,b,l) (ENOSYS) -# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) -# define xfs_growfs_rt(mp,in) (ENOSYS) -# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) -# define xfs_rtalloc_query_all(m,t,f,p) (ENOSYS) -# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) -# define xfs_verify_rtbno(m, r) (false) -# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) -# define xfs_rtalloc_reinit_frextents(m) (0) +# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) +# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) +# define xfs_growfs_rt(mp,in) (-ENOSYS) +# define xfs_rtalloc_reinit_frextents(m) (0) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ @@ -157,7 +89,7 @@ xfs_rtmount_init( xfs_warn(mp, "Not built with CONFIG_XFS_RT"); return -ENOSYS; } -# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) +# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 
0 : (-ENOSYS)) # define xfs_rtunmount_inodes(m) #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f0ae07828153..764304595e8b 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -42,6 +42,7 @@ #include "xfs_xattr.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" +#include "xfs_rtbitmap.h" #include "scrub/stats.h" #include <linux/magic.h> @@ -896,7 +897,7 @@ xfs_fs_statfs( statp->f_blocks = sbp->sb_rblocks; freertx = percpu_counter_sum_positive(&mp->m_frextents); - statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize; + statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx); } return 0; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8c0bfc9a33b1..305c9d07bf1b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -24,6 +24,7 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_icache.h" +#include "xfs_rtbitmap.h" struct kmem_cache *xfs_trans_cache; @@ -655,6 +656,10 @@ xfs_trans_unreserve_and_mod_sb( mp->m_sb.sb_agcount += tp->t_agcount_delta; mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; mp->m_sb.sb_rextsize += tp->t_rextsize_delta; + if (tp->t_rextsize_delta) { + mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize); + } mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; mp->m_sb.sb_rblocks += tp->t_rblocks_delta; mp->m_sb.sb_rextents += tp->t_rextents_delta; @@ -1196,7 +1201,7 @@ xfs_trans_alloc_inode( retry: error = xfs_trans_alloc(mp, resv, dblocks, - rblocks / mp->m_sb.sb_rextsize, + xfs_extlen_to_rtxlen(mp, rblocks), force ? XFS_TRANS_RESERVE : 0, &tp); if (error) return error; diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 90b69270f2fa..724c45e3e9a7 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -68,4 +68,9 @@ enum amd_asic_type { extern const char *amdgpu_asic_name[]; +struct amdgpu_asic_type_quirk { + unsigned short device; /* PCI device ID */ + u8 revision; /* revision ID */ + unsigned short type; /* real ASIC type */ +}; #endif /*__AMD_ASIC_TYPE_H__ */ diff --git a/include/dt-bindings/watchdog/aspeed-wdt.h b/include/dt-bindings/watchdog/aspeed-wdt.h new file mode 100644 index 000000000000..7ae6d84b2bd9 --- /dev/null +++ b/include/dt-bindings/watchdog/aspeed-wdt.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef DT_BINDINGS_ASPEED_WDT_H +#define DT_BINDINGS_ASPEED_WDT_H + +#define AST2500_WDT_RESET_CPU (1 << 0) +#define AST2500_WDT_RESET_COPROC (1 << 1) +#define AST2500_WDT_RESET_SDRAM (1 << 2) +#define AST2500_WDT_RESET_AHB (1 << 3) +#define AST2500_WDT_RESET_I2C (1 << 4) +#define AST2500_WDT_RESET_MAC0 (1 << 5) +#define AST2500_WDT_RESET_MAC1 (1 << 6) +#define AST2500_WDT_RESET_GRAPHICS (1 << 7) +#define AST2500_WDT_RESET_USB2_HOST_HUB (1 << 8) +#define AST2500_WDT_RESET_USB_HOST (1 << 9) +#define AST2500_WDT_RESET_HID_EHCI (1 << 10) +#define AST2500_WDT_RESET_VIDEO (1 << 11) +#define AST2500_WDT_RESET_HAC (1 << 12) +#define AST2500_WDT_RESET_LPC (1 << 13) +#define AST2500_WDT_RESET_SDIO (1 << 14) +#define AST2500_WDT_RESET_MIC (1 << 15) +#define AST2500_WDT_RESET_CRT (1 << 16) +#define AST2500_WDT_RESET_PWM (1 << 17) +#define AST2500_WDT_RESET_PECI (1 << 18) +#define AST2500_WDT_RESET_JTAG (1 << 19) +#define AST2500_WDT_RESET_ADC (1 << 20) +#define AST2500_WDT_RESET_GPIO (1 << 21) +#define AST2500_WDT_RESET_MCTP (1 << 22) +#define AST2500_WDT_RESET_XDMA (1 << 23) +#define AST2500_WDT_RESET_SPI (1 << 24) +#define 
AST2500_WDT_RESET_SOC_MISC (1 << 25) + +#define AST2500_WDT_RESET_DEFAULT 0x023ffff3 + +#define AST2600_WDT_RESET1_CPU (1 << 0) +#define AST2600_WDT_RESET1_SDRAM (1 << 1) +#define AST2600_WDT_RESET1_AHB (1 << 2) +#define AST2600_WDT_RESET1_SLI (1 << 3) +#define AST2600_WDT_RESET1_SOC_MISC0 (1 << 4) +#define AST2600_WDT_RESET1_COPROC (1 << 5) +#define AST2600_WDT_RESET1_USB_A (1 << 6) +#define AST2600_WDT_RESET1_USB_B (1 << 7) +#define AST2600_WDT_RESET1_UHCI (1 << 8) +#define AST2600_WDT_RESET1_GRAPHICS (1 << 9) +#define AST2600_WDT_RESET1_CRT (1 << 10) +#define AST2600_WDT_RESET1_VIDEO (1 << 11) +#define AST2600_WDT_RESET1_HAC (1 << 12) +#define AST2600_WDT_RESET1_DP (1 << 13) +#define AST2600_WDT_RESET1_DP_MCU (1 << 14) +#define AST2600_WDT_RESET1_GP_MCU (1 << 15) +#define AST2600_WDT_RESET1_MAC0 (1 << 16) +#define AST2600_WDT_RESET1_MAC1 (1 << 17) +#define AST2600_WDT_RESET1_SDIO0 (1 << 18) +#define AST2600_WDT_RESET1_JTAG0 (1 << 19) +#define AST2600_WDT_RESET1_MCTP0 (1 << 20) +#define AST2600_WDT_RESET1_MCTP1 (1 << 21) +#define AST2600_WDT_RESET1_XDMA0 (1 << 22) +#define AST2600_WDT_RESET1_XDMA1 (1 << 23) +#define AST2600_WDT_RESET1_GPIO0 (1 << 24) +#define AST2600_WDT_RESET1_RVAS (1 << 25) + +#define AST2600_WDT_RESET1_DEFAULT 0x030f1ff1 + +#define AST2600_WDT_RESET2_CPU (1 << 0) +#define AST2600_WDT_RESET2_SPI (1 << 1) +#define AST2600_WDT_RESET2_AHB2 (1 << 2) +#define AST2600_WDT_RESET2_SLI2 (1 << 3) +#define AST2600_WDT_RESET2_SOC_MISC1 (1 << 4) +#define AST2600_WDT_RESET2_MAC2 (1 << 5) +#define AST2600_WDT_RESET2_MAC3 (1 << 6) +#define AST2600_WDT_RESET2_SDIO1 (1 << 7) +#define AST2600_WDT_RESET2_JTAG1 (1 << 8) +#define AST2600_WDT_RESET2_GPIO1 (1 << 9) +#define AST2600_WDT_RESET2_MDIO (1 << 10) +#define AST2600_WDT_RESET2_LPC (1 << 11) +#define AST2600_WDT_RESET2_PECI (1 << 12) +#define AST2600_WDT_RESET2_PWM (1 << 13) +#define AST2600_WDT_RESET2_ADC (1 << 14) +#define AST2600_WDT_RESET2_FSI (1 << 15) +#define AST2600_WDT_RESET2_I2C (1 << 16) +#define AST2600_WDT_RESET2_I3C_GLOBAL (1 << 17) +#define AST2600_WDT_RESET2_I3C0 (1 << 18) +#define AST2600_WDT_RESET2_I3C1 (1 << 19) +#define AST2600_WDT_RESET2_I3C2 (1 << 20) +#define AST2600_WDT_RESET2_I3C3 (1 << 21) +#define AST2600_WDT_RESET2_I3C4 (1 << 22) +#define AST2600_WDT_RESET2_I3C5 (1 << 23) +#define AST2600_WDT_RESET2_ESPI (1 << 26) + +#define AST2600_WDT_RESET2_DEFAULT 0x03fffff1 + +#endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,126 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. 
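The IOMMUv2 device API deleted here and in the lines that follow (init_device, the PASID bind/unbind calls, the PPR and invalidate-context callbacks) has its modern counterpart in the generic SVA interface; a hedged sketch of the replacement usage, assuming the device advertises IOMMU_DEV_FEAT_SVA, with error handling trimmed:

/* Sketch: bind current->mm to a device PASID through the generic
 * SVA API that supersedes amd_iommu_bind_pasid(). */
#include <linux/iommu.h>

struct iommu_sva *handle;
u32 pasid;

handle = iommu_sva_bind_device(dev, current->mm);
if (IS_ERR(handle))
	return PTR_ERR(handle);
pasid = iommu_sva_get_pasid(handle);	/* use in device descriptors */
/* ... issue PASID-tagged DMA ... */
iommu_sva_unbind_device(handle);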
- */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. 
This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } diff --git a/include/linux/btf.h b/include/linux/btf.h index c2231c64d60b..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -84,6 +84,17 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) 
\ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f3b3593254b9..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,11 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) + extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_MODE (1 << 0) @@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 2 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 struct ceph_mds_request_head_old { __le16 version; /* struct version */ @@ -528,6 +533,9 @@ struct ceph_mds_request_head { __le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ } __attribute__ ((packed)); /* cap/lease release record */ diff --git a/include/linux/closure.h b/include/linux/closure.h index 722a586bb224..de7bb47d8a46 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -154,6 +154,7 @@ struct closure { struct closure *parent; atomic_t remaining; + bool closure_get_happened; #ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead @@ -185,7 +186,11 @@ static inline unsigned closure_nr_remaining(struct closure *cl) */ static inline void closure_sync(struct closure *cl) { - if (closure_nr_remaining(cl) != 1) +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) __closure_sync(cl); } @@ -233,8 +238,6 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn, closure_set_ip(cl); cl->fn = fn; cl->wq = wq; - /* between atomic_dec() in closure_put() */ - smp_mb__before_atomic(); } static inline void closure_queue(struct closure *cl) @@ -259,6 +262,8 @@ static inline void closure_queue(struct closure *cl) */ static inline void closure_get(struct closure *cl) { + cl->closure_get_happened = true; + #ifdef CONFIG_DEBUG_CLOSURES BUG_ON((atomic_inc_return(&cl->remaining) & CLOSURE_REMAINING_MASK) <= 1); @@ -281,6 +286,7 @@ static inline void closure_init(struct closure *cl, struct closure *parent) closure_get(parent); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; closure_debug_create(cl); closure_set_ip(cl); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f0ccca16a0ac..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct 
device *dev) { return 0; @@ -465,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 226a36ed5aa1..689028257fcc 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,10 +1045,10 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 0388e8c20f52..bb37ad5cc954 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -99,6 +99,17 @@ enum fid_type { FILEID_FAT_WITH_PARENT = 0x72, /* + * 64 bit inode number, 32 bit generation number. + */ + FILEID_INO64_GEN = 0x81, + + /* + * 64 bit inode number, 32 bit generation number, + * 64 bit parent inode number, 32 bit parent generation. + */ + FILEID_INO64_GEN_PARENT = 0x82, + + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) */ @@ -129,7 +140,11 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - struct { + struct { + u64 ino; + u32 gen; + } __packed i64; + struct { u32 block; u16 partref; u16 parent_partref; @@ -253,6 +268,33 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags); +static inline bool exportfs_can_encode_fid(const struct export_operations *nop) +{ + return !nop || nop->encode_fh; +} + +static inline bool exportfs_can_decode_fh(const struct export_operations *nop) +{ + return nop && nop->fh_to_dentry; +} + +static inline bool exportfs_can_encode_fh(const struct export_operations *nop, + int fh_flags) +{ + /* + * If a non-decodeable file handle was requested, we only need to make + * sure that filesystem did not opt-out of encoding fid. + */ + if (fh_flags & EXPORT_FH_FID) + return exportfs_can_encode_fid(nop); + + /* + * If a decodeable file handle was requested, we need to make sure that + * filesystem can also decode file handles. + */ + return exportfs_can_decode_fh(nop); +} + static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, int *max_len) { @@ -272,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Generic helpers for filesystems. 
*/ -extern struct dentry *generic_fh_to_dentry(struct super_block *sb, +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); -extern struct dentry *generic_fh_to_parent(struct super_block *sb, +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1d454dc944b3..0aed62f0c633 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -605,9 +605,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); -struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); - struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); struct gpio_device *gpio_device_find_by_label(const char *label); diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* diff --git a/include/linux/input.h b/include/linux/input.h index 49790c1bd2c4..de6503c0edb8 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -562,7 +562,7 @@ struct ff_device { int max_effects; struct ff_effect *effects; - struct file *effect_owners[]; + struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 3b8580bd33c1..2cf89a538b18 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -47,7 +47,7 @@ struct input_mt { unsigned int flags; unsigned int frame; int *red; - struct input_mt_slot slots[]; + struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8fb1b41b4d15..ec289c1016f5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -66,6 +66,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) #define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested on a stage-2 translation */ @@ -86,6 +87,8 @@ struct iommu_domain_geometry { * invalidation. 
* IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -96,6 +99,7 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) #define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED) struct iommu_domain { @@ -340,13 +344,12 @@ static inline int __iommu_copy_struct_from_user( * NULL while the @user_data can be optionally provided, the * new domain must support __IOMMU_DOMAIN_PAGING. * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -365,6 +368,13 @@ static inline int __iommu_copy_struct_from_user( * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @blocked_domain: An always available, always attachable blocking + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -375,11 +385,11 @@ struct iommu_ops { struct iommu_domain *(*domain_alloc_user)( struct device *dev, u32 flags, struct iommu_domain *parent, const struct iommu_user_data *user_data); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ @@ -402,6 +412,9 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *blocked_domain; + struct iommu_domain *default_domain; }; /** @@ -420,10 +433,8 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. 
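Alongside these domain-type additions, the domain ops lose the single-page ->map/->unmap callbacks in favor of ->map_pages/->unmap_pages (see the hunk that follows); a driver that still works page-at-a-time can adapt with a loop like this sketch, where map_one_page() is a hypothetical per-page helper:

/* Sketch of a map_pages implementation; *mapped tells the core how
 * much to unwind if a middle page fails, per the ops contract. */
static int my_map_pages(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t pgsize, size_t pgcount,
			int prot, gfp_t gfp, size_t *mapped)
{
	size_t i;
	int ret;

	for (i = 0; i < pgcount; i++) {
		ret = map_one_page(domain, iova, paddr, pgsize, prot, gfp);
		if (ret)
			return ret;
		iova += pgsize;
		paddr += pgsize;
		*mapped += pgsize;
	}
	return 0;
}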
- * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware @@ -442,20 +453,16 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); @@ -476,6 +483,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -483,6 +491,7 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; @@ -526,6 +535,7 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -541,6 +551,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -768,6 +779,7 @@ extern struct iommu_group *pci_device_group(struct device *dev); extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data @@ -1253,7 +1265,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {} * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. 
*/ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 365eb092e9c4..ab1da3142b06 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -445,6 +445,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 99b8176c3738..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) @@ -273,7 +273,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..7b12eebc5586 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -295,7 +295,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..b8da2db4ecd2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd628c4b011e..cd797e00fe35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -239,6 +239,7 @@ struct nfs_server { struct list_head 
delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12bbb5c63664..539b57fbf3ce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1772,7 +1772,7 @@ struct nfs_rpc_ops { void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct page *, + int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 4efea9dd967c..e10333d78dbb 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,14 +6,12 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H -#ifdef CONFIG_NVME_KEYRING +#if IS_ENABLED(CONFIG_NVME_KEYRING) key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); -int nvme_keyring_init(void); -void nvme_keyring_exit(void); #else @@ -26,11 +24,5 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } -static inline int nvme_keyring_init(void) -{ - return 0; -} -static inline void nvme_keyring_exit(void) {} - #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a7ba74babad7..44325c068b6a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1732,7 +1732,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 4729d54e8995..73de70362b98 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -17,6 +17,7 @@ #include <linux/pinctrl/pinctrl-state.h> struct device; +struct gpio_chip; /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; @@ -25,27 +26,30 @@ struct pinctrl_state; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern bool pinctrl_gpio_can_use_line(unsigned gpio); -extern int pinctrl_gpio_request(unsigned gpio); -extern void pinctrl_gpio_free(unsigned gpio); -extern int pinctrl_gpio_direction_input(unsigned gpio); -extern int pinctrl_gpio_direction_output(unsigned gpio); -extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); - -extern struct pinctrl * __must_check pinctrl_get(struct device *dev); -extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, - const char *name); -extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); - -extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); -extern void devm_pinctrl_put(struct pinctrl *p); -extern int pinctrl_select_default_state(struct device *dev); +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset); +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset); +int 
pinctrl_gpio_direction_input(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_direction_output(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config); + +struct pinctrl * __must_check pinctrl_get(struct device *dev); +void pinctrl_put(struct pinctrl *p); +struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); +int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); + +struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); +void devm_pinctrl_put(struct pinctrl *p); +int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM -extern int pinctrl_pm_select_default_state(struct device *dev); -extern int pinctrl_pm_select_sleep_state(struct device *dev); -extern int pinctrl_pm_select_idle_state(struct device *dev); +int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_sleep_state(struct device *dev); +int pinctrl_pm_select_idle_state(struct device *dev); #else static inline int pinctrl_pm_select_default_state(struct device *dev) { @@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline bool pinctrl_gpio_can_use_line(unsigned gpio) +static inline bool +pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { return true; } -static inline int pinctrl_gpio_request(unsigned gpio) +static inline int +pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline void pinctrl_gpio_free(unsigned gpio) +static inline void +pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { } -static inline int pinctrl_gpio_direction_input(unsigned gpio) +static inline int +pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_direction_output(unsigned gpio) +static inline int +pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +static inline int +pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { return 0; } diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..6c19d4fbbe39 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h> enum shmob_drm_clk_source { SHMOB_DRM_CLK_BUS, @@ -18,72 +18,21 @@ enum shmob_drm_clk_source { SHMOB_DRM_CLK_EXTERNAL, }; -enum shmob_drm_interface { - SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_RGB12B, /* 12bpp */ - SHMOB_DRM_IFACE_RGB16, /* 16bpp */ - SHMOB_DRM_IFACE_RGB18, /* 18bpp */ - SHMOB_DRM_IFACE_RGB24, /* 24bpp */ - SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ -}; - 
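The pinctrl consumer calls above switch from numbers in the deprecated global GPIO numberspace to a (gpio_chip, chip-relative offset) pair; a sketch of what a converted GPIO driver hook looks like (the my_* names are illustrative):

/* Sketch: request/free hooks after the API change; previously these
 * would have passed gc->base + offset as a global GPIO number. */
static int my_gpio_request(struct gpio_chip *gc, unsigned int offset)
{
	return pinctrl_gpio_request(gc, offset);
}

static void my_gpio_free(struct gpio_chip *gc, unsigned int offset)
{
	pinctrl_gpio_free(gc, offset);
}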
-struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ - struct drm_mode_modeinfo mode; + struct videomode mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - -#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ -#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ -#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ -#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */ -#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */ - struct shmob_drm_interface_data { - enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; + unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */ unsigned int clk_div; - unsigned int flags; }; struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..cda3597b84f2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* @@ -71,7 +71,6 @@ struct pwm_state { * @hwpwm: per-chip relative index of the PWM device * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -82,7 +81,6 @@ struct pwm_device { unsigned int hwpwm; unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -267,7 +265,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. 
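With drm_mode_modeinfo replaced by struct videomode in the shmob-drm platform data just above, board code describes the panel with generic display timings; a hedged example of the new shape, every number below being a placeholder:

/* Sketch: panel timings as a struct videomode instead of a DRM mode. */
#include <video/videomode.h>

static const struct shmob_drm_panel_data panel = {
	.width_mm  = 44,
	.height_mm = 79,
	.mode = {
		.pixelclock   = 5000000,
		.hactive      = 240,
		.hfront_porch = 20,
		.hback_porch  = 20,
		.hsync_len    = 10,
		.vactive      = 320,
		.vfront_porch = 2,
		.vback_porch  = 2,
		.vsync_len    = 2,
	},
};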
- * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,13 +275,13 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller + * @owner: module providing this chip * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier @@ -295,6 +292,7 @@ struct pwm_ops { struct pwm_chip { struct device *dev; const struct pwm_ops *ops; + struct module *owner; int base; unsigned int npwm; @@ -383,13 +381,13 @@ static inline void pwm_disable(struct pwm_device *pwm) /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, @@ -445,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) -{ - return -EINVAL; -} - -static inline void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return NULL; -} - static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 86825c88b576..255a0562aea5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -566,6 +566,7 @@ struct spi_controller { #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index af7358277f1c..e9d4377d03c6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec4e9367f5b0..68f3d315d2e1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,7 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; - s8 req_usec_ts; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene 
Konev <ejka@openwrt.org> - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include <linux/device.h> -#include <linux/types.h> - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 
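Back in the PWM hunk a little further up, the module reference moves from pwm_ops.owner to the chip, recorded implicitly because pwmchip_add() is now a macro expanding to __pwmchip_add(chip, THIS_MODULE); drivers drop the .owner initializer and change nothing else. A sketch, with my_* names invented:

/* Sketch: registration after the owner move; THIS_MODULE is captured
 * by the devm_pwmchip_add() wrapper macro, not by the ops table. */
static const struct pwm_ops my_pwm_ops = {
	.apply = my_pwm_apply,	/* no .owner = THIS_MODULE line anymore */
};

static int my_pwm_probe(struct platform_device *pdev)
{
	struct pwm_chip *chip;

	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;
	chip->dev = &pdev->dev;
	chip->ops = &my_pwm_ops;
	chip->npwm = 1;
	return devm_pwmchip_add(&pdev->dev, chip);
}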
__u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a375a171ef3c..b56be10838f0 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -124,7 +124,7 @@ struct tcp_ao_info { #define tcp_hash_fail(msg, family, skb, fmt, ...) 
\ do { \ const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[5] = {}; \ + char hdr_flags[6]; \ char *f = hdr_flags; \ \ if (th->fin) \ @@ -133,17 +133,18 @@ do { \ *f++ = 'S'; \ if (th->rst) \ *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ if (th->ack) \ - *f++ = 'A'; \ - if (f != hdr_flags) \ - *f = ' '; \ + *f++ = '.'; \ + *f = 0; \ if ((family) == AF_INET) { \ - net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ &ip_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ } else { \ - net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index a5ef84944a06..71ae37d3bedd 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -96,7 +96,6 @@ struct tegra_smmu_soc { struct tegra_mc; struct tegra_smmu; -struct gart_device; #ifdef CONFIG_TEGRA_IOMMU_SMMU struct tegra_smmu *tegra_smmu_probe(struct device *dev, @@ -116,28 +115,6 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu) } #endif -#ifdef CONFIG_TEGRA_IOMMU_GART -struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc); -int tegra_gart_suspend(struct gart_device *gart); -int tegra_gart_resume(struct gart_device *gart); -#else -static inline struct gart_device * -tegra_gart_probe(struct device *dev, struct tegra_mc *mc) -{ - return ERR_PTR(-ENODEV); -} - -static inline int tegra_gart_suspend(struct gart_device *gart) -{ - return -ENODEV; -} - -static inline int tegra_gart_resume(struct gart_device *gart) -{ - return -ENODEV; -} -#endif - struct tegra_mc_reset { const char *name; unsigned long id; @@ -185,8 +162,6 @@ struct tegra_mc_ops { */ int (*probe)(struct tegra_mc *mc); void (*remove)(struct tegra_mc *mc); - int (*suspend)(struct tegra_mc *mc); - int (*resume)(struct tegra_mc *mc); irqreturn_t (*handle_irq)(int irq, void *data); int (*probe_device)(struct tegra_mc *mc, struct device *dev); }; @@ -225,7 +200,6 @@ struct tegra_mc { struct tegra_bpmp *bpmp; struct device *dev; struct tegra_smmu *smmu; - struct gart_device *gart; void __iomem *regs; void __iomem *bcast_ch_regs; void __iomem **ch_regs; diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index a03c543cb072..f05f747e444d 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1025 */ +/* RGB - next is 0x1026 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -46,6 +46,7 @@ #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 +#define MEDIA_BUS_FMT_RGB666_2X9_BE 0x1025 #define MEDIA_BUS_FMT_BGR666_1X18 0x1023 #define MEDIA_BUS_FMT_RBG888_1X24 0x100e #define MEDIA_BUS_FMT_RGB666_1X24_CPADHI 0x1015 diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index c8ae72466ee6..3cd044edee5d 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -3,8 +3,8 @@ /* Documentation/netlink/specs/nfsd.yaml */ /* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_NFSD_H 
-#define _UAPI_LINUX_NFSD_H +#ifndef _UAPI_LINUX_NFSD_NETLINK_H +#define _UAPI_LINUX_NFSD_NETLINK_H #define NFSD_FAMILY_NAME "nfsd" #define NFSD_FAMILY_VERSION 1 @@ -36,4 +36,4 @@ enum { NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) }; -#endif /* _UAPI_LINUX_NFSD_H */ +#endif /* _UAPI_LINUX_NFSD_NETLINK_H */ diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index fea06810b43d..a1e4239c7d75 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -52,7 +52,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; @@ -65,7 +65,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) * multiple use. */ if (req->flags & REQ_F_PARTIAL_IO) - return; + return false; io_ring_submit_lock(ctx, issue_flags); @@ -76,7 +76,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) req->buf_index = buf->bgid; io_ring_submit_unlock(ctx, issue_flags); - return; + return true; } unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags) diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index d14345ef61fc..f2d615236b2c 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -53,11 +53,11 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); -static inline void io_kbuf_recycle_ring(struct io_kiocb *req) +static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear @@ -80,8 +80,10 @@ static inline void io_kbuf_recycle_ring(struct io_kiocb *req) } else { req->buf_index = req->buf_list->bgid; req->flags &= ~REQ_F_BUFFER_RING; + return true; } } + return false; } static inline bool io_do_buffer_select(struct io_kiocb *req) @@ -91,12 +93,13 @@ static inline bool io_do_buffer_select(struct io_kiocb *req) return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); } -static inline void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) +static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) { if (req->flags & REQ_F_BUFFER_SELECTED) - io_kbuf_recycle_legacy(req, issue_flags); + return io_kbuf_recycle_legacy(req, issue_flags); if (req->flags & REQ_F_BUFFER_RING) - io_kbuf_recycle_ring(req); + return io_kbuf_recycle_ring(req); + return false; } static inline unsigned int __io_put_kbuf_list(struct io_kiocb *req, diff --git a/io_uring/net.c b/io_uring/net.c index 7a8e298af81b..75d494dad7e2 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1461,16 +1461,6 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - if (connect->in_progress) { - struct socket *socket; - - ret = -ENOTSOCK; - socket = sock_from_file(req->file); - if (socket) - ret = sock_error(socket->sk); - goto out; - } - if (req_has_async_data(req)) { io = req->async_data; } else { @@ -1490,9 +1480,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; - return -EAGAIN; - 
} - if (ret == -ECONNABORTED) { + } else if (ret == -ECONNABORTED) { if (connect->seen_econnaborted) goto out; connect->seen_econnaborted = true; @@ -1506,6 +1494,16 @@ memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } + if (connect->in_progress) { + /* + * At least bluetooth will return -EBADFD on a re-connect + * attempt, and it's (supposedly) also valid to get -EISCONN + * which means the previous result is good. For both of these, + * grab the sock_error() and use that for the completion. + */ + if (ret == -EBADFD || ret == -EISCONN) + ret = sock_error(sock_from_file(req->file)->sk); + } if (ret == -ERESTARTSYS) ret = -EINTR; out: diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 25a3515a177c..799db44283c7 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -66,7 +66,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_read, }, [IORING_OP_WRITEV] = { @@ -80,7 +80,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_write, }, [IORING_OP_FSYNC] = { @@ -98,7 +98,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_read, }, [IORING_OP_WRITE_FIXED] = { @@ -111,7 +111,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_write, }, [IORING_OP_POLL_ADD] = { diff --git a/io_uring/rw.c b/io_uring/rw.c index 3398e1d944c2..64390d4e20c1 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -83,18 +83,6 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) /* used for fixed read/write too - just read unconditionally */ req->buf_index = READ_ONCE(sqe->buf_index); - if (req->opcode == IORING_OP_READ_FIXED || - req->opcode == IORING_OP_WRITE_FIXED) { - struct io_ring_ctx *ctx = req->ctx; - u16 index; - - if (unlikely(req->buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); - req->imu = ctx->user_bufs[index]; - io_req_set_rsrc_node(req, ctx, 0); - } - ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { ret = ioprio_check_cap(ioprio); @@ -110,16 +98,42 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); + return 0; +} + +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; - /* Have to do this validation here, as this is in io_read() rw->len might * have chanaged due to buffer selection + /* + * Have to do this validation here, as this is in io_read() rw->len + * might have changed due to buffer selection */ - if (req->opcode == IORING_OP_READV && req->flags & REQ_F_BUFFER_SELECT) { - ret = io_iov_buffer_select_prep(req); - if (ret) - return ret; - } + if (req->flags & REQ_F_BUFFER_SELECT) + return io_iov_buffer_select_prep(req); + + return 0; +} +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_ring_ctx *ctx = req->ctx; + u16 index; + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + 
index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); return 0; } @@ -129,12 +143,20 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) */ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); int ret; + /* must be used with provided buffers */ + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; + ret = io_prep_rw(req, sqe); if (unlikely(ret)) return ret; + if (rw->addr || rw->len) + return -EINVAL; + req->flags |= REQ_F_APOLL_MULTISHOT; return 0; } @@ -542,6 +564,9 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, { if (!force && !io_cold_defs[req->opcode].prep_async) return 0; + /* opcode type doesn't need async data */ + if (!io_cold_defs[req->opcode].async_size) + return 0; if (!req_has_async_data(req)) { struct io_async_rw *iorw; @@ -887,6 +912,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags) int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned int cflags = 0; int ret; @@ -903,7 +929,12 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * handling arm it. */ if (ret == -EAGAIN) { - io_kbuf_recycle(req, issue_flags); + /* + * Reset rw->len to 0 again to avoid clamping future mshot + * reads, in case the buffer size varies. + */ + if (io_kbuf_recycle(req, issue_flags)) + rw->len = 0; return -EAGAIN; } @@ -916,6 +947,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * jump to the termination path. This request is then done. */ cflags = io_put_kbuf(req, issue_flags); + rw->len = 0; /* similarly to above, reset len to 0 */ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, diff --git a/io_uring/rw.h b/io_uring/rw.h index c5aed03d42a4..f9e89b4fe4da 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -16,6 +16,8 @@ struct io_async_rw { }; int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_read(struct io_kiocb *req, unsigned int issue_flags); int io_readv_prep_async(struct io_kiocb *req); int io_write(struct io_kiocb *req, unsigned int issue_flags); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 833faa04461b..0fae79164187 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -782,9 +782,7 @@ struct bpf_iter_num_kern { int end; /* final value, exclusive */ } __aligned(8); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) { @@ -843,4 +841,4 @@ __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) s->cur = s->end = 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 209e5135f9fb..f04a468cf6a7 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -282,7 +282,7 @@ static struct bpf_iter_reg bpf_cgroup_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__cgroup, cgroup), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &cgroup_iter_seq_info, }; @@ -305,9 +305,7 @@ struct bpf_iter_css_kern { 
unsigned int flags; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, struct cgroup_subsys_state *start, unsigned int flags) @@ -358,4 +356,4 @@ __bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it) { } -__diag_pop();
\ No newline at end of file +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 6983af8e093c..e01c741e54e7 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -34,9 +34,7 @@ static bool cpu_valid(u32 cpu) return cpu < nr_cpu_ids; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /** * bpf_cpumask_create() - Create a mutable BPF cpumask. @@ -407,7 +405,7 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, return cpumask_any_and_distribute(src1, src2); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e46ac288a108..56b0c1f678ee 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1177,13 +1177,6 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map ret = -EBUSY; goto out; } - if (!atomic64_read(&map->usercnt)) { - /* maps with timers must be either held by user space - * or pinned in bpffs. - */ - ret = -EPERM; - goto out; - } /* allocate hrtimer via map_kmalloc to use memcg accounting */ t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); if (!t) { @@ -1196,7 +1189,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map rcu_assign_pointer(t->callback_fn, NULL); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; - timer->timer = t; + WRITE_ONCE(timer->timer, t); + /* Guarantee the order between timer->timer and map->usercnt. So + * when there are concurrent uref release and bpf timer init, either + * bpf_timer_cancel_and_free() called by uref release reads a non-NULL + * timer or atomic64_read() below returns a zero usercnt. + */ + smp_mb(); + if (!atomic64_read(&map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. + */ + WRITE_ONCE(timer->timer, NULL); + kfree(t); + ret = -EPERM; + } out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; @@ -1374,7 +1381,7 @@ void bpf_timer_cancel_and_free(void *val) /* The subsequent bpf_timer_start/cancel() helpers won't be able to use * this timer, since it won't be initialized. 
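The smp_mb() added to bpf_timer_init() above is one half of a store-buffering (Dekker-style) pairing: each side stores to its own location, then reads the other side's, and the barriers forbid the outcome where both loads miss. A minimal user-space sketch of that pairing, using C11 atomics and hypothetical stand-in names rather than the kernel primitives:

#include <stdatomic.h>
#include <stddef.h>

static _Atomic(void *) timer_ptr;   /* stands in for timer->timer */
static atomic_long usercnt = 1;     /* stands in for map->usercnt */

/* bpf_timer_init() side: publish the timer, then re-check usercnt. */
static int init_side(void *t)
{
        atomic_store_explicit(&timer_ptr, t, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);    /* the smp_mb() */
        if (atomic_load_explicit(&usercnt, memory_order_relaxed) == 0) {
                /* Lost the race: unpublish and fail, as with -EPERM. */
                atomic_store_explicit(&timer_ptr, NULL, memory_order_relaxed);
                return -1;
        }
        return 0;
}

/* uref-release side: drop usercnt, then look for a published timer. */
static void *release_side(void)
{
        atomic_store_explicit(&usercnt, 0, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);    /* pairs with above */
        return atomic_load_explicit(&timer_ptr, memory_order_relaxed);
}

With both fences in place, at least one of the two loads observes the other side's store: either init_side() sees the zero usercnt and frees the timer itself, or release_side() sees a non-NULL timer and can free it.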
*/ - timer->timer = NULL; + WRITE_ONCE(timer->timer, NULL); out: __bpf_spin_unlock_irqrestore(&timer->lock); if (!t) @@ -1886,9 +1893,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, } } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) { @@ -2505,7 +2510,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) WARN(1, "A call to BPF exception callback should never return\n"); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE @@ -2564,15 +2569,17 @@ BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) +#ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) -BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) -BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) +#endif +BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) +BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6fc9dae9edc8..6abd7c5df4b3 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -193,9 +193,7 @@ static int __init bpf_map_iter_init(void) late_initcall(bpf_map_iter_init); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) { @@ -213,7 +211,7 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) return ret; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 654601dd6b49..26082b97894d 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -704,7 +704,7 @@ static struct bpf_iter_reg task_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &task_seq_info, .fill_link_info = bpf_iter_fill_link_info, @@ -822,9 +822,7 @@ struct bpf_iter_task_vma_kern { struct bpf_iter_task_vma_kern_data *data; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, struct task_struct *task, u64 addr) @@ -890,7 +888,9 @@ __bpf_kfunc void 
bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) } } -__diag_pop(); +__bpf_kfunc_end_defs(); + +#ifdef CONFIG_CGROUPS struct bpf_iter_css_task { __u64 __opaque[1]; @@ -900,9 +900,7 @@ struct bpf_iter_css_task_kern { struct css_task_iter *css_it; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, struct cgroup_subsys_state *css, unsigned int flags) @@ -948,7 +946,9 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) bpf_mem_free(&bpf_global_ma, kit->css_it); } -__diag_pop(); +__bpf_kfunc_end_defs(); + +#endif /* CONFIG_CGROUPS */ struct bpf_iter_task { __u64 __opaque[3]; @@ -969,9 +969,7 @@ enum { BPF_TASK_ITER_PROC_THREADS }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task__nullable, unsigned int flags) @@ -1041,7 +1039,7 @@ __bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it) { } -__diag_pop(); +__bpf_kfunc_end_defs(); DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 857d76694517..bd1c42eb540f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3742,7 +3742,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, if (class == BPF_ALU || class == BPF_ALU64) { if (!bt_is_reg_set(bt, dreg)) return 0; - if (opcode == BPF_MOV) { + if (opcode == BPF_END || opcode == BPF_NEG) { + /* sreg is reserved and unused + * dreg still need precision before this insn + */ + return 0; + } else if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { /* dreg = sreg or dreg = (s8, s16, s32)sreg * dreg needs precision after this insn @@ -4674,7 +4679,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, insn->imm != 0 && env->bpf_capable) { struct bpf_reg_state fake_reg = {}; - __mark_reg_known(&fake_reg, (u32)insn->imm); + __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { @@ -5388,7 +5393,9 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) +#ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) +#endif BTF_ID(struct, bpf_cpumask) BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) @@ -10835,7 +10842,9 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +#ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) +#endif BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10861,7 +10870,11 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +#ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) +#else +BTF_ID_UNUSED +#endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -11394,6 +11407,12 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, &meta->arg_rbtree_root.field); } +/* + * css_task iter allowlist is needed to avoid 
deadlocking on css_set_lock. + * LSM hooks and iters (both sleepable and non-sleepable) are safe. + * Any sleepable progs are also safe since bpf_check_attach_target() enforces + * that they can only be attached to some specific hook points. + */ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); @@ -11401,10 +11420,12 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) switch (prog_type) { case BPF_PROG_TYPE_LSM: return true; - case BPF_TRACE_ITER: - return env->prog->aux->sleepable; + case BPF_PROG_TYPE_TRACING: + if (env->prog->expected_attach_type == BPF_TRACE_ITER) + return true; + fallthrough; default: - return false; + return env->prog->aux->sleepable; } } @@ -11663,7 +11684,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_ITER: if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { if (!check_css_task_iter_allowlist(env)) { - verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n"); + verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n"); return -EINVAL; } } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d80d7a608141..c0adb7254b45 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -156,19 +156,16 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); + +__bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } -__diag_pop(); +__bpf_hook_end(); /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 621037a0aa87..ce1bb2301c06 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -1006,6 +1006,9 @@ void kgdb_panic(const char *msg) if (panic_timeout) return; + debug_locks_off(); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + if (dbg_kdb_mode) kdb_printf("PANIC: %s\n", msg); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 438b868cbfa9..6b213c8252d6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -272,11 +272,10 @@ char *kdbgetenv(const char *match) * kdballocenv - This function is used to allocate bytes for * environment entries. * Parameters: - * match A character string representing a numeric value - * Outputs: - * *value the unsigned long representation of the env variable 'match' + * bytes The number of bytes to allocate in the static buffer. * Returns: - * Zero on success, a kdb diagnostic on failure. + * A pointer to the allocated space in the buffer on success. + * NULL if bytes > size available in the envbuffer. * Remarks: * We use a static environment buffer (envbuffer) to hold the values * of dynamically generated environment variables (see kdb_set). 
Buffer diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index ed3056eb20b8..73c95815789a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -587,6 +587,46 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma_unencrypted(dev, min_mask); } +/* + * Check whether all RAM resource ranges are covered by the DMA range map. + * Returns 0 when further checking is needed. + * Returns 1 if some RAM range can't be covered by dma_range_map. + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + const struct bus_dma_region *bdr = NULL; + const struct bus_dma_region *m; + struct device *dev = data; + + while (start_pfn < end_pfn) { + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) { + bdr = m; + break; + } + } + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); +} + size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5..18d346118fe8 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -20,6 +20,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index e323ca48f7f2..58db8fd70471 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -793,6 +793,28 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the device's DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. 
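dma_addressing_limited(), documented above and defined in the hunk that follows, now also reports holes in dev->dma_range_map via dma_direct_all_ram_mapped(). A sketch of how a driver might consume it, assuming a hypothetical foo driver (the dma_* helpers are the real API; the capping policy is purely illustrative):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int foo_setup_dma(struct device *dev)
{
        int ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

        if (ret)
                return ret;

        if (dma_addressing_limited(dev)) {
                /* Either the usable mask is smaller than the required
                 * mask, or dma_range_map leaves some RAM unreachable:
                 * bouncing is likely, so keep segments small to limit
                 * bounce-buffer pressure.
                 */
                dev_info(dev, "DMA addressing limited, capping segment size\n");
                dma_set_max_seg_size(dev, SZ_64K);
        }
        return 0;
}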
+ */ +bool dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops)) + return false; + return !dma_direct_all_ram_mapped(dev); +} +EXPORT_SYMBOL_GPL(dma_addressing_limited); + size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 26202274784f..33d942615be5 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -283,7 +283,8 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, } for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } @@ -558,29 +559,40 @@ void __init swiotlb_exit(void) * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * - * Return: Decrypted pages, or %NULL on failure. + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. */ -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; + phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; - vaddr = page_address(page); + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: - __free_pages(page, order); + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); return NULL; } @@ -618,11 +630,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; - while ((page = alloc_dma_pages(gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 0d866eaa4cc8..b531c33e9545 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -500,6 +500,7 @@ static inline void rcu_expedite_gp(void) { } static inline void rcu_unexpedite_gp(void) { } static inline void rcu_async_hurry(void) { } static inline void rcu_async_relax(void) { } +static inline bool rcu_cpu_online(int cpu) { return true; } #else /* #ifdef CONFIG_TINY_RCU */ bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. 
*/ @@ -509,6 +510,7 @@ void rcu_unexpedite_gp(void); void rcu_async_hurry(void); void rcu_async_relax(void); void rcupdate_announce_bootup_oddness(void); +bool rcu_cpu_online(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void show_rcu_tasks_gp_kthreads(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 1fa631168594..f54d5782eca0 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -895,10 +895,36 @@ static void rcu_tasks_pregp_step(struct list_head *hop) synchronize_rcu(); } +/* Check for quiescent states since the pregp's synchronize_rcu() */ +static bool rcu_tasks_is_holdout(struct task_struct *t) +{ + int cpu; + + /* Has the task been seen voluntarily sleeping? */ + if (!READ_ONCE(t->on_rq)) + return false; + + /* + * Idle tasks (or idle injection) within the idle loop are RCU-tasks + * quiescent states. But CPU boot code performed by the idle task + * isn't a quiescent state. + */ + if (is_idle_task(t)) + return false; + + cpu = task_cpu(t); + + /* Idle tasks on offline CPUs are RCU-tasks quiescent states. */ + if (t == idle_task(cpu) && !rcu_cpu_online(cpu)) + return false; + + return true; +} + /* Per-task initial processing. */ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop) { - if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) { + if (t != current && rcu_tasks_is_holdout(t)) { get_task_struct(t); t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw); WRITE_ONCE(t->rcu_tasks_holdout, true); @@ -947,7 +973,7 @@ static void check_holdout_task(struct task_struct *t, if (!READ_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) || - !READ_ONCE(t->on_rq) || + !rcu_tasks_is_holdout(t) || (IS_ENABLED(CONFIG_NO_HZ_FULL) && !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { WRITE_ONCE(t->rcu_tasks_holdout, false); @@ -1525,7 +1551,7 @@ static int trc_inspect_reader(struct task_struct *t, void *bhp_in) } else { // The task is not running, so C-language access is safe. nesting = t->trc_reader_nesting; - WARN_ON_ONCE(ofl && task_curr(t) && !is_idle_task(t)); + WARN_ON_ONCE(ofl && task_curr(t) && (t != idle_task(task_cpu(t)))); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && ofl) n_heavy_reader_ofl_updates++; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d3a97e129020..3ac3c846105f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -755,14 +755,19 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) } /* - * Return true if the specified CPU has passed through a quiescent - * state by virtue of being in or having passed through an dynticks - * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU, or by virtue of having been offline. + * Returns positive if the specified CPU has passed through a quiescent state + * by virtue of being in or having passed through a dynticks idle state since + * the last call to dyntick_save_progress_counter() for this same CPU, or by + * virtue of having been offline. + * + * Returns negative if the specified CPU needs a force resched. + * + * Returns zero otherwise. 
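This tri-state return value lets force_qs_rnp() in the next hunk separate detection from side effects: CPUs that need a resched are collected into a local rsmask while the rcu_node lock is held, and resched_cpu(), which acquires the target CPU's runqueue lock, runs only after that lock is dropped. A standalone sketch of the collect-then-act shape, with a pthread mutex standing in for the kernel's raw spinlock (hypothetical names throughout):

#include <pthread.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;

/* f() returns >0 for "quiescent state seen", <0 for "needs resched". */
static void scan_node(unsigned long scanmask, int (*f)(int cpu),
                      void (*resched)(int cpu))
{
        unsigned long qsmask = 0, rsmask = 0;
        int cpu;

        pthread_mutex_lock(&node_lock);
        for (cpu = 0; cpu < 64; cpu++) {
                int ret;

                if (!(scanmask & (1UL << cpu)))
                        continue;
                ret = f(cpu);
                if (ret > 0)
                        qsmask |= 1UL << cpu;  /* quiescent state to report */
                if (ret < 0)
                        rsmask |= 1UL << cpu;  /* resched deferred to later */
        }
        (void)qsmask;  /* the real code reports this up the rcu_node tree */
        pthread_mutex_unlock(&node_lock);

        /* Lock-ordering-sensitive side effects only after unlocking. */
        for (cpu = 0; cpu < 64; cpu++)
                if (rsmask & (1UL << cpu))
                        resched(cpu);
}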
*/ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; + int ret = 0; struct rcu_node *rnp = rdp->mynode; /* @@ -848,8 +853,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || rcu_state.cbovld)) { WRITE_ONCE(rdp->rcu_urgent_qs, true); - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; } /* @@ -862,8 +867,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (time_after(jiffies, rcu_state.jiffies_resched)) { if (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq)) { - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; } if (IS_ENABLED(CONFIG_IRQ_WORK) && !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && @@ -892,7 +897,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) } } - return 0; + return ret; } /* Trace-event wrapper function for trace_rcu_future_grace_period. */ @@ -2271,15 +2276,15 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) { int cpu; unsigned long flags; - unsigned long mask; - struct rcu_data *rdp; struct rcu_node *rnp; rcu_state.cbovld = rcu_state.cbovldnext; rcu_state.cbovldnext = false; rcu_for_each_leaf_node(rnp) { + unsigned long mask = 0; + unsigned long rsmask = 0; + cond_resched_tasks_rcu_qs(); - mask = 0; raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.cbovldnext |= !!rnp->cbovldmask; if (rnp->qsmask == 0) { @@ -2297,11 +2302,17 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) continue; } for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { + struct rcu_data *rdp; + int ret; + rdp = per_cpu_ptr(&rcu_data, cpu); - if (f(rdp)) { + ret = f(rdp); + if (ret > 0) { mask |= rdp->grpmask; rcu_disable_urgency_upon_qs(rdp); } + if (ret < 0) + rsmask |= rdp->grpmask; } if (mask != 0) { /* Idle/offline CPUs, report (releases rnp->lock). */ @@ -2310,6 +2321,9 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) /* Nothing to do here, so just drop the lock. 
*/ raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } + + for_each_leaf_node_cpu_mask(rnp, cpu, rsmask) + resched_cpu(cpu); } } @@ -4195,6 +4209,13 @@ static bool rcu_rdp_cpu_online(struct rcu_data *rdp) return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); } +bool rcu_cpu_online(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + return rcu_rdp_cpu_online(rdp); +} + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index df697c74d519..84e8a0f6e4e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1252,9 +1252,7 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { }; #ifdef CONFIG_KEYS -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); +__bpf_kfunc_start_defs(); /** * bpf_lookup_user_key - lookup a key by its serial @@ -1404,7 +1402,7 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, } #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8bfe23af9c73..7d2ddbcfa377 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const char *argv[], for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') { + if (is_tracepoint) { + trace_probe_log_set_index(i); + trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + return -EINVAL; + } *is_return = true; - /* - * NOTE: Don't check is_tracepoint here, because it will - * be checked when the argument is parsed. - */ break; } } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a3442db35670..52f8b537dd0a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1020,9 +1020,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); /** * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @kretprobe: Is this a return probe? * @name: The name of the kprobe event * @loc: The location of the kprobe event - * @kretprobe: Is this a return probe? 
* @...: Variable number of arg (pairs), one pair for each field * * NOTE: Users normally won't want to call this function directly, but diff --git a/lib/closure.c b/lib/closure.c index 0855e698ced1..f86c9eeafb35 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -21,6 +21,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); if (!r) { + smp_acquire__after_ctrl_dep(); + + cl->closure_get_happened = false; + if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); @@ -43,7 +47,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) /* For clearing flags with the same atomic op as a put */ void closure_sub(struct closure *cl, int v) { - closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); + closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining)); } EXPORT_SYMBOL(closure_sub); @@ -52,7 +56,7 @@ EXPORT_SYMBOL(closure_sub); */ void closure_put(struct closure *cl) { - closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); + closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining)); } EXPORT_SYMBOL(closure_put); @@ -90,6 +94,7 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) if (atomic_read(&cl->remaining) & CLOSURE_WAITING) return false; + cl->closure_get_happened = true; closure_set_waiting(cl, _RET_IP_); atomic_add(CLOSURE_WAITING + 1, &cl->remaining); llist_add(&cl->list, &waitlist->list); diff --git a/lib/test_objpool.c b/lib/test_objpool.c index a94078402138..bfdb81599832 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -311,7 +311,7 @@ static void ot_fini_sync(struct ot_context *sop) ot_kfree(sop->test, sop, sizeof(*sop)); } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_sync_ops[] = { @@ -475,7 +475,7 @@ static struct ot_context *ot_init_async_m0(struct ot_test *test) return sop; } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_async_ops[] = { @@ -632,7 +632,7 @@ static int ot_start_async(struct ot_test *test) #define NODE_COMPACT sizeof(struct ot_node) #define NODE_VMALLOC (512) -struct ot_test g_testcases[] = { +static struct ot_test g_testcases[] = { /* sync & normal */ {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"}, diff --git a/mm/memblock.c b/mm/memblock.c index fd492e5bbdbc..5a88d6d24d79 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -424,7 +424,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, * of memory that aren't suitable for allocation */ if (!memblock_can_resize) - return -1; + panic("memblock: cannot resize %s array\n", type->name); /* Calculate new doubled size */ old_size = type->max * sizeof(struct memblock_region); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0841f8d82419..c9fdcc5cdce1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -503,9 +503,8 @@ out: * architecture dependent calling conventions. 7+ can be supported in the * future. 
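The lib/closure.c hunk above weakens the refcount decrement from a full barrier (atomic_dec_return()) to release ordering, restoring acquire semantics on the zero path with smp_acquire__after_ctrl_dep(). The same shape in portable C11, with an explicit acquire fence instead of the control-dependency trick and a hypothetical obj type:

#include <stdatomic.h>

struct obj {
        atomic_int remaining;
        void (*destructor)(struct obj *);
};

static void obj_put(struct obj *o)
{
        /* Release: our prior writes to *o are visible to whichever
         * thread ends up performing the final put.
         */
        if (atomic_fetch_sub_explicit(&o->remaining, 1,
                                      memory_order_release) == 1) {
                /* Acquire: see every other thread's writes before the
                 * destructor runs; closure.c gets this from
                 * smp_acquire__after_ctrl_dep() on the zero branch.
                 */
                atomic_thread_fence(memory_order_acquire);
                o->destructor(o);
        }
}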
*/ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); + __bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; @@ -605,7 +604,7 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8f19253024b0..741360219552 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void) module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Force packets to be routed instead of bridged"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 278f324e6752..dacd81b12e62 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void) module_init(ebtable_filter_init); module_exit(ebtable_filter_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy filter table"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9066f7f376d5..0f2a8c6118d4 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void) module_init(ebtable_nat_init); module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy stateless nat table"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20b2..99d82676f780 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy core"); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 71056ee84773..b5c406a6e765 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini); MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking"); diff --git a/net/core/filter.c b/net/core/filter.c index 21d75108c2e9..383f96b0a1c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11767,9 +11767,7 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { @@ -11816,7 +11814,7 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) @@ -11879,10 +11877,7 @@ static int __init bpf_kfunc_init(void) } late_initcall(bpf_kfunc_init); -/* Disables missing prototype warnings */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); 
+__bpf_kfunc_start_defs(); /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. * @@ -11916,7 +11911,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) return sk->sk_prot->diag_destroy(sk, ECONNABORTED); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e409b98aba0..dec544337236 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -217,8 +217,12 @@ static int page_pool_init(struct page_pool *pool, return -ENOMEM; #endif - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif return -ENOMEM; + } atomic_set(&pool->pages_state_release_cnt, 0); diff --git a/net/core/xdp.c b/net/core/xdp.c index df4789ab512d..b6f1d6dab3f2 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -696,9 +696,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) return nxdpf; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); /** * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp. @@ -738,7 +736,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1b8cbfda6e5d..44b033fe1ef6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -629,9 +629,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); @@ -639,6 +636,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ireq_family = AF_INET; ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + /* * Step 3: Process LISTEN state * diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8d344b219f84..4550b680665a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -360,15 +360,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9cbae0169249..788dfdc498a9 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -15,7 +15,7 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1), [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 
1), - [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(3), + [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), }; const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index b71dab630a87..80cdc6f6b34c 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -342,9 +342,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, skb = skb_copy_expand(frame->skb_std, 0, skb_tailroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); - prp_fill_rct(skb, frame, port); - - return skb; + return prp_fill_rct(skb, frame, port); } static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 3760a14b6b57..4da03bf45c9b 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -22,9 +22,7 @@ enum bpf_fou_encap_type { FOU_BPF_ENCAP_GUE, }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_set_fou_encap - Set FOU encap parameters * @@ -100,7 +98,7 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc43451..4d42d0756fd7 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -170,3 +170,4 @@ module_init(iptable_nat_init); module_exit(iptable_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy nat table"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ca5e5b21587c..0e7f53964d0a 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void) module_init(iptable_raw_init); module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy raw table"); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 265b39bc435b..482e733c3375 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -186,3 +186,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 defragmentation support"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index f33aeab9424f..f01b038fc1cd 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -336,3 +336,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) EXPORT_SYMBOL_GPL(nf_send_unreach); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 packet rejection core"); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 98b25e5d147b..d37282c06e3d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -306,7 +306,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = -1; + treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index ef5472ed6158..7696417d0640 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1315,7 +1315,8 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) 
key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss - * - tstamp - wscale - sackperm), + * - tstamp (including sackperm) + * - wscale), * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. * * In order to allow D-SACK with TCP-AO, the header size should be: @@ -1342,9 +1343,9 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) * large to leave sufficient option space. */ syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED; if (tcp_ao_len(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 50aaa1527150..bcb55d98004c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7115,7 +7115,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; - tcp_rsk(req)->req_usec_ts = -1; + tcp_rsk(req)->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif @@ -7143,9 +7143,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; - if (tmp_opt.tstamp_ok) + if (tmp_opt.tstamp_ok) { + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - + } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f558c054cf6e..eb13a55d660c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -601,6 +601,44 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, } #endif +static __be32 *process_tcp_ao_options(struct tcp_sock *tp, + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key, __be32 *ptr) +{ +#ifdef CONFIG_TCP_AO + u8 maclen = tcp_ao_maclen(key->ao_key); + + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + return ptr; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +#endif + return ptr; +} + /* Write previously computed TCP options to the packet. 
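For reference, the SYN option-space check above works out as follows, with constant values assumed from the current tcp.h definitions (shown as a plain computation, not kernel code):

#include <stdio.h>

int main(void)
{
        const int max_tcp_option_space = 40; /* MAX_TCP_OPTION_SPACE */
        const int mss = 4;                   /* TCPOLEN_MSS_ALIGNED */
        const int tstamp = 12;               /* TCPOLEN_TSTAMP_ALIGNED; SACK-perm rides along */
        const int wscale = 4;                /* TCPOLEN_WSCALE_ALIGNED */

        /* 40 - 4 - 12 - 4 = 20 bytes remain for the TCP-AO option on a
         * SYN; tcp_ao_parse_crypto() rejects larger keys with -EMSGSIZE.
         */
        printf("space for TCP-AO: %d\n",
               max_tcp_option_space - mss - tstamp - wscale);
        return 0;
}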
* * Beware: Something in the Internet is very sensitive to the ordering of @@ -629,37 +667,7 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, opts->hash_location = (__u8 *)ptr; ptr += 4; } else if (tcp_key_is_ao(key)) { -#ifdef CONFIG_TCP_AO - u8 maclen = tcp_ao_maclen(key->ao_key); - - if (tcprsk) { - u8 aolen = maclen + sizeof(struct tcp_ao_hdr); - - *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | - (tcprsk->ao_keyid << 8) | - (tcprsk->ao_rcv_next)); - } else { - struct tcp_ao_key *rnext_key; - struct tcp_ao_info *ao_info; - - ao_info = rcu_dereference_check(tp->ao_info, - lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); - rnext_key = READ_ONCE(ao_info->rnext_key); - if (WARN_ON_ONCE(!rnext_key)) - goto out_ao; - *ptr++ = htonl((TCPOPT_AO << 24) | - (tcp_ao_len(key->ao_key) << 16) | - (key->ao_key->sndid << 8) | - (rnext_key->rcvid)); - } - opts->hash_location = (__u8 *)ptr; - ptr += maclen / sizeof(*ptr); - if (unlikely(maclen % sizeof(*ptr))) { - memset(ptr, TCPOPT_NOP, sizeof(*ptr)); - ptr++; - } -out_ao: -#endif + ptr = process_tcp_ao_options(tp, tcprsk, opts, key, ptr); } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | @@ -3693,8 +3701,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); - if (tcp_rsk(req)->req_usec_ts < 0) - tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 65a8eaae2fec..55b310a722c7 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -231,7 +231,7 @@ static void cpool_schedule_cleanup(struct kref *kref) */ void tcp_sigpool_release(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; /* slow-path */ @@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_release); */ void tcp_sigpool_get(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; kref_get(&cpool[id].kref); } @@ -256,7 +256,7 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RC struct crypto_ahash *hash; rcu_read_lock_bh(); - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) { + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) { rcu_read_unlock_bh(); return -EINVAL; } @@ -301,7 +301,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_end); */ size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return -EINVAL; return strscpy(buf, cpool[id].alg, buf_len); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a13600..52cf104e3478 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -170,3 +170,4 @@ module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy nat table"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 08861d5d1f4d..fc9f6754028f 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void) module_init(ip6table_raw_init); 
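The tcp_sigpool bound fixes above are the classic off-by-one: cpool_populated counts initialized slots, so valid indices are 0..cpool_populated-1 and the guard must use >=. A minimal standalone illustration (hypothetical pool, not the kernel code):

#include <stdbool.h>
#include <stddef.h>

#define POOL_MAX 16

static const char *pool_alg[POOL_MAX];
static size_t pool_populated;  /* number of initialized slots */

static bool pool_id_valid(size_t id)
{
        /* "id > pool_populated" wrongly accepted id == pool_populated,
         * one past the last initialized slot; ">=" is the right bound.
         */
        return id < pool_populated && pool_alg[id] != NULL;
}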
module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy raw table"); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index d59b296b4f51..be7817fbc024 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -182,3 +182,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 defragmentation support"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 58ccdb08c0fd..d45bc54b7ea5 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -413,3 +413,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, EXPORT_SYMBOL_GPL(nf_send_unreach6); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 packet rejection core"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 500f6ed3b8cf..12eedc6ca2cc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -181,14 +181,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; - if (security_inet_conn_request(sk, skb, req)) - goto out_free; - req->mss = mss; ireq->ir_rmt_port = th->source; ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + if (security_inet_conn_request(sk, skb, req)) + goto out_free; + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dd1d8ffd5f59..65d1f6755f98 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1946,4 +1946,5 @@ module_init(kcm_init); module_exit(kcm_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KCM (Kernel Connection Multiplexor) sockets"); MODULE_ALIAS_NETPROTO(PF_KCM); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 7cac441862e2..51bccfb00a9c 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb) skb->transport_header += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { - __be16 pdulen = eth_hdr(skb)->h_proto; - s32 data_size = ntohs(pdulen) - llc_len; + __be16 pdulen; + s32 data_size; + + if (skb->mac_len < ETH_HLEN) + return 0; + + pdulen = eth_hdr(skb)->h_proto; + data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || !pskb_may_pull(skb, data_size)) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 79d1cef8f15a..06fb8e6944b0 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) int rc = 1; u32 data_size; + if (skb->mac_len < ETH_HLEN) + return 1; + llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 05c6ae092053..f50654292510 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -76,6 +76,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) u32 data_size; struct sk_buff *nskb; + if (skb->mac_len < ETH_HLEN) + goto out; + /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); diff --git a/net/netfilter/ipvs/ip_vs_core.c 
b/net/netfilter/ipvs/ip_vs_core.c index 3230506ae3ff..a2c16b501087 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void) module_init(ip_vs_init); module_exit(ip_vs_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Virtual Server"); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 5e6ec32aff2b..75f4c231f4a0 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void) module_init(ip_vs_dh_init); module_exit(ip_vs_dh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs destination hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c index b846cc385279..ab117e5bc34e 100644 --- a/net/netfilter/ipvs/ip_vs_fo.c +++ b/net/netfilter/ipvs/ip_vs_fo.c @@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void) module_init(ip_vs_fo_init); module_exit(ip_vs_fo_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted failover scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index ef1f45e43b63..f53899d12416 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void) module_init(ip_vs_ftp_init); module_exit(ip_vs_ftp_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs ftp helper"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index cf78ba4ce5ff..8ceec7a2fa8f 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void) module_init(ip_vs_lblc_init); module_exit(ip_vs_lblc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 9eddf118b40e..0fb64707213f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void) module_init(ip_vs_lblcr_init); module_exit(ip_vs_lblcr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 9d34d81fc6f1..c2764505e380 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void) module_init(ip_vs_lc_init); module_exit(ip_vs_lc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index f56862a87518..ed7f5c889b41 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void) module_init(ip_vs_nq_init); module_exit(ip_vs_nq_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs never queue scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c index c03066fdd5ca..c7708b809700 100644 --- a/net/netfilter/ipvs/ip_vs_ovf.c +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void) module_init(ip_vs_ovf_init); module_exit(ip_vs_ovf_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs overflow connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c 
b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0ac6705a61d3..e4ce1d9a63f9 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void) module_init(ip_vs_sip_init); module_exit(ip_vs_sip_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs sip helper"); diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 38495c6f6c7c..6baa34dff9f0 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void) module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); +MODULE_DESCRIPTION("ipvs round-robin scheduler"); MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7663288e5358..a46f99a56618 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void) module_init(ip_vs_sed_init); module_exit(ip_vs_sed_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs shortest expected delay scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index c2028e412092..92e77d7a6b50 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void) module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs source hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c index 3308e4cc740a..8d5419edde50 100644 --- a/net/netfilter/ipvs/ip_vs_twos.c +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void) module_init(ip_vs_twos_init); module_exit(ip_vs_twos_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs power of twos choice scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 09f584b564a0..9fa500927c0a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void) module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 1bc7a0789d85..85ce0d04afac 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void) module_init(ip_vs_wrr_init); module_exit(ip_vs_wrr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted round-robin scheduler"); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index b21799d468d2..475358ec8212 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, return 0; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_conntrack BTF"); +__bpf_kfunc_start_defs(); /* bpf_xdp_ct_alloc - Allocate a new CT entry * @@ -467,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) diff --git a/net/netfilter/nf_conntrack_broadcast.c 
b/net/netfilter/nf_conntrack_broadcast.c index 9fb9b8031298..cfa0fe0356de 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -82,3 +82,4 @@ out: EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcast connection tracking helper"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 334db22199c1..fb0ae15e96df 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,6 +57,7 @@ #include "nf_internals.h" MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { struct nf_conn *last; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c928ff63b10e..f36727ed91e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack"); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking"); diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 141ee7783223..6e3b2f58855f 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -12,9 +12,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_nat BTF"); +__bpf_kfunc_start_defs(); /* bpf_ct_set_nat_info - Set source or destination nat address * @@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? 
-ENOMEM : 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c4e0516a8dfa..c3d7ecbc777c 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network address translation core"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 6616ba5d0b04..5b37487d9d11 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -80,6 +80,26 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope) +{ + unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr); + + if (ifa_addr_type & IPV6_ADDR_MAPPED) + return false; + + if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC))) + return false; + + if (scope) { + unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK; + + if (!(scope & ifa_scope)) + return false; + } + + return true; +} + unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) @@ -89,14 +109,19 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (hooknum == NF_INET_LOCAL_OUT) { newdst.in6 = loopback_addr; } else { + unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; bool addr = false; idev = __in6_dev_get(skb->dev); if (idev != NULL) { + const struct inet6_ifaddr *ifa; + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (!nf_nat_redirect_ipv6_usable(ifa, scope)) + continue; + newdst.in6 = ifa->addr; addr = true; break; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3c1fd8283bf4..a761ee6796f6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6520,6 +6520,12 @@ static int nft_setelem_deactivate(const struct net *net, return ret; } +static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) +{ + list_del_rcu(&catchall->list); + kfree_rcu(catchall, rcu); +} + static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -6528,8 +6534,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem_priv) { - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); + nft_setelem_catchall_destroy(catchall); break; } } @@ -9678,11 +9683,12 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, unsigned int gc_seq, bool sync) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *catchall, *next; const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9700,7 +9706,13 @@ dead_elem: if (!gc) return NULL; - nft_trans_gc_elem_add(gc, catchall->elem); + elem_priv = catchall->elem; + if (sync) { + 
nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); + nft_setelem_catchall_destroy(catchall); + } + + nft_trans_gc_elem_add(gc, elem_priv); } return gc; @@ -11386,4 +11398,5 @@ module_exit(nf_tables_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Framework for packet filtering and classification"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 50723ba08289..c0fc431991e8 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -447,4 +447,5 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Passive OS fingerprint matching"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index 98e4946100c5..40e230d8b712 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -137,6 +137,7 @@ module_init(nft_chain_nat_init); module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("nftables network address translation support"); #ifdef CONFIG_NF_TABLES_IPV4 MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); #endif diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 04b51f285332..1bfe258018da 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -204,4 +204,5 @@ bool nft_fib_reduce(struct nft_regs_track *track, EXPORT_SYMBOL_GPL(nft_fib_reduce); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index a5268e6dd32f..358e742afad7 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nftables netdev packet forwarding support"); MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7ddb9a78e3fc..ef93e0d3bee0 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; - char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 9c4f231be275..1eeff9422856 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -257,5 +257,6 @@ static void __exit netlink_diag_exit(void) module_init(netlink_diag_init); module_exit(netlink_diag_exit); +MODULE_DESCRIPTION("Netlink-based socket monitoring/diagnostic interface (sock_diag)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0b9a785dea45..3019a4406ca4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -985,7 +985,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (err) return err; - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && labels_nonzero(&info->labels.mask)) { err = 
ovs_ct_set_labels(ct, key, &info->labels.value, diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ac85d4644a3c..df8a271948a1 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,7 +212,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + rxrpc_connection_expiry); + jiffies + rxrpc_connection_expiry * HZ); } rxrpc_put_call(call, rxrpc_call_put_io_thread); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 7d910aee4f8c..c553a30e9c83 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -87,7 +87,7 @@ static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) struct rxrpc_local *local = container_of(timer, struct rxrpc_local, client_conn_reap_timer); - if (local->kill_all_client_conns && + if (!local->kill_all_client_conns && test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) rxrpc_wake_up_io_thread(local); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d3f26bf0440..c39252d61ebb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1098,7 +1098,7 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - net_warn_ratelimited("can't go to NULL chain!\n"); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9583645e86c2..0db0ecf1d110 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -376,6 +376,17 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; } +static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry) +{ + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(entry->ct); + if (act_ct_ext) { + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); + } +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp, bool bidirectional) @@ -671,6 +682,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + tcf_ct_flow_ct_ext_ifidx_update(flow); flow_offload_refresh(nf_ft, flow, force_refresh); if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { /* Process this flow in SW to allow promoting to ASSURED */ @@ -1034,7 +1047,7 @@ do_nat: tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); if (!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); /* This will take care of sending queued events * even if the connection is already confirmed. 
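The rxrpc_disconnect_call() change above is a units fix rather than a behavioural tweak: rxrpc_connection_expiry is kept in seconds, while timers are armed in jiffies, so the missing multiplication by HZ let the service-connection reaper fire far too early. A userspace sketch of the arithmetic, assuming HZ of 250 and the ten-minute default purely for illustration:

#include <stdio.h>

#define HZ 250                                  /* assumed tick rate */

static const unsigned long rxrpc_connection_expiry = 10 * 60;   /* seconds */

int main(void)
{
        unsigned long jiffies = 1000000;        /* pretend current tick count */
        unsigned long buggy = jiffies + rxrpc_connection_expiry;
        unsigned long fixed = jiffies + rxrpc_connection_expiry * HZ;

        printf("buggy reap delay: %lu s\n", (buggy - jiffies) / HZ);  /* 2 */
        printf("fixed reap delay: %lu s\n", (fixed - jiffies) / HZ);  /* 600 */
        return 0;
}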
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c9a811f4c7ee..393b78729216 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void) module_init(gate_init_module); module_exit(gate_cleanup_module); +MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1daeb2182b70..1976bd163986 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1658,6 +1658,7 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { + u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1712,8 +1713,14 @@ reclassify: goto reset; } #endif - if (err >= 0) + if (err >= 0) { + /* Policy drop or drop reason is over-written by + * classifiers with a bogus value(0) */ + if (err == TC_ACT_SHOT && + res->drop_reason == SKB_NOT_DROPPED_YET) + tcf_set_drop_reason(res, orig_reason); return err; + } } if (unlikely(n)) { diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 1b92c33b5f81..a1f56931330c 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -341,4 +341,5 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) +MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index bd9322d71910..7ee8dbf49ed0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void) module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); +MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c49d6af0e048..afc534ee0a18 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -446,4 +446,5 @@ static void __exit exit_fw(void) module_init(init_fw) module_exit(exit_fw) +MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1424bfeaca73..12a505db4183 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -684,4 +684,5 @@ static void __exit exit_route4(void) module_init(init_route4) module_exit(exit_route4) +MODULE_DESCRIPTION("Routing table realm based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6663e971a13e..d5bdfd4a7655 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1489,4 +1489,5 @@ static void __exit exit_u32(void) module_init(init_u32) module_exit(exit_u32) +MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cac870eb7897..9a0b85190a2c 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void) module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Credit Based shaper"); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 19c851125901..ae1da08e268f 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -513,3 +513,4 @@ module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd3b..097740a9afea 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ 
-495,3 +495,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Deficit Round Robin scheduler"); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 61d1f0e32cf3..4808159a5466 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -513,3 +513,4 @@ static void __exit etf_module_exit(void) module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf0629..f7c88495946b 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -826,3 +826,4 @@ static void __exit ets_exit(void) module_init(ets_init); module_exit(ets_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e1040421b797..450f5c67ac49 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); +MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 0fd18c344ab5..3a31c47fea9b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -919,14 +919,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, - [TCA_FQ_PRIOMAP] = { - .type = NLA_BINARY, - .len = sizeof(struct tc_prio_qopt), - }, - [TCA_FQ_WEIGHTS] = { - .type = NLA_BINARY, - .len = FQ_BANDS * sizeof(s32), - }, + [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), + [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9db4..8c61eb3dc943 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -945,3 +945,4 @@ module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 880c5f16b29c..16c45da4036a 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1693,5 +1693,6 @@ hfsc_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0d947414e616..7349233eaa9b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a463a63192c3..5fa9eaa79bfc 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,3 +370,4 @@ module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 793009f445c0..43e53ee00a56 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -789,3 +789,4 @@ 
module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c index 83b3793c4012..b3a5572c167b 100644 --- a/net/sched/sch_mqprio_lib.c +++ b/net/sched/sch_mqprio_lib.c @@ -129,3 +129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE], EXPORT_SYMBOL_GPL(mqprio_fp_to_offload); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio"); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c860182b..d66d5f0ec080 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -410,3 +410,4 @@ module_init(multiq_module_init) module_exit(multiq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ba2dc191ed9..fa678eb88528 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void) module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network characteristics emulator qdisc"); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 35f49edf63db..992f0c8d7988 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -226,3 +226,4 @@ static void __exit plug_module_exit(void) module_init(plug_module_init) module_exit(plug_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fdc5ef52c3ee..8ecdd3ef6f8e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -433,3 +433,4 @@ module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple 3-band priority qdisc"); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 28315166fe8e..48a604c320c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1535,3 +1535,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16277b6a0238..607b6c8b3a9b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -563,3 +563,4 @@ module_init(red_module_init) module_exit(red_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Random Early Detection qdisc"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66dcb18638fe..eb77558fa367 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void) module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Stochastic Fairness qdisc"); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 5df2dacb7b1a..28beb11762d8 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -307,3 +307,4 @@ module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2e1949de4171..31a8252bd09c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); 
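A few hunks up, sch_fq tightens its netlink policy: NLA_BINARY with .len only caps the maximum payload size, so a truncated TCA_FQ_PRIOMAP or TCA_FQ_WEIGHTS attribute would be accepted even though the qdisc then reads a full structure out of the payload and runs past the attribute's end. NLA_POLICY_EXACT_LEN() rejects any other size up front. A userspace sketch of the two validation rules; the struct is a stand-in for struct tc_prio_qopt, not the real uapi definition:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct tc_prio_qopt_sketch {            /* stand-in, same general shape */
        int bands;
        unsigned char priomap[16];
};

static bool check_max_len(size_t attr_len)      /* old NLA_BINARY rule */
{
        return attr_len <= sizeof(struct tc_prio_qopt_sketch);
}

static bool check_exact_len(size_t attr_len)    /* NLA_POLICY_EXACT_LEN rule */
{
        return attr_len == sizeof(struct tc_prio_qopt_sketch);
}

int main(void)
{
        size_t truncated = 4;   /* userspace sent only the "bands" field */

        printf("max-len rule accepts short attr:   %d\n",
               check_max_len(truncated));       /* 1: out-of-bounds read risk */
        printf("exact-len rule accepts short attr: %d\n",
               check_exact_len(truncated));     /* 0: rejected at parse time */
        return 0;
}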
+MODULE_DESCRIPTION("Time Aware Priority qdisc"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 17d2d00ddb18..dd6b1a723bf7 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void) module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Token Bucket Filter qdisc"); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7721239c185f..59304611dc00 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -523,3 +523,4 @@ module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index abd2667734d4..da97f946b79b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc) if (!smc->use_fallback) { rc = smc_close_active(smc); - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } else { if (sk->sk_state != SMC_CLOSED) { @@ -1743,7 +1743,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); + smc_sock_set_flag(new_sk, SOCK_DEAD); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; diff --git a/net/smc/smc.h b/net/smc/smc.h index 24745fde4ac2..e377980b8414 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -377,4 +377,9 @@ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); +static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) +{ + set_bit(flag, &sk->sk_flags); +} + #endif /* __SMC_H */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 89105e95b452..3c06625ceb20 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -28,13 +28,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, { struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; struct smc_connection *conn = cdcpend->conn; + struct smc_buf_desc *sndbuf_desc; struct smc_sock *smc; int diff; + sndbuf_desc = conn->sndbuf_desc; smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); - if (!wc_status) { - diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, + if (!wc_status && sndbuf_desc) { + diff = smc_curs_diff(sndbuf_desc->len, &cdcpend->conn->tx_curs_fin, &cdcpend->cursor); /* sndbuf_space is decreased in smc_sendmsg */ @@ -114,9 +116,6 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; - if (unlikely(!READ_ONCE(conn->sndbuf_desc))) - return -ENOBUFS; - smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; @@ -385,7 +384,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_shutdown |= RCV_SHUTDOWN; if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); + smc_sock_set_flag(&smc->sk, SOCK_DONE); sock_hold(&smc->sk); /* sock_put in close_work */ if (!queue_work(smc_close_wq, &conn->close_work)) sock_put(&smc->sk); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index dbdf03e8aa5b..10219f55aad1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -116,7 +116,8 @@ static void smc_close_cancel_work(struct smc_sock *smc) struct 
sock *sk = &smc->sk; release_sock(sk); - cancel_work_sync(&smc->conn.close_work); + if (cancel_work_sync(&smc->conn.close_work)) + sock_put(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); } @@ -173,7 +174,7 @@ void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_state_change(sk); if (release_clcsock) { diff --git a/net/socket.c b/net/socket.c index 0d1c4e78fc7f..3379c64217a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1685,20 +1685,16 @@ struct file *__sys_socket_file(int family, int type, int protocol) * Therefore, __weak is needed to ensure that the call is still * emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "A fmod_ret entry point for BPF programs"); +__bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } -__diag_pop(); +__bpf_hook_end(); int __sys_socket(int family, int type, int protocol) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9c210273d06b..daa9582ec861 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -111,7 +111,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { - __rpc_clnt_remove_pipedir(clnt); + if (pipefs_sb == clnt->pipefs_sb) + __rpc_clnt_remove_pipedir(clnt); rpc_put_sb_net(net); } } @@ -151,6 +152,8 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { struct dentry *dentry; + clnt->pipefs_sb = pipefs_sb; + if (clnt->cl_program->pipe_dir_name != NULL) { dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); if (IS_ERR(dentry)) @@ -2171,6 +2174,7 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { case -ECONNREFUSED: + case -ECONNRESET: /* A positive refusal suggests a rebind is needed. 
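The smc_close_cancel_work() change above closes a reference leak: smc_cdc_msg_recv_action() takes a sock_hold() before queueing close_work, and the work function drops that reference when it runs, so when cancel_work_sync() reports that it cancelled a still-pending work, the reference now belongs to nobody and the canceller must put it. A minimal userspace sketch of the ownership rule, with every name invented for illustration:

#include <stdbool.h>
#include <stdio.h>

static int refcnt = 1;          /* the socket's base reference */
static bool work_queued;

static void sock_hold(void) { refcnt++; }
static void sock_put(void)  { refcnt--; }

static void queue_close_work(void)
{
        sock_hold();            /* reference owned by the pending work */
        work_queued = true;
}

static bool cancel_work_sync_sketch(void)
{
        bool was_queued = work_queued;

        work_queued = false;    /* the work will never run now */
        return was_queued;      /* true: its reference was orphaned */
}

int main(void)
{
        queue_close_work();
        if (cancel_work_sync_sketch())  /* mirrors the added check */
                sock_put();             /* drop the orphaned reference */
        printf("refcnt after cancel: %d\n", refcnt);    /* 1, no leak */
        return 0;
}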
*/ if (RPC_IS_SOFTCONN(task)) break; @@ -2179,7 +2183,6 @@ call_connect_status(struct rpc_task *task) goto out_retry; } fallthrough; - case -ECONNRESET: case -ECONNABORTED: case -ENETDOWN: case -ENETUNREACH: @@ -2220,7 +2223,7 @@ call_connect_status(struct rpc_task *task) } xprt_switch_put(xps); if (!task->tk_xprt) - return; + goto out; } goto out_retry; case -ENOBUFS: @@ -2235,6 +2238,7 @@ out_next: out_retry: /* Check for timeouts before looping back to call_bind */ task->tk_action = call_bind; +out: rpc_check_timeout(task); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 5988a5c5ff3f..102c3818bc54 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -769,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task) child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); + if (IS_ERR(child)) { + /* rpcb_map_release() has freed the arguments */ + return; + } xprt->stat.bind_count++; rpc_put_task(child); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ab453ede54f0..2364c485540c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -283,7 +283,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else @@ -349,7 +349,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a15bf2ede89b..58f3dc8d0d71 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1181,6 +1181,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + transport->xprt_err = 0; clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); @@ -2772,18 +2773,13 @@ static void xs_wake_error(struct sock_xprt *transport) { int sockerr; - if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - return; - mutex_lock(&transport->recv_mutex); - if (transport->sock == NULL) - goto out; if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - goto out; + return; sockerr = xchg(&transport->xprt_err, 0); - if (sockerr < 0) + if (sockerr < 0) { xprt_wake_pending_tasks(&transport->xprt, sockerr); -out: - mutex_unlock(&transport->recv_mutex); + xs_tcp_force_close(&transport->xprt); + } } static void xs_wake_pending(struct sock_xprt *transport) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e8fd257c0e68..1a9a5bdaccf4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -88,7 +88,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_LINK_NAME }, [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, @@ -125,7 +125,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, - 
[TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_BEARER_NAME }, [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e22c81435ef7..f6dc896bf44c 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -130,6 +130,8 @@ static void virtio_transport_init_hdr(struct sk_buff *skb, hdr->dst_port = cpu_to_le32(dst_port); hdr->flags = cpu_to_le32(info->flags); hdr->len = cpu_to_le32(payload_len); + hdr->buf_alloc = cpu_to_le32(0); + hdr->fwd_cnt = cpu_to_le32(0); } static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, @@ -1369,11 +1371,17 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK && - vsock_stream_has_data(vsk) <= 0 && - !sock_flag(sk, SOCK_DONE)) { - (void)virtio_transport_reset(vsk, NULL); - virtio_transport_do_close(vsk, true); + if (vsk->peer_shutdown == SHUTDOWN_MASK) { + if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + virtio_transport_do_close(vsk, true); + } + /* Remove this socket anyway because the remote peer sent + * the shutdown. This way a new connection will succeed + * if the remote peer uses the same source port, + * even if the old socket is still unreleased, but now disconnected. + */ + vsock_remove_sock(vsk); } if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index d74f3fd20f2b..7d5e920141e9 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -27,9 +27,7 @@ struct bpf_xfrm_info { int link; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in xfrm_interface BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_get_xfrm_info - Get XFRM metadata * @@ -93,7 +91,7 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) diff --git a/sound/core/info.c b/sound/core/info.c index 0b2f04dcb589..e2f302e55bbb 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -56,7 +56,7 @@ struct snd_info_private_data { }; static int snd_info_version_init(void); -static void snd_info_disconnect(struct snd_info_entry *entry); +static void snd_info_clear_entries(struct snd_info_entry *entry); /* @@ -569,11 +569,16 @@ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; - mutex_lock(&info_mutex); + proc_remove(card->proc_root_link); - card->proc_root_link = NULL; if (card->proc_root) - snd_info_disconnect(card->proc_root); + proc_remove(card->proc_root->p); + + mutex_lock(&info_mutex); + if (card->proc_root) + snd_info_clear_entries(card->proc_root); + card->proc_root_link = NULL; + card->proc_root = NULL; mutex_unlock(&info_mutex); } @@ -745,15 +750,14 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, } EXPORT_SYMBOL(snd_info_create_card_entry); -static void snd_info_disconnect(struct snd_info_entry *entry) +static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if 
(!entry->p) return; list_for_each_entry(p, &entry->children, list) - snd_info_disconnect(p); - proc_remove(entry->p); + snd_info_clear_entries(p); entry->p = NULL; } @@ -770,8 +774,9 @@ void snd_info_free_entry(struct snd_info_entry * entry) if (!entry) return; if (entry->p) { + proc_remove(entry->p); mutex_lock(&info_mutex); - snd_info_disconnect(entry); + snd_info_clear_entries(entry); mutex_unlock(&info_mutex); } diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 23cf8284ce36..0ba8f0c4cd99 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -720,15 +720,14 @@ static int __init amiga_audio_probe(struct platform_device *pdev) return dmasound_init(); } -static int __exit amiga_audio_remove(struct platform_device *pdev) +static void __exit amiga_audio_remove(struct platform_device *pdev) { dmasound_deinit(); - return 0; } static struct platform_driver amiga_audio_driver = { - .remove = __exit_p(amiga_audio_remove), - .driver = { + .remove_new = __exit_p(amiga_audio_remove), + .driver = { .name = "amiga-audio", }, }; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 3c9e6e97ad0f..03264915c618 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2068,6 +2068,7 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1022, 0xd601) }, /* ASRock X670E Taichi */ {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 58006c8bcfb9..669ae3d6e447 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7343,8 +7343,10 @@ enum { ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, + ALC289_FIXUP_DELL_SPK1, ALC289_FIXUP_DELL_SPK2, ALC289_FIXUP_DUAL_SPK, + ALC289_FIXUP_RTK_AMP_DUAL_SPK, ALC294_FIXUP_SPK2_TO_DAC1, ALC294_FIXUP_ASUS_DUAL_SPK, ALC285_FIXUP_THINKPAD_X1_GEN7, @@ -7444,6 +7446,8 @@ enum { ALC287_FIXUP_THINKPAD_I2S_SPK, ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, ALC2XX_FIXUP_HEADSET_MIC, + ALC289_FIXUP_DELL_CS35L41_SPI_2, + ALC294_FIXUP_CS35L41_I2C_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -8670,6 +8674,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, + [ALC289_FIXUP_DELL_SPK1] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170140 }, + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE + }, [ALC289_FIXUP_DELL_SPK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8685,6 +8698,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DELL_SPK2 }, + [ALC289_FIXUP_RTK_AMP_DUAL_SPK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC289_FIXUP_DELL_SPK1 + }, [ALC294_FIXUP_SPK2_TO_DAC1] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_speaker2_to_dac1, @@ -9552,6 +9571,16 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mic, }, + [ALC289_FIXUP_DELL_CS35L41_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_spi_two, + .chained = true, + .chain_id = 
ALC289_FIXUP_DUAL_SPK + }, + [ALC294_FIXUP_CS35L41_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9662,13 +9691,15 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), - SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc0, "Dell Oasis 13", ALC289_FIXUP_RTK_AMP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc5, "Dell Oasis 14", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -9913,6 +9944,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), @@ -9929,6 +9961,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1185e5aac523..60cbc881be6e 100644 --- a/sound/soc/amd/acp/acp-i2s.c 
+++ b/sound/soc/amd/acp/acp-i2s.c
@@ -26,7 +26,6 @@
 
 #define DRV_NAME "acp_i2s_playcap"
 #define	I2S_MASTER_MODE_ENABLE		1
-#define	I2S_MODE_ENABLE			0
 #define	LRCLK_DIV_FIELD			GENMASK(10, 2)
 #define	BCLK_DIV_FIELD			GENMASK(23, 11)
 #define	ACP63_LRCLK_DIV_FIELD		GENMASK(12, 2)
@@ -56,7 +55,8 @@ static inline void acp_set_i2s_clk(struct acp_dev_data *adata, int dai_id)
 	}
 
 	val = I2S_MASTER_MODE_ENABLE;
-	val |= I2S_MODE_ENABLE & BIT(1);
+	if (adata->tdm_mode)
+		val |= BIT(1);
 
 	switch (chip->acp_rev) {
 	case ACP63_DEV:
diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c
index 77227c8f01f6..3c459a67ad0c 100644
--- a/sound/soc/codecs/aw88395/aw88395.c
+++ b/sound/soc/codecs/aw88395/aw88395.c
@@ -356,7 +356,7 @@ static const struct snd_kcontrol_new aw88395_controls[] = {
 		aw88395_get_fade_in_time, aw88395_set_fade_in_time),
 	SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN,
 		aw88395_get_fade_out_time, aw88395_set_fade_out_time),
-	SOC_SINGLE_EXT("Calib", 0, 0, 100, 0,
+	SOC_SINGLE_EXT("Calib", 0, 0, AW88395_CALI_RE_MAX, 0,
 		aw88395_re_get, aw88395_re_set),
 	AW88395_PROFILE_EXT("Profile Set", aw88395_profile_info,
 		aw88395_profile_get, aw88395_profile_set),
diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c
index ce30bc7cdea9..54f8457e8497 100644
--- a/sound/soc/codecs/aw88399.c
+++ b/sound/soc/codecs/aw88399.c
@@ -438,7 +438,7 @@ static int aw_dev_set_vcalb(struct aw88399 *aw88399)
 	if (ret)
 		return ret;
 
-	vsense_select = vsense_select & (~AW88399_VDSEL_MASK);
+	vsense_select = vsense_value & (~AW88399_VDSEL_MASK);
 
 	ret = aw88399_dev_get_icalk(aw88399, &icalk);
 	if (ret) {
@@ -486,8 +486,8 @@ static int aw_dev_update_cali_re(struct aw_cali_desc *cali_desc)
 	u32 cali_re;
 	int ret;
 
-	if ((aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MAX) ||
-	    (aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MIN))
+	if ((aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MAX) ||
+	    (aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MIN))
 		return -EINVAL;
 
 	cali_re = AW88399_SHOW_RE_TO_DSP_RE((aw_dev->cali_desc.cali_re +
@@ -1710,7 +1710,7 @@ static const struct snd_kcontrol_new aw88399_controls[] = {
 		aw88399_get_fade_in_time, aw88399_set_fade_in_time),
 	SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN,
 		aw88399_get_fade_out_time, aw88399_set_fade_out_time),
-	SOC_SINGLE_EXT("Calib", 0, 0, 100, 0,
+	SOC_SINGLE_EXT("Calib", 0, 0, AW88399_CALI_RE_MAX, 0,
 		aw88399_re_get, aw88399_re_set),
 	AW88399_PROFILE_EXT("AW88399 Profile Set", aw88399_profile_info,
 		aw88399_profile_get, aw88399_profile_set),
diff --git a/sound/soc/codecs/aw88399.h b/sound/soc/codecs/aw88399.h
index 8b3f1e101985..4f391099d0f2 100644
--- a/sound/soc/codecs/aw88399.h
+++ b/sound/soc/codecs/aw88399.h
@@ -522,7 +522,7 @@ enum {
 
 enum {
 	AW88399_DEV_VDSEL_DAC = 0,
-	AW88399_DEV_VDSEL_VSENSE = 1,
+	AW88399_DEV_VDSEL_VSENSE = 32,
 };
 
 enum {
diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
index 3bbe85091649..4c4405942779 100644
--- a/sound/soc/codecs/da7219-aad.c
+++ b/sound/soc/codecs/da7219-aad.c
@@ -927,10 +927,15 @@ void da7219_aad_suspend(struct snd_soc_component *component)
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	u8 micbias_ctrl;
 
+	disable_irq(da7219_aad->irq);
+
 	if (da7219_aad->jack) {
 		/* Disable jack detection during suspend */
 		snd_soc_component_update_bits(component, DA7219_ACCDET_CONFIG_1,
 					      DA7219_ACCDET_EN_MASK, 0);
+		cancel_delayed_work_sync(&da7219_aad->jack_det_work);
+		/* Disable ground switch */
+		snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00);
 
 		/*
 		 * If we have a 4-pole jack inserted, then micbias will be
@@ -947,8 +952,6 @@ void da7219_aad_suspend(struct snd_soc_component *component)
 			}
 		}
 	}
-
-	synchronize_irq(da7219_aad->irq);
 }
 
 void da7219_aad_resume(struct snd_soc_component *component)
@@ -971,6 +974,8 @@ void da7219_aad_resume(struct snd_soc_component *component)
 					      DA7219_ACCDET_EN_MASK,
 					      DA7219_ACCDET_EN_MASK);
 	}
+
+	enable_irq(da7219_aad->irq);
 }
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 09eef6042aad..20da1eaa4f1c 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -877,18 +877,13 @@ static int hdmi_codec_set_jack(struct snd_soc_component *component,
 			       void *data)
 {
 	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
-	int ret = -ENOTSUPP;
 
 	if (hcp->hcd.ops->hook_plugged_cb) {
 		hcp->jack = jack;
-		ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
-						    hcp->hcd.data,
-						    plugged_cb,
-						    component->dev);
-		if (ret)
-			hcp->jack = NULL;
+		return 0;
 	}
-	return ret;
+
+	return -ENOTSUPP;
 }
 
 static int hdmi_dai_spdif_probe(struct snd_soc_dai *dai)
@@ -982,6 +977,21 @@ static int hdmi_of_xlate_dai_id(struct snd_soc_component *component,
 	return ret;
 }
 
+static int hdmi_probe(struct snd_soc_component *component)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
+	int ret = 0;
+
+	if (hcp->hcd.ops->hook_plugged_cb) {
+		ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
+						    hcp->hcd.data,
+						    plugged_cb,
+						    component->dev);
+	}
+
+	return ret;
+}
+
 static void hdmi_remove(struct snd_soc_component *component)
 {
 	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
@@ -992,6 +1002,7 @@ static void hdmi_remove(struct snd_soc_component *component)
 }
 
 static const struct snd_soc_component_driver hdmi_driver = {
+	.probe			= hdmi_probe,
 	.remove			= hdmi_remove,
 	.dapm_widgets		= hdmi_widgets,
 	.num_dapm_widgets	= ARRAY_SIZE(hdmi_widgets),
diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c
index 5cf28d034f09..f66417a0f29f 100644
--- a/sound/soc/codecs/nau8540.c
+++ b/sound/soc/codecs/nau8540.c
@@ -530,12 +530,61 @@ static int nau8540_set_tdm_slot(struct snd_soc_dai *dai,
 	return 0;
 }
 
+static int nau8540_dai_trigger(struct snd_pcm_substream *substream,
+			       int cmd, struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct nau8540 *nau8540 = snd_soc_component_get_drvdata(component);
+	struct regmap *regmap = nau8540->regmap;
+	unsigned int val;
+	int ret = 0;
+
+	/* Reading the peak data to detect abnormal data in the ADC channel.
+	 * If abnormal data happens, the driver takes recovery actions to
+	 * refresh the ADC channel.
+	 */
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL,
+				   NAU8540_CLK_AGC_EN, NAU8540_CLK_AGC_EN);
+		regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3,
+				   NAU8540_ALC_CH_ALL_EN, NAU8540_ALC_CH_ALL_EN);
+
+		regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val);
+		dev_dbg(nau8540->dev, "1.ADC CH1 peak data %x", val);
+		if (!val) {
+			regmap_update_bits(regmap, NAU8540_REG_MUTE,
+					   NAU8540_PGA_CH_ALL_MUTE, NAU8540_PGA_CH_ALL_MUTE);
+			regmap_update_bits(regmap, NAU8540_REG_MUTE,
+					   NAU8540_PGA_CH_ALL_MUTE, 0);
+			regmap_write(regmap, NAU8540_REG_RST, 0x1);
+			regmap_write(regmap, NAU8540_REG_RST, 0);
+			regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val);
+			dev_dbg(nau8540->dev, "2.ADC CH1 peak data %x", val);
+			if (!val) {
+				dev_err(nau8540->dev, "Channel recovery failed!!");
+				ret = -EIO;
+			}
+		}
+		regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL,
+				   NAU8540_CLK_AGC_EN, 0);
+		regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3,
+				   NAU8540_ALC_CH_ALL_EN, 0);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct snd_soc_dai_ops nau8540_dai_ops = {
 	.startup = nau8540_dai_startup,
 	.hw_params = nau8540_hw_params,
 	.set_fmt = nau8540_set_fmt,
 	.set_tdm_slot = nau8540_set_tdm_slot,
+	.trigger = nau8540_dai_trigger,
 };
 
 #define NAU8540_RATES SNDRV_PCM_RATE_8000_48000
diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h
index 305ea9207cf0..2ce6063d462b 100644
--- a/sound/soc/codecs/nau8540.h
+++ b/sound/soc/codecs/nau8540.h
@@ -85,6 +85,7 @@
 
 /* CLOCK_CTRL (0x02) */
 #define NAU8540_CLK_ADC_EN		(0x1 << 15)
+#define NAU8540_CLK_AGC_EN		(0x1 << 3)
 #define NAU8540_CLK_I2S_EN		(0x1 << 1)
 
 /* CLOCK_SRC (0x03) */
@@ -168,6 +169,13 @@
 #define NAU8540_TDM_OFFSET_EN		(0x1 << 14)
 #define NAU8540_TDM_TX_MASK		0xf
 
+/* ALC_CONTROL_3 (0x22) */
+#define NAU8540_ALC_CH1_EN		(0x1 << 12)
+#define NAU8540_ALC_CH2_EN		(0x1 << 13)
+#define NAU8540_ALC_CH3_EN		(0x1 << 14)
+#define NAU8540_ALC_CH4_EN		(0x1 << 15)
+#define NAU8540_ALC_CH_ALL_EN		(0xf << 12)
+
 /* ADC_SAMPLE_RATE (0x3A) */
 #define NAU8540_CH_SYNC			(0x1 << 14)
 #define NAU8540_ADC_OSR_MASK		0x3
@@ -181,6 +189,13 @@
 #define NAU8540_VMID_SEL_SFT		4
 #define NAU8540_VMID_SEL_MASK		(0x3 << NAU8540_VMID_SEL_SFT)
 
+/* MUTE (0x61) */
+#define NAU8540_PGA_CH1_MUTE		0x1
+#define NAU8540_PGA_CH2_MUTE		0x2
+#define NAU8540_PGA_CH3_MUTE		0x4
+#define NAU8540_PGA_CH4_MUTE		0x8
+#define NAU8540_PGA_CH_ALL_MUTE		0xf
+
 /* MIC_BIAS (0x67) */
 #define NAU8540_PU_PRE			(0x1 << 8)
diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c
index 7077ff6ba1f4..6954fbe7ec5f 100644
--- a/sound/soc/codecs/rt712-sdca.c
+++ b/sound/soc/codecs/rt712-sdca.c
@@ -963,13 +963,6 @@ static int rt712_sdca_probe(struct snd_soc_component *component)
 	rt712_sdca_parse_dt(rt712, &rt712->slave->dev);
 	rt712->component = component;
 
-	if (!rt712->first_hw_init)
-		return 0;
-
-	ret = pm_runtime_resume(component->dev);
-	if (ret < 0 && ret != -EACCES)
-		return ret;
-
 	/* add SPK route */
 	if (rt712->hw_id != RT712_DEV_ID_713) {
 		snd_soc_add_component_controls(component,
@@ -980,6 +973,13 @@ static int rt712_sdca_probe(struct snd_soc_component *component)
 			rt712_sdca_spk_dapm_routes, ARRAY_SIZE(rt712_sdca_spk_dapm_routes));
 	}
 
+	if (!rt712->first_hw_init)
+		return 0;
+
+	ret = pm_runtime_resume(component->dev);
+	if (ret < 0 && ret != -EACCES)
+		return ret;
+
 	return 0;
 }
diff --git a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
index 6dfcfcf47cab..f78197c8e582 100644
--- a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
+++ b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
@@ -1216,7 +1216,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev)
 	playback_codec = of_get_child_by_name(pdev->dev.of_node, "playback-codecs");
 	if (!playback_codec) {
 		ret = -EINVAL;
-		dev_err_probe(&pdev->dev, ret, "Property 'speaker-codecs' missing or invalid\n");
+		dev_err_probe(&pdev->dev, ret, "Property 'playback-codecs' missing or invalid\n");
 		goto err_playback_codec;
 	}
 
@@ -1230,7 +1230,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev)
 	for_each_card_prelinks(card, i, dai_link) {
 		ret = mt8186_mt6366_card_set_be_link(card, dai_link, playback_codec, "I2S3");
 		if (ret) {
-			dev_err_probe(&pdev->dev, ret, "%s set speaker_codec fail\n",
+			dev_err_probe(&pdev->dev, ret, "%s set playback_codec fail\n",
 				      dai_link->name);
 			goto err_probe;
 		}
diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
index 5c51dbef6e86..860e66ec85e8 100644
--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
+++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
@@ -1757,7 +1757,7 @@ static struct platform_driver rockchip_i2s_tdm_driver = {
 	.remove_new = rockchip_i2s_tdm_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.of_match_table = of_match_ptr(rockchip_i2s_tdm_match),
+		.of_match_table = rockchip_i2s_tdm_match,
 		.pm = &rockchip_i2s_tdm_pm_ops,
 	},
 };
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 4e2beda6f9bf..3844f777c87b 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3670,7 +3670,7 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
 		dapm_pinctrl_event(w, NULL, SND_SOC_DAPM_POST_PMD);
 		break;
 	case snd_soc_dapm_clock_supply:
-		w->clk = devm_clk_get(dapm->dev, w->name);
+		w->clk = devm_clk_get(dapm->dev, widget->name);
 		if (IS_ERR(w->clk)) {
 			ret = PTR_ERR(w->clk);
 			goto request_failed;
diff --git a/sound/soc/sof/sof-client.c b/sound/soc/sof/sof-client.c
index 9dce7f53b482..54dca91255a0 100644
--- a/sound/soc/sof/sof-client.c
+++ b/sound/soc/sof/sof-client.c
@@ -176,7 +176,7 @@ int sof_register_clients(struct snd_sof_dev *sdev)
 		goto err_kernel_injector;
 	}
 
-	/* Platform depndent client device registration */
+	/* Platform dependent client device registration */
 	if (sof_ops(sdev) && sof_ops(sdev)->register_ipc_clients)
 		ret = sof_ops(sdev)->register_ipc_clients(sdev);
diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c
index 7643a54592f5..2110ffe5281c 100644
--- a/sound/soc/ti/omap-mcbsp.c
+++ b/sound/soc/ti/omap-mcbsp.c
@@ -73,14 +73,16 @@ static int omap2_mcbsp_set_clks_src(struct omap_mcbsp *mcbsp, u8 fck_src_id)
 		return 0;
 	}
 
-	pm_runtime_put_sync(mcbsp->dev);
+	if (mcbsp->active)
+		pm_runtime_put_sync(mcbsp->dev);
 
 	r = clk_set_parent(mcbsp->fclk, fck_src);
 	if (r)
 		dev_err(mcbsp->dev, "CLKS: could not clk_set_parent() to %s\n",
 			src);
 
-	pm_runtime_get_sync(mcbsp->dev);
+	if (mcbsp->active)
+		pm_runtime_get_sync(mcbsp->dev);
 
 	clk_put(fck_src);
diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c
index 75b744b47986..bc5065bd99b2 100644
--- a/tools/net/ynl/generated/devlink-user.c
+++ b/tools/net/ynl/generated/devlink-user.c
@@ -121,6 +121,8 @@ const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value)
 static const char * const devlink_port_fn_attr_cap_strmap[] = {
 	[0] = "roce-bit",
 	[1] = "migratable-bit",
+	[2] = "ipsec-crypto-bit",
+	[3] = "ipsec-packet-bit",
 };
 
 const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value)
diff --git a/tools/net/ynl/generated/nfsd-user.c b/tools/net/ynl/generated/nfsd-user.c
index fec6828680ce..360b6448c6e9 100644
--- a/tools/net/ynl/generated/nfsd-user.c
+++ b/tools/net/ynl/generated/nfsd-user.c
@@ -50,9 +50,116 @@ struct ynl_policy_nest nfsd_rpc_status_nest = {
 /* Common nested types */
 /* ============== NFSD_CMD_RPC_STATUS_GET ============== */
 /* NFSD_CMD_RPC_STATUS_GET - dump */
-void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp)
+int nfsd_rpc_status_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data)
 {
-	struct nfsd_rpc_status_get_list *next = rsp;
+	struct nfsd_rpc_status_get_rsp_dump *dst;
+	struct ynl_parse_arg *yarg = data;
+	unsigned int n_compound_ops = 0;
+	const struct nlattr *attr;
+	int i;
+
+	dst = yarg->data;
+
+	if (dst->compound_ops)
+		return ynl_error_parse(yarg, "attribute already present (rpc-status.compound-ops)");
+
+	mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+		unsigned int type = mnl_attr_get_type(attr);
+
+		if (type == NFSD_A_RPC_STATUS_XID) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.xid = 1;
+			dst->xid = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_FLAGS) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.flags = 1;
+			dst->flags = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_PROG) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.prog = 1;
+			dst->prog = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_VERSION) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.version = 1;
+			dst->version = mnl_attr_get_u8(attr);
+		} else if (type == NFSD_A_RPC_STATUS_PROC) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.proc = 1;
+			dst->proc = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SERVICE_TIME) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.service_time = 1;
+			dst->service_time = mnl_attr_get_u64(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SADDR4) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.saddr4 = 1;
+			dst->saddr4 = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_DADDR4) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.daddr4 = 1;
+			dst->daddr4 = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SADDR6) {
+			unsigned int len;
+
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+
+			len = mnl_attr_get_payload_len(attr);
+			dst->_present.saddr6_len = len;
+			dst->saddr6 = malloc(len);
+			memcpy(dst->saddr6, mnl_attr_get_payload(attr), len);
+		} else if (type == NFSD_A_RPC_STATUS_DADDR6) {
+			unsigned int len;
+
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+
+			len = mnl_attr_get_payload_len(attr);
+			dst->_present.daddr6_len = len;
+			dst->daddr6 = malloc(len);
+			memcpy(dst->daddr6, mnl_attr_get_payload(attr), len);
+		} else if (type == NFSD_A_RPC_STATUS_SPORT) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.sport = 1;
+			dst->sport = mnl_attr_get_u16(attr);
+		} else if (type == NFSD_A_RPC_STATUS_DPORT) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.dport = 1;
+			dst->dport = mnl_attr_get_u16(attr);
+		} else if (type == NFSD_A_RPC_STATUS_COMPOUND_OPS) {
+			n_compound_ops++;
+		}
+	}
+
+	if (n_compound_ops) {
+		dst->compound_ops = calloc(n_compound_ops, sizeof(*dst->compound_ops));
+		dst->n_compound_ops = n_compound_ops;
+		i = 0;
+		mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+			if (mnl_attr_get_type(attr) == NFSD_A_RPC_STATUS_COMPOUND_OPS) {
+				dst->compound_ops[i] = mnl_attr_get_u32(attr);
+				i++;
+			}
+		}
+	}
+
+	return MNL_CB_OK;
+}
+
+void
+nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp)
+{
+	struct nfsd_rpc_status_get_rsp_list *next = rsp;
 
 	while ((void *)next != YNL_LIST_END) {
 		rsp = next;
@@ -65,15 +172,16 @@ void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp)
 	}
 }
 
-struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys)
+struct nfsd_rpc_status_get_rsp_list *
+nfsd_rpc_status_get_dump(struct ynl_sock *ys)
 {
 	struct ynl_dump_state yds = {};
 	struct nlmsghdr *nlh;
 	int err;
 
 	yds.ys = ys;
-	yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_list);
-	yds.cb = nfsd_rpc_status_get_rsp_parse;
+	yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_rsp_list);
+	yds.cb = nfsd_rpc_status_get_rsp_dump_parse;
 	yds.rsp_cmd = NFSD_CMD_RPC_STATUS_GET;
 	yds.rsp_policy = &nfsd_rpc_status_nest;
 
@@ -86,7 +194,7 @@ struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys)
 	return yds.first;
 
 free_list:
-	nfsd_rpc_status_get_list_free(yds.first);
+	nfsd_rpc_status_get_rsp_list_free(yds.first);
 	return NULL;
 }
diff --git a/tools/net/ynl/generated/nfsd-user.h b/tools/net/ynl/generated/nfsd-user.h
index b6b69501031a..989c6e209ced 100644
--- a/tools/net/ynl/generated/nfsd-user.h
+++ b/tools/net/ynl/generated/nfsd-user.h
@@ -21,13 +21,47 @@ const char *nfsd_op_str(int op);
 /* Common nested types */
 /* ============== NFSD_CMD_RPC_STATUS_GET ============== */
 /* NFSD_CMD_RPC_STATUS_GET - dump */
-struct nfsd_rpc_status_get_list {
-	struct nfsd_rpc_status_get_list *next;
-	struct nfsd_rpc_status_get_rsp obj __attribute__ ((aligned (8)));
+struct nfsd_rpc_status_get_rsp_dump {
+	struct {
+		__u32 xid:1;
+		__u32 flags:1;
+		__u32 prog:1;
+		__u32 version:1;
+		__u32 proc:1;
+		__u32 service_time:1;
+		__u32 saddr4:1;
+		__u32 daddr4:1;
+		__u32 saddr6_len;
+		__u32 daddr6_len;
+		__u32 sport:1;
+		__u32 dport:1;
+	} _present;
+
+	__u32 xid /* big-endian */;
+	__u32 flags;
+	__u32 prog;
+	__u8 version;
+	__u32 proc;
+	__s64 service_time;
+	__u32 saddr4 /* big-endian */;
+	__u32 daddr4 /* big-endian */;
+	void *saddr6;
+	void *daddr6;
+	__u16 sport /* big-endian */;
+	__u16 dport /* big-endian */;
+	unsigned int n_compound_ops;
+	__u32 *compound_ops;
+};
+
+struct nfsd_rpc_status_get_rsp_list {
+	struct nfsd_rpc_status_get_rsp_list *next;
+	struct nfsd_rpc_status_get_rsp_dump obj __attribute__((aligned(8)));
 };
 
-void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp);
+void
+nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp);
 
-struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys);
+struct nfsd_rpc_status_get_rsp_list *
+nfsd_rpc_status_get_dump(struct ynl_sock *ys);
 
 #endif /* _LINUX_NFSD_GEN_H */
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 13427436bfb7..c4003a83cd5d 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -3,6 +3,7 @@
 
 import argparse
 import collections
+import filecmp
 import os
 import re
 import shutil
@@ -1168,7 +1169,7 @@ class CodeWriter:
         if out_file is None:
             self._out = os.sys.stdout
         else:
-            self._out = tempfile.TemporaryFile('w+')
+            self._out = tempfile.NamedTemporaryFile('w+')
             self._out_file = out_file
 
     def __del__(self):
@@ -1177,6 +1178,10 @@ class CodeWriter:
     def close_out_file(self):
         if self._out == os.sys.stdout:
             return
+        # Avoid modifying the file if contents didn't change
+        self._out.flush()
+        if os.path.isfile(self._out_file) and filecmp.cmp(self._out.name, self._out_file, shallow=False):
+            return
         with open(self._out_file, 'w+') as out_file:
             self._out.seek(0)
             shutil.copyfileobj(self._out, out_file)
diff --git a/tools/power/cpupower/man/cpupower-powercap-info.1 b/tools/power/cpupower/man/cpupower-powercap-info.1
index df3087000efb..145d6f06fa72 100644
--- a/tools/power/cpupower/man/cpupower-powercap-info.1
+++ b/tools/power/cpupower/man/cpupower-powercap-info.1
@@ -17,7 +17,7 @@ settings of all cores, see cpupower(1) how to choose specific cores.
 .SH "DOCUMENTATION"
 
 kernel sources:
-Documentation/power/powercap/powercap.txt
+Documentation/power/powercap/powercap.rst
 
 .SH "SEE ALSO"
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index a5e246f7b202..91907b321f91 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 {
 	int b;
 };
 
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
-		  "Global functions as their definitions will be in bpf_testmod.ko BTF");
+__bpf_hook_start();
 
 noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
@@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a)
 }
 EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
 
-__diag_pop();
+__bpf_hook_end();
 
 static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.attr = { .name = "bpf_testmod", .mode = 0666, },
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 1a9eeefda9a8..8bf497a9843e 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -326,20 +326,14 @@ static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *
 
 static int create_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
 }
 
 static int create_percpu_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
 }
@@ -356,21 +350,17 @@ static int create_percpu_hash_prealloc(void)
 
 static int create_lru_hash(__u32 type, __u32 map_flags)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = map_flags,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
 
 	return map_create(type, "lru_hash", &map_opts);
 }
 
 static int create_hash_of_maps(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
 		.map_flags = BPF_F_NO_PREALLOC,
 		.inner_map_fd = create_small_hash(),
-	};
+	);
 	int ret;
 
 	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index e02feb5fae97..574d9a0cdc8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
+#include "iters_css_task.skel.h"
 #include "cgroup_iter.skel.h"
 #include "cgroup_helpers.h"
 
@@ -263,6 +264,35 @@ close_cgrp:
 	close(cgrp_fd);
 }
 
+static void test_walk_self_only_css_task(void)
+{
+	struct iters_css_task *skel;
+	int err;
+
+	skel = iters_css_task__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+	err = iters_css_task__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = join_cgroup(cg_path[CHILD2]);
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	snprintf(expected_output, sizeof(expected_output),
+		PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+		BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+	ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+	iters_css_task__destroy(skel);
+}
+
 void test_cgroup_iter(void)
 {
 	struct cgroup_iter *skel = NULL;
@@ -293,6 +323,9 @@ void test_cgroup_iter(void)
 		test_walk_self_only(skel);
 	if (test__start_subtest("cgroup_iter__dead_self_only"))
 		test_walk_dead_self_only(skel);
+	if (test__start_subtest("cgroup_iter__self_only_css_task"))
+		test_walk_self_only_css_task();
+
 out:
 	cgroup_iter__destroy(skel);
 	cleanup_cgroups();
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index c2425791c923..bf84d4a1d9ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -294,6 +294,7 @@ void test_iters(void)
 	RUN_TESTS(iters_state_safety);
 	RUN_TESTS(iters_looping);
 	RUN_TESTS(iters);
+	RUN_TESTS(iters_css_task);
 
 	if (env.has_testmod)
 		RUN_TESTS(iters_testmod_seq);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index 214d9f4a94a5..ea933fd151c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -8,7 +8,8 @@
 #include <sys/types.h>
 #include <test_progs.h>
 
-#define TDIR "/sys/kernel/debug"
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
 
 static int read_iter(char *file)
 {
@@ -43,8 +44,11 @@ static int fn(void)
 	if (!ASSERT_OK(err, "mount /"))
 		goto out;
 
-	err = umount(TDIR);
-	if (!ASSERT_OK(err, "umount " TDIR))
+	err = mkdir(TDIR, 0777);
+	/* If the directory already exists we can carry on. It may be left over
+	 * from a previous run.
+	 */
+	if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
 		goto out;
 
 	err = mount("none", TDIR, "tmpfs", 0, NULL);
@@ -138,6 +142,7 @@ out:
 	rmdir(TDIR "/fs1");
 	rmdir(TDIR "/fs2");
 	umount(TDIR);
+	rmdir(TDIR);
 	exit(err);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e3e68c97b40c..e5c61aa6604a 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -46,6 +46,7 @@
 #include "verifier_movsx.skel.h"
 #include "verifier_netfilter_ctx.skel.h"
 #include "verifier_netfilter_retcode.skel.h"
+#include "verifier_precision.skel.h"
 #include "verifier_prevent_map_lookup.skel.h"
 #include "verifier_raw_stack.skel.h"
 #include "verifier_raw_tp_writable.skel.h"
@@ -153,6 +154,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
 void test_verifier_movsx(void) { RUN(verifier_movsx); }
 void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
 void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_precision(void) { RUN(verifier_precision); }
 void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
 void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
 void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
index 5089ce384a1c..9ac758649cb8 100644
--- a/tools/testing/selftests/bpf/progs/iters_css_task.c
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -10,6 +10,7 @@
 
 char _license[] SEC("license") = "GPL";
 
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
 void bpf_cgroup_release(struct cgroup *p) __ksym;
 
@@ -45,3 +46,57 @@ int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
 
 	return -EPERM;
 }
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct cgroup *cgrp = ctx->cgroup;
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	/* epilogue */
+	if (cgrp == NULL) {
+		BPF_SEQ_PRINTF(seq, "epilogue\n");
+		return 0;
+	}
+
+	/* prologue */
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "prologue\n");
+
+	BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+	css = &cgrp->self;
+	css_task_cnt = 0;
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+		if (task->pid == target_pid)
+			css_task_cnt++;
+	}
+
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+	u64 cgrp_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	if (cgrp == NULL)
+		return 0;
+	css = &cgrp->self;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index c3bf96a67dba..6b1588d70652 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -84,8 +84,8 @@ int BPF_PROG(iter_css_lock_and_unlock)
 	return 0;
 }
 
-SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("css_task_iter is only allowed in bpf_lsm and bpf iter-s")
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
 int BPF_PROG(iter_css_task_for_each)
 {
 	u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 3ddcb3777912..2a2a942737d7 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -7,7 +7,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
+     defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+    __clang_major__ >= 18
 const volatile int skip = 0;
 #else
 const volatile int skip = 1;
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 107525fb4a6a..e61755656e8d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 9f202eda952f..d1edbcff9a18 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 375525329637..d4427d8e1217 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index b2a04d1179d0..cbb9d6714f53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..193c0f8272d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+	asm volatile (
+		"r2 = 8;"
+		"r2 = -r2;"
+		"if r2 != -8 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = le16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = be16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = bswap16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#endif /* v4 instruction */
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 8fc5174808b2..2a2271cf0294 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index 3af2501082b2..b616575c3b00 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -65,3 +65,35 @@
 	.expected_attach_type = BPF_SK_LOOKUP,
 	.runs = -1,
 },
+{
+	"BPF_ST_MEM stack imm sign",
+	/* Check if verifier correctly reasons about sign of an
+	 * immediate spilled to stack by BPF_ST instruction.
+	 *
+	 *   fp[-8] = -44;
+	 *   r0 = fp[-8];
+	 *   if r0 s< 0 goto ret0;
+	 *   r0 = -1;
+	 *   exit;
+	 * ret0:
+	 *   r0 = 0;
+	 *   exit;
+	 */
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, -1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.result = VERBOSE_ACCEPT,
+	.runs = -1,
+	.errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+	2: (c5) if r2 s< 0x0 goto pc+2\
+	R0_w=-44",
+},
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 17c0f92ff160..c3ba40d0b9de 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -430,7 +430,7 @@ static void print_usage(void)
 
 static void read_args(int argc, char *argv[])
 {
-	char opt;
+	int opt;
 
 	while ((opt = getopt(argc, argv, "mh")) != -1) {
 		switch (opt) {
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..b3b2dc5a630c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -2048,7 +2048,7 @@ run_test() {
 	case $ret in
 		0)
 			all_skipped=false
-			[ $exitcode=$ksft_skip ] && exitcode=0
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
 		;;
 		$ksft_skip)
 			[ $all_skipped = true ] && exitcode=$ksft_skip
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index ebdbb3c22e54..f224b84591fb 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -2,9 +2,14 @@
 # Copyright (C) 2021 ARM Limited
 # Originally tools/testing/arm64/abi/Makefile
 
-TEST_GEN_PROGS := hwprobe
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo
 
 include ../../lib.mk
 
 $(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
-	$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..50a2cc8aef38
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+	unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+	uint32_t insn = *(uint32_t *)regs[0];
+
+	assert(insn == MK_CBO(regs[11]));
+
+	illegal_insn = true;
+	regs[0] += 4;
+}
+
+static void cbo_insn(char *base, int fn)
+{
+	uint32_t insn = MK_CBO(fn);
+
+	asm volatile(
+	"mv	a0, %0\n"
+	"li	a1, %1\n"
+	".4byte	%2\n"
+	: : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
+}
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base) { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+	ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+	illegal_insn = false;
+	cbo_clean(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+	illegal_insn = false;
+	cbo_flush(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+	illegal_insn = false;
+	cbo_inval(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+	ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+	illegal_insn = false;
+	cbo_zero(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+	return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+	};
+	cpu_set_t *cpus = (cpu_set_t *)arg;
+	__u64 block_size;
+	int i, j;
+	long rc;
+
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+	block_size = pair.value;
+	ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+			 is_power_of_2(block_size), "Zicboz block size\n");
+	ksft_print_msg("Zicboz block size: %ld\n", block_size);
+
+	illegal_insn = false;
+	cbo_zero(&mem[block_size]);
+	ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+	if (illegal_insn || !is_power_of_2(block_size)) {
+		ksft_test_result_skip("cbo.zero check\n");
+		return;
+	}
+
+	assert(block_size <= 1024);
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		if (i % 2)
+			cbo_zero(&mem[i * block_size]);
+	}
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		char expected = i % 2 ? 0x0 : 0xa5;
+
+		for (j = 0; j < block_size; ++j) {
+			if (mem[i * block_size + j] != expected) {
+				ksft_test_result_fail("cbo.zero check\n");
+				ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+					       i * block_size + j, expected);
+				return;
+			}
+		}
+	}
+
+	ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+	};
+	cpu_set_t one_cpu;
+	int i = 0, c = 0;
+	long rc;
+
+	while (i++ < CPU_COUNT(cpus)) {
+		while (!CPU_ISSET(c, cpus))
+			++c;
+
+		CPU_ZERO(&one_cpu);
+		CPU_SET(c, &one_cpu);
+
+		rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+		assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+		if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+			ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+					   "Use taskset to select a set of harts where Zicboz\n"
+					   "presence (present or not) is consistent for each hart\n");
+		++c;
+	}
+}
+
+enum {
+	TEST_ZICBOZ,
+	TEST_NO_ZICBOZ,
+	TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+	bool enabled;
+	unsigned int nr_tests;
+	void (*test_fn)(void *arg);
+} tests[] = {
+	[TEST_ZICBOZ]		= { .nr_tests = 3, test_zicboz },
+	[TEST_NO_ZICBOZ]	= { .nr_tests = 1, test_no_zicboz },
+	[TEST_NO_ZICBOM]	= { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_sigaction = &sigill_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	struct riscv_hwprobe pair;
+	unsigned int plan = 0;
+	cpu_set_t cpus;
+	long rc;
+	int i;
+
+	if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+		rc = sigaction(SIGILL, &act, NULL);
+		assert(rc == 0);
+		tests[TEST_NO_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOM].enabled = true;
+	}
+
+	rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+	assert(rc == 0);
+
+	ksft_print_header();
+
+	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+	if (rc < 0)
+		ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+	assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+	if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+		tests[TEST_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOZ].enabled = false;
+	} else {
+		check_no_zicboz_cpus(&cpus);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i)
+		plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+	if (plan == 0)
+		ksft_print_msg("No tests enabled.\n");
+	else
+		ksft_set_plan(plan);
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (tests[i].enabled)
+			tests[i].test_fn(&cpus);
+	}
+
+	ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index 09f290a67420..c474891df307 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,14 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include <stddef.h>
-#include <asm/hwprobe.h>
-
-/*
- * Rather than relying on having a new enough libc to define this, just do it
- * ourselves. This way we don't need to be coupled to a new-enough libc to
- * contain the call.
- */
-long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+#include "hwprobe.h"
+#include "../../kselftest.h"
 
 int main(int argc, char **argv)
 {
@@ -16,6 +8,9 @@ int main(int argc, char **argv)
 	unsigned long cpus;
 	long out;
 
+	ksft_print_header();
+	ksft_set_plan(5);
+
 	/* Fake the CPU_SET ops. */
 	cpus = -1;
 
@@ -25,13 +20,16 @@ int main(int argc, char **argv)
 	 */
 	for (long i = 0; i < 8; i++)
 		pairs[i].key = i;
+
 	out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
 	if (out != 0)
-		return -1;
+		ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
 	for (long i = 0; i < 4; ++i) {
 		/* Fail if the kernel claims not to recognize a base key. */
 		if ((i < 4) && (pairs[i].key != i))
-			return -2;
+			ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+					   "key=%ld, i=%ld\n", pairs[i].key, i);
 
 		if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
 			continue;
@@ -39,52 +37,30 @@ int main(int argc, char **argv)
 		if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
 			continue;
 
-		return -3;
+		ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
 	}
 
-	/*
-	 * This should also work with a NULL CPU set, but should not work
-	 * with an improperly supplied CPU set.
-	 */
 	out = riscv_hwprobe(pairs, 8, 0, 0, 0);
-	if (out != 0)
-		return -4;
+	ksft_test_result(out == 0, "NULL CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
-	if (out == 0)
-		return -5;
+	ksft_test_result(out != 0, "Bad CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 1, 0, 0);
-	if (out == 0)
-		return -6;
+	ksft_test_result(out != 0, "NULL CPU set with non-zero count\n");
 
-	/*
-	 * Check that keys work by providing one that we know exists, and
-	 * checking to make sure the resultig pair is what we asked for.
-	 */
 	pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
 	out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
-	if (out != 0)
-		return -7;
-	if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
-		return -8;
+	ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+			 "Existing key is maintained\n");
 
-	/*
-	 * Check that an unknown key gets overwritten with -1,
-	 * but doesn't block elements after it.
-	 */
 	pairs[0].key = 0x5555;
 	pairs[1].key = 1;
 	pairs[1].value = 0xAAAA;
 	out = riscv_hwprobe(pairs, 2, 0, 0, 0);
-	if (out != 0)
-		return -9;
-
-	if (pairs[0].key != -1)
-		return -10;
-
-	if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
-		return -11;
+	ksft_test_result(out == 0 && pairs[0].key == -1 &&
+			 pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+			 "Unknown key overwritten with -1 and doesn't block other elements\n");
 
-	return 0;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..721b0ce73a56
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#endif
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 92336721321a..ae2b33c21c45 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -85,6 +85,48 @@ void vsock_wait_remote_close(int fd)
 	close(epollfd);
 }
 
+/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
+int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+{
+	struct sockaddr_vm sa_client = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = VMADDR_CID_ANY,
+		.svm_port = bind_port,
+	};
+	struct sockaddr_vm sa_server = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = cid,
+		.svm_port = port,
+	};
+
+	int client_fd, ret;
+
+	client_fd = socket(AF_VSOCK, type, 0);
+	if (client_fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	timeout_begin(TIMEOUT);
+	do {
+		ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server));
+		timeout_check("connect");
+	} while (ret < 0 && errno == EINTR);
+	timeout_end();
+
+	if (ret < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	return client_fd;
+}
+
 /* Connect to <cid, port> and return the file descriptor. */
 static int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
@@ -104,6 +146,10 @@ static int vsock_connect(unsigned int cid, unsigned int port, int type)
 	control_expectln("LISTENING");
 
 	fd = socket(AF_VSOCK, type, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
 
 	timeout_begin(TIMEOUT);
 	do {
@@ -132,11 +178,8 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
 	return vsock_connect(cid, port, SOCK_SEQPACKET);
 }
 
-/* Listen on <cid, port> and return the first incoming connection. The remote
- * address is stored to clientaddrp. clientaddrp may be NULL.
- */
-static int vsock_accept(unsigned int cid, unsigned int port,
-			struct sockaddr_vm *clientaddrp, int type)
+/* Listen on <cid, port> and return the file descriptor. */
+static int vsock_listen(unsigned int cid, unsigned int port, int type)
 {
 	union {
 		struct sockaddr sa;
@@ -148,16 +191,13 @@ static int vsock_accept(unsigned int cid, unsigned int port,
 			.svm_cid = cid,
 		},
 	};
-	union {
-		struct sockaddr sa;
-		struct sockaddr_vm svm;
-	} clientaddr;
-	socklen_t clientaddr_len = sizeof(clientaddr.svm);
 	int fd;
-	int client_fd;
-	int old_errno;
 
 	fd = socket(AF_VSOCK, type, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
 
 	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
 		perror("bind");
@@ -169,6 +209,24 @@ static int vsock_accept(unsigned int cid, unsigned int port,
 		exit(EXIT_FAILURE);
 	}
 
+	return fd;
+}
+
+/* Listen on <cid, port> and return the first incoming connection. The remote
+ * address is stored to clientaddrp. clientaddrp may be NULL.
+ */
+static int vsock_accept(unsigned int cid, unsigned int port,
+			struct sockaddr_vm *clientaddrp, int type)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} clientaddr;
+	socklen_t clientaddr_len = sizeof(clientaddr.svm);
+	int fd, client_fd, old_errno;
+
+	fd = vsock_listen(cid, port, type);
+
 	control_writeln("LISTENING");
 
 	timeout_begin(TIMEOUT);
@@ -207,6 +265,11 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
 	return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
 }
 
+int vsock_stream_listen(unsigned int cid, unsigned int port)
+{
+	return vsock_listen(cid, port, SOCK_STREAM);
+}
+
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp)
 {
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index a77175d25864..03c88d0cb861 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,9 +36,12 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind_connect(unsigned int cid, unsigned int port,
+		       unsigned int bind_port, int type);
 int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
 int vsock_stream_accept(unsigned int cid, unsigned int port,
 			struct sockaddr_vm *clientaddrp);
+int vsock_stream_listen(unsigned int cid, unsigned int port);
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index c1f7bc9abd22..5b0e93f9996c 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1180,6 +1180,51 @@ static void test_stream_shutrd_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_double_bind_connect_server(const struct test_opts *opts)
+{
+	int listen_fd, client_fd, i;
+	struct sockaddr_vm sa_client;
+	socklen_t socklen_client = sizeof(sa_client);
+
+	listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234);
+
+	for (i = 0; i < 2; i++) {
+		control_writeln("LISTENING");
+
+		timeout_begin(TIMEOUT);
+		do {
+			client_fd = accept(listen_fd, (struct sockaddr *)&sa_client,
+					   &socklen_client);
+			timeout_check("accept");
+		} while (client_fd < 0 && errno == EINTR);
+		timeout_end();
+
+		if (client_fd < 0) {
+			perror("accept");
+			exit(EXIT_FAILURE);
+		}
+
+		/* Waiting for remote peer to close connection */
+		vsock_wait_remote_close(client_fd);
+	}
+
+	close(listen_fd);
+}
+
+static void test_double_bind_connect_client(const struct test_opts *opts)
+{
+	int i, client_fd;
+
+	for (i = 0; i < 2; i++) {
+		/* Wait until server is ready to accept a new connection */
+		control_expectln("LISTENING");
+
+		client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM);
+
+		close(client_fd);
+	}
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1285,6 +1330,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_msgzcopy_empty_errq_client,
 		.run_server = test_stream_msgzcopy_empty_errq_server,
 	},
+	{
+		.name = "SOCK_STREAM double bind connect",
+		.run_client = test_double_bind_connect_client,
+		.run_server = test_double_bind_connect_server,
+	},
 	{},
 };
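Editor's note on the last hunk above: the new "SOCK_STREAM double bind connect" test exercises an AF_VSOCK client that bind()s to a fixed local port before connect(), twice in a row, verifying that the port is released cleanly after each connection is closed. The following is a minimal standalone sketch of that client-side pattern, not part of the patch: the CID and port constants are illustrative (they mirror the test's 1234/4321 choices), and the control-channel handshake and EINTR retry loop of the real harness are omitted.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

/* Bind to a fixed client-side port, then connect to <cid, port>. */
static int bind_connect_once(unsigned int cid, unsigned int port,
			     unsigned int bind_port)
{
	struct sockaddr_vm local = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_ANY,	/* any local CID */
		.svm_port = bind_port,		/* fixed source port */
	};
	struct sockaddr_vm remote = {
		.svm_family = AF_VSOCK,
		.svm_cid = cid,
		.svm_port = port,
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0 ||
	    bind(fd, (struct sockaddr *)&local, sizeof(local)) ||
	    connect(fd, (struct sockaddr *)&remote, sizeof(remote))) {
		perror("socket/bind/connect");
		exit(EXIT_FAILURE);
	}
	return fd;
}

int main(void)
{
	/* CID 2 (the host) and ports 1234/4321 are assumed example values;
	 * a second bind to the same port only succeeds if the first
	 * connection released it cleanly on close().
	 */
	for (int i = 0; i < 2; i++)
		close(bind_connect_once(2, 1234, 4321));
	return 0;
}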